Home | History | Annotate | Download | only in decoder
      1 /******************************************************************************
      2 *
      3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 *
      5 * Licensed under the Apache License, Version 2.0 (the "License");
      6 * you may not use this file except in compliance with the License.
      7 * You may obtain a copy of the License at:
      8 *
      9 * http://www.apache.org/licenses/LICENSE-2.0
     10 *
     11 * Unless required by applicable law or agreed to in writing, software
     12 * distributed under the License is distributed on an "AS IS" BASIS,
     13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 * See the License for the specific language governing permissions and
     15 * limitations under the License.
     16 *
     17 ******************************************************************************/
     18 /**
     19  *******************************************************************************
     20  * @file
     21  *  ihevc_sao.c
     22  *
     23  * @brief
     24  *  Contains function definitions for sample adaptive offset process
     25  *
     26  * @author
     27  *  Srinivas T
     28  *
     29  * @par List of Functions:
     30  *
     31  * @remarks
     32  *  None
     33  *
     34  *******************************************************************************
     35  */
     36 
     37 #include <stdio.h>
     38 #include <stddef.h>
     39 #include <stdlib.h>
     40 #include <string.h>
     41 #include <assert.h>
     42 
     43 #include "ihevc_typedefs.h"
     44 #include "iv.h"
     45 #include "ivd.h"
     46 #include "ihevcd_cxa.h"
     47 #include "ithread.h"
     48 
     49 #include "ihevc_defs.h"
     50 #include "ihevc_debug.h"
     51 #include "ihevc_defs.h"
     52 #include "ihevc_structs.h"
     53 #include "ihevc_macros.h"
     54 #include "ihevc_platform_macros.h"
     55 #include "ihevc_cabac_tables.h"
     56 #include "ihevc_sao.h"
     57 #include "ihevc_mem_fns.h"
     58 
     59 #include "ihevc_error.h"
     60 #include "ihevc_common_tables.h"
     61 
     62 #include "ihevcd_trace.h"
     63 #include "ihevcd_defs.h"
     64 #include "ihevcd_function_selector.h"
     65 #include "ihevcd_structs.h"
     66 #include "ihevcd_error.h"
     67 #include "ihevcd_nal.h"
     68 #include "ihevcd_bitstream.h"
     69 #include "ihevcd_job_queue.h"
     70 #include "ihevcd_utils.h"
     71 
     72 #include "ihevc_deblk.h"
     73 #include "ihevc_deblk_tables.h"
     74 #include "ihevcd_profile.h"
     75 #include "ihevcd_sao.h"
     76 #include "ihevcd_debug.h"
     77 
     78 #define SAO_SHIFT_CTB    8
     79 
     80 /**
     81  * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions)
     82  */
     83 void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
     84 {
     85     codec_t *ps_codec = ps_sao_ctxt->ps_codec;
     86     UWORD8 *pu1_src_luma;
     87     UWORD8 *pu1_src_chroma;
     88     WORD32 src_strd;
     89     WORD32 ctb_size;
     90     WORD32 log2_ctb_size;
     91     sps_t *ps_sps;
     92     sao_t *ps_sao;
     93     WORD32 row, col;
     94     UWORD8 au1_avail_luma[8];
     95     UWORD8 au1_avail_chroma[8];
     96     WORD32 i;
     97     UWORD8 *pu1_src_top_luma;
     98     UWORD8 *pu1_src_top_chroma;
     99     UWORD8 *pu1_src_left_luma;
    100     UWORD8 *pu1_src_left_chroma;
    101     UWORD8 au1_src_top_right[2];
    102     UWORD8 au1_src_bot_left[2];
    103     UWORD8 *pu1_no_loop_filter_flag;
    104     WORD32 loop_filter_strd;
    105 
    106     WORD8 ai1_offset_y[5];
    107     WORD8 ai1_offset_cb[5];
    108     WORD8 ai1_offset_cr[5];
    109 
    110     PROFILE_DISABLE_SAO();
    111 
    112     ai1_offset_y[0] = 0;
    113     ai1_offset_cb[0] = 0;
    114     ai1_offset_cr[0] = 0;
    115 
    116     ps_sps = ps_sao_ctxt->ps_sps;
    117     log2_ctb_size = ps_sps->i1_log2_ctb_size;
    118     ctb_size = (1 << log2_ctb_size);
    119     src_strd = ps_sao_ctxt->ps_codec->i4_strd;
    120     pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
    121     pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
    122 
    123     ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
    124     loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
    125 
    126     /* Current CTB */
    127     {
    128         WORD32 sao_wd_luma;
    129         WORD32 sao_wd_chroma;
    130         WORD32 sao_ht_luma;
    131         WORD32 sao_ht_chroma;
    132 
    133         WORD32 remaining_rows;
    134         WORD32 remaining_cols;
    135 
    136         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
    137         sao_wd_luma = MIN(ctb_size, remaining_cols);
    138         sao_wd_chroma = MIN(ctb_size, remaining_cols);
    139 
    140         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
    141         sao_ht_luma = MIN(ctb_size, remaining_rows);
    142         sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
    143 
    144         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
    145         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
    146         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
    147         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
    148 
    149         pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
    150                         ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
    151                         ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
    152 
    153         ai1_offset_y[1] = ps_sao->b4_y_offset_1;
    154         ai1_offset_y[2] = ps_sao->b4_y_offset_2;
    155         ai1_offset_y[3] = ps_sao->b4_y_offset_3;
    156         ai1_offset_y[4] = ps_sao->b4_y_offset_4;
    157 
    158         ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
    159         ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
    160         ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
    161         ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
    162 
    163         ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
    164         ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
    165         ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
    166         ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
    167 
    168         for(i = 0; i < 8; i++)
    169         {
    170             au1_avail_luma[i] = 255;
    171             au1_avail_chroma[i] = 255;
    172         }
    173 
    174 
    175         if(0 == ps_sao_ctxt->i4_ctb_x)
    176         {
    177             au1_avail_luma[0] = 0;
    178             au1_avail_luma[4] = 0;
    179             au1_avail_luma[6] = 0;
    180 
    181             au1_avail_chroma[0] = 0;
    182             au1_avail_chroma[4] = 0;
    183             au1_avail_chroma[6] = 0;
    184         }
    185 
    186         if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
    187         {
    188             au1_avail_luma[1] = 0;
    189             au1_avail_luma[5] = 0;
    190             au1_avail_luma[7] = 0;
    191 
    192             au1_avail_chroma[1] = 0;
    193             au1_avail_chroma[5] = 0;
    194             au1_avail_chroma[7] = 0;
    195         }
    196 
    197         if(0 == ps_sao_ctxt->i4_ctb_y)
    198         {
    199             au1_avail_luma[2] = 0;
    200             au1_avail_luma[4] = 0;
    201             au1_avail_luma[5] = 0;
    202 
    203             au1_avail_chroma[2] = 0;
    204             au1_avail_chroma[4] = 0;
    205             au1_avail_chroma[5] = 0;
    206         }
    207 
    208         if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
    209         {
    210             au1_avail_luma[3] = 0;
    211             au1_avail_luma[6] = 0;
    212             au1_avail_luma[7] = 0;
    213 
    214             au1_avail_chroma[3] = 0;
    215             au1_avail_chroma[6] = 0;
    216             au1_avail_chroma[7] = 0;
    217         }
    218 
    219 
    220         if(0 == ps_sao->b3_y_type_idx)
    221         {
    222             /* Update left, top and top-left */
    223             for(row = 0; row < sao_ht_luma; row++)
    224             {
    225                 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
    226             }
    227             ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
    228 
    229             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
    230 
    231         }
    232         else
    233         {
    234             UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
    235             UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
    236             WORD32 tmp_strd = MAX_CTB_SIZE + 2;
    237             WORD32 no_loop_filter_enabled = 0;
    238 
    239             /* Check the loop filter flags and copy the original values for back up */
    240             {
    241                 UWORD32 u4_no_loop_filter_flag;
    242                 WORD32 min_cu = 8;
    243                 UWORD8 *pu1_src_tmp = pu1_src_luma;
    244 
    245                 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
    246                 {
    247                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
    248                                     ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
    249                     u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
    250 
    251                     if(u4_no_loop_filter_flag)
    252                     {
    253                         WORD32 tmp_wd = sao_wd_luma;
    254                         no_loop_filter_enabled = 1;
    255                         while(tmp_wd > 0)
    256                         {
    257                             if(CTZ(u4_no_loop_filter_flag))
    258                             {
    259                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    260                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    261                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    262                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
    263                             }
    264                             else
    265                             {
    266                                 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
    267                                 {
    268                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
    269                                     {
    270                                         pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
    271                                     }
    272                                 }
    273 
    274                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    275                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    276                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    277                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
    278                             }
    279                         }
    280 
    281                         pu1_src_tmp -= sao_wd_luma;
    282                     }
    283 
    284                     pu1_src_tmp += min_cu * src_strd;
    285                     pu1_src_copy += min_cu * tmp_strd;
    286                 }
    287             }
    288 
    289             if(1 == ps_sao->b3_y_type_idx)
    290             {
    291                 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
    292                                                                           src_strd,
    293                                                                           pu1_src_left_luma,
    294                                                                           pu1_src_top_luma,
    295                                                                           ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
    296                                                                           ps_sao->b5_y_band_pos,
    297                                                                           ai1_offset_y,
    298                                                                           sao_wd_luma,
    299                                                                           sao_ht_luma);
    300             }
    301             else // if(2 <= ps_sao->b3_y_type_idx)
    302             {
    303                 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
    304                 au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
    305                 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
    306                                                                   src_strd,
    307                                                                   pu1_src_left_luma,
    308                                                                   pu1_src_top_luma,
    309                                                                   ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
    310                                                                   au1_src_top_right,
    311                                                                   au1_src_bot_left,
    312                                                                   au1_avail_luma,
    313                                                                   ai1_offset_y,
    314                                                                   sao_wd_luma,
    315                                                                   sao_ht_luma);
    316             }
    317 
    318             /* Check the loop filter flags and copy the original values back if they are set */
    319             if(no_loop_filter_enabled)
    320             {
    321                 UWORD32 u4_no_loop_filter_flag;
    322                 WORD32 min_cu = 8;
    323                 UWORD8 *pu1_src_tmp = pu1_src_luma;
    324 
    325                 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
    326                 {
    327                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
    328                     u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
    329 
    330                     if(u4_no_loop_filter_flag)
    331                     {
    332                         WORD32 tmp_wd = sao_wd_luma;
    333                         while(tmp_wd > 0)
    334                         {
    335                             if(CTZ(u4_no_loop_filter_flag))
    336                             {
    337                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    338                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    339                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    340                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
    341                             }
    342                             else
    343                             {
    344                                 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
    345                                 {
    346                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
    347                                     {
    348                                         pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
    349                                     }
    350                                 }
    351 
    352                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    353                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    354                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    355                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
    356                             }
    357                         }
    358 
    359                         pu1_src_tmp -= sao_wd_luma;
    360                     }
    361 
    362                     pu1_src_tmp += min_cu * src_strd;
    363                     pu1_src_copy += min_cu * tmp_strd;
    364                 }
    365             }
    366 
    367         }
    368 
    369         if(0 == ps_sao->b3_cb_type_idx)
    370         {
    371             for(row = 0; row < sao_ht_chroma; row++)
    372             {
    373                 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
    374                 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
    375             }
    376             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
    377             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
    378 
    379             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
    380         }
    381         else
    382         {
    383             UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
    384             UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
    385             WORD32 tmp_strd = MAX_CTB_SIZE + 4;
    386             WORD32 no_loop_filter_enabled = 0;
    387 
    388             /* Check the loop filter flags and copy the original values for back up */
    389             {
    390                 UWORD32 u4_no_loop_filter_flag;
    391                 WORD32 min_cu = 4;
    392                 UWORD8 *pu1_src_tmp = pu1_src_chroma;
    393 
    394                 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
    395                 {
    396                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
    397                     u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
    398 
    399                     if(u4_no_loop_filter_flag)
    400                     {
    401                         WORD32 tmp_wd = sao_wd_chroma;
    402                         no_loop_filter_enabled = 1;
    403                         while(tmp_wd > 0)
    404                         {
    405                             if(CTZ(u4_no_loop_filter_flag))
    406                             {
    407                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    408                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    409                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    410                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
    411                             }
    412                             else
    413                             {
    414                                 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
    415                                 {
    416                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
    417                                     {
    418                                         pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
    419                                     }
    420                                 }
    421 
    422                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    423                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    424                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    425                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
    426                             }
    427                         }
    428 
    429                         pu1_src_tmp -= sao_wd_chroma;
    430                     }
    431 
    432                     pu1_src_tmp += min_cu * src_strd;
    433                     pu1_src_copy += min_cu * tmp_strd;
    434                 }
    435             }
    436 
    437             if(1 == ps_sao->b3_cb_type_idx)
    438             {
    439                 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
    440                                                                             src_strd,
    441                                                                             pu1_src_left_chroma,
    442                                                                             pu1_src_top_chroma,
    443                                                                             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
    444                                                                             ps_sao->b5_cb_band_pos,
    445                                                                             ps_sao->b5_cr_band_pos,
    446                                                                             ai1_offset_cb,
    447                                                                             ai1_offset_cr,
    448                                                                             sao_wd_chroma,
    449                                                                             sao_ht_chroma
    450                                                                            );
    451             }
    452             else // if(2 <= ps_sao->b3_cb_type_idx)
    453             {
    454                 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
    455                 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
    456                 au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
    457                 au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
    458                 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
    459                                                                      src_strd,
    460                                                                      pu1_src_left_chroma,
    461                                                                      pu1_src_top_chroma,
    462                                                                      ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
    463                                                                      au1_src_top_right,
    464                                                                      au1_src_bot_left,
    465                                                                      au1_avail_chroma,
    466                                                                      ai1_offset_cb,
    467                                                                      ai1_offset_cr,
    468                                                                      sao_wd_chroma,
    469                                                                      sao_ht_chroma);
    470             }
    471 
    472             /* Check the loop filter flags and copy the original values back if they are set */
    473             if(no_loop_filter_enabled)
    474             {
    475                 UWORD32 u4_no_loop_filter_flag;
    476                 WORD32 min_cu = 4;
    477                 UWORD8 *pu1_src_tmp = pu1_src_chroma;
    478 
    479                 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
    480                 {
    481                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
    482                     u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
    483 
    484                     if(u4_no_loop_filter_flag)
    485                     {
    486                         WORD32 tmp_wd = sao_wd_chroma;
    487                         while(tmp_wd > 0)
    488                         {
    489                             if(CTZ(u4_no_loop_filter_flag))
    490                             {
    491                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    492                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    493                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    494                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
    495                             }
    496                             else
    497                             {
    498                                 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
    499                                 {
    500                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
    501                                     {
    502                                         pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
    503                                     }
    504                                 }
    505 
    506                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    507                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    508                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    509                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
    510                             }
    511                         }
    512 
    513                         pu1_src_tmp -= sao_wd_chroma;
    514                     }
    515 
    516                     pu1_src_tmp += min_cu * src_strd;
    517                     pu1_src_copy += min_cu * tmp_strd;
    518                 }
    519             }
    520 
    521         }
    522 
    523     }
    524 }
    525 
    526 void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
    527 {
    528     codec_t *ps_codec = ps_sao_ctxt->ps_codec;
    529     UWORD8 *pu1_src_luma;
    530     UWORD8 *pu1_src_chroma;
    531     WORD32 src_strd;
    532     WORD32 ctb_size;
    533     WORD32 log2_ctb_size;
    534     sps_t *ps_sps;
    535     sao_t *ps_sao;
    536     pps_t *ps_pps;
    537     slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
    538     tile_t *ps_tile;
    539     UWORD16 *pu1_slice_idx;
    540     UWORD16 *pu1_tile_idx;
    541     WORD32 row, col;
    542     UWORD8 au1_avail_luma[8];
    543     UWORD8 au1_avail_chroma[8];
    544     UWORD8 au1_tile_slice_boundary[8];
    545     UWORD8 au4_ilf_across_tile_slice_enable[8];
    546     WORD32 i;
    547     UWORD8 *pu1_src_top_luma;
    548     UWORD8 *pu1_src_top_chroma;
    549     UWORD8 *pu1_src_left_luma;
    550     UWORD8 *pu1_src_left_chroma;
    551     UWORD8 au1_src_top_right[2];
    552     UWORD8 au1_src_bot_left[2];
    553     UWORD8 *pu1_no_loop_filter_flag;
    554     UWORD8 *pu1_src_backup_luma;
    555     UWORD8 *pu1_src_backup_chroma;
    556     WORD32 backup_strd;
    557     WORD32 loop_filter_strd;
    558 
    559     WORD32 no_loop_filter_enabled_luma = 0;
    560     WORD32 no_loop_filter_enabled_chroma = 0;
    561     UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
    562     UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
    563     UWORD8 *pu1_sao_src_luma_top_left_ctb;
    564     UWORD8 *pu1_sao_src_chroma_top_left_ctb;
    565     UWORD8 *pu1_sao_src_top_left_luma_top_right;
    566     UWORD8 *pu1_sao_src_top_left_chroma_top_right;
    567     UWORD8  u1_sao_src_top_left_luma_bot_left;
    568     UWORD8  *pu1_sao_src_top_left_luma_bot_left;
    569     UWORD8 *au1_sao_src_top_left_chroma_bot_left;
    570     UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
    571 
    572     WORD8 ai1_offset_y[5];
    573     WORD8 ai1_offset_cb[5];
    574     WORD8 ai1_offset_cr[5];
    575     WORD32  chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
    576 
    577     PROFILE_DISABLE_SAO();
    578 
    579     ai1_offset_y[0] = 0;
    580     ai1_offset_cb[0] = 0;
    581     ai1_offset_cr[0] = 0;
    582 
    583     ps_sps = ps_sao_ctxt->ps_sps;
    584     ps_pps = ps_sao_ctxt->ps_pps;
    585     ps_tile = ps_sao_ctxt->ps_tile;
    586 
    587     log2_ctb_size = ps_sps->i1_log2_ctb_size;
    588     ctb_size = (1 << log2_ctb_size);
    589     src_strd = ps_sao_ctxt->ps_codec->i4_strd;
    590     ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
    591     ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
    592 
    593     pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
    594     pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
    595     pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
    596     pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
    597 
    598     /*Stores the left value for each row ctbs- Needed for column tiles*/
    599     pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
    600     pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
    601     pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
    602     pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
    603     u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
    604     pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
    605     au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
    606     pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
    607     pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
    608     pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
    609 
    610     ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
    611     loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
    612     backup_strd = 2 * MAX_CTB_SIZE;
    613 
    614     DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
    615 
    616     {
    617         /* Check the loop filter flags and copy the original values for back up */
    618         /* Luma */
    619         if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
    620         {
    621             UWORD32 u4_no_loop_filter_flag;
    622             WORD32 loop_filter_bit_pos;
    623             WORD32 log2_min_cu = 3;
    624             WORD32 min_cu = (1 << log2_min_cu);
    625             UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
    626             WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
    627             WORD32 sao_blk_wd = ctb_size;
    628             WORD32 remaining_rows;
    629             WORD32 remaining_cols;
    630 
    631             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
    632             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
    633             if(remaining_rows <= SAO_SHIFT_CTB)
    634                 sao_blk_ht += remaining_rows;
    635             if(remaining_cols <= SAO_SHIFT_CTB)
    636                 sao_blk_wd += remaining_cols;
    637 
    638             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
    639             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
    640 
    641             pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
    642 
    643             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
    644                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
    645             if(ps_sao_ctxt->i4_ctb_x > 0)
    646                 loop_filter_bit_pos -= 1;
    647 
    648             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
    649                             (loop_filter_bit_pos >> 3);
    650 
    651             for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
    652                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
    653             {
    654                 WORD32 tmp_wd = sao_blk_wd;
    655 
    656                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
    657                                 (loop_filter_bit_pos & 7);
    658                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
    659 
    660                 if(u4_no_loop_filter_flag)
    661                 {
    662                     no_loop_filter_enabled_luma = 1;
    663                     while(tmp_wd > 0)
    664                     {
    665                         if(CTZ(u4_no_loop_filter_flag))
    666                         {
    667                             pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    668                             pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    669                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
    670                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    671                         }
    672                         else
    673                         {
    674                             for(row = 0; row < min_cu; row++)
    675                             {
    676                                 for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
    677                                 {
    678                                     pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
    679                                 }
    680                             }
    681                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    682                             pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    683                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
    684                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    685                         }
    686                     }
    687 
    688                     pu1_src_tmp_luma -= sao_blk_wd;
    689                     pu1_src_backup_luma -= sao_blk_wd;
    690                 }
    691 
    692                 pu1_src_tmp_luma += (src_strd << log2_min_cu);
    693                 pu1_src_backup_luma += (backup_strd << log2_min_cu);
    694             }
    695         }
    696 
    697         /* Chroma */
    698         if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
    699         {
    700             UWORD32 u4_no_loop_filter_flag;
    701             WORD32 loop_filter_bit_pos;
    702             WORD32 log2_min_cu = 3;
    703             WORD32 min_cu = (1 << log2_min_cu);
    704             UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
    705             WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
    706             WORD32 sao_blk_wd = ctb_size;
    707             WORD32 remaining_rows;
    708             WORD32 remaining_cols;
    709 
    710             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
    711             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
    712             if(remaining_rows <= 2 * SAO_SHIFT_CTB)
    713                 sao_blk_ht += remaining_rows;
    714             if(remaining_cols <= 2 * SAO_SHIFT_CTB)
    715                 sao_blk_wd += remaining_cols;
    716 
    717             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
    718             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
    719 
    720             pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
    721 
    722             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
    723                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
    724             if(ps_sao_ctxt->i4_ctb_x > 0)
    725                 loop_filter_bit_pos -= 2;
    726 
    727             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
    728                             (loop_filter_bit_pos >> 3);
    729 
    730             for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
    731                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
    732             {
    733                 WORD32 tmp_wd = sao_blk_wd;
    734 
    735                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
    736                                 (loop_filter_bit_pos & 7);
    737                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
    738 
    739                 if(u4_no_loop_filter_flag)
    740                 {
    741                     no_loop_filter_enabled_chroma = 1;
    742                     while(tmp_wd > 0)
    743                     {
    744                         if(CTZ(u4_no_loop_filter_flag))
    745                         {
    746                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    747                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    748                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
    749                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    750                         }
    751                         else
    752                         {
    753                             for(row = 0; row < min_cu / 2; row++)
    754                             {
    755                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
    756                                 {
    757                                     pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
    758                                 }
    759                             }
    760 
    761                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    762                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    763                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
    764                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    765                         }
    766                     }
    767 
    768                     pu1_src_tmp_chroma -= sao_blk_wd;
    769                     pu1_src_backup_chroma -= sao_blk_wd;
    770                 }
    771 
    772                 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
    773                 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
    774             }
    775         }
    776     }
    777 
    778     DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
    779 
    780     /* Top-left CTB */
    781     if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
    782     {
    783         WORD32 sao_wd_luma = SAO_SHIFT_CTB;
    784         WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
    785         WORD32 sao_ht_luma = SAO_SHIFT_CTB;
    786         WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
    787 
    788         WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
    789         WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
    790         WORD32 au4_idx_tl[8], idx_tl;
    791 
    792 
    793         pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
    794         pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
    795         ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
    796         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
    797         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
    798         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
    799         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
    800 
    801         if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
    802         {
    803             if(0 == ps_sao->b3_y_type_idx)
    804             {
    805                 /* Update left, top and top-left */
    806                 for(row = 0; row < sao_ht_luma; row++)
    807                 {
    808                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
    809                 }
    810                 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
    811 
    812                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
    813 
    814 
    815             }
    816 
    817             else if(1 == ps_sao->b3_y_type_idx)
    818             {
    819                 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
    820                 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
    821                 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
    822                 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
    823 
    824                 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
    825                                                                           src_strd,
    826                                                                           pu1_src_left_luma,
    827                                                                           pu1_src_top_luma,
    828                                                                           pu1_sao_src_luma_top_left_ctb,
    829                                                                           ps_sao->b5_y_band_pos,
    830                                                                           ai1_offset_y,
    831                                                                           sao_wd_luma,
    832                                                                           sao_ht_luma
    833                                                                          );
    834             }
    835 
    836             else // if(2 <= ps_sao->b3_y_type_idx)
    837             {
    838                 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
    839                 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
    840                 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
    841                 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
    842 
    843                 for(i = 0; i < 8; i++)
    844                 {
    845                     au1_avail_luma[i] = 255;
    846                     au1_tile_slice_boundary[i] = 0;
    847                     au4_idx_tl[i] = 0;
    848                     au4_ilf_across_tile_slice_enable[i] = 1;
    849                 }
    850 
    851                 /******************************************************************
    852                  * Derive the  Top-left CTB's neighbor pixel's slice indices.
    853                  *
    854                  *          TL_T
    855                  *       4  _2__5________
    856                  *     0   |    |       |
    857                  *    TL_L | TL | 1 TL_R|
    858                  *         |____|_______|____
    859                  *        6|TL_D|7      |    |
    860                  *         | 3  |       |    |
    861                  *         |____|_______|    |
    862                  *              |            |
    863                  *              |            |
    864                  *              |____________|
    865                  *
    866                  *****************************************************************/
    867 
    868                 /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/
    869                 {
    870                     if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
    871                     {
    872                         {
    873                             /*Assuming that sao shift is uniform along x and y directions*/
    874                             if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
    875                             {
    876                                 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
    877                                 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
    878                             }
    879                             else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
    880                             {
    881                                 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
    882                                 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
    883                             }
    884                             ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
    885                             ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
    886 
    887                             ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
    888                             ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
    889 
    890                             ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
    891                             ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
    892 
    893                             ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
    894                             ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
    895                         }
    896 
    897                         if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
    898                         {
    899                             /*Calculate slice indices for neighbor pixels*/
    900                             idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
    901                             au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
    902                             au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
    903                             au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
    904                             au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
    905                             au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
    906 
    907                             if((0 == (1 << log2_ctb_size) - sao_wd_luma))
    908                             {
    909                                 if(ps_sao_ctxt->i4_ctb_x == 1)
    910                                 {
    911                                     au4_idx_tl[6] = -1;
    912                                     au4_idx_tl[4] = -1;
    913                                 }
    914                                 else
    915                                 {
    916                                     au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
    917                                 }
    918                                 if(ps_sao_ctxt->i4_ctb_y == 1)
    919                                 {
    920                                     au4_idx_tl[5] = -1;
    921                                     au4_idx_tl[4] = -1;
    922                                 }
    923                                 else
    924                                 {
    925                                     au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
    926                                     au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
    927                                 }
    928                                 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
    929                             }
    930 
    931                             /* Verify that the neighbor ctbs don't cross the picture boundary.
    932                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
    933                              * of the pixel having a greater address is checked. Accordingly, set the availability flags.
    934                              * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
    935                              * the respective pixel's flags are checked
    936                              */
    937 
    938                             if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
    939                             {
    940                                 au4_ilf_across_tile_slice_enable[4] = 0;
    941                                 au4_ilf_across_tile_slice_enable[6] = 0;
    942                             }
    943                             else
    944                             {
    945                                 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
    946                             }
    947                             if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
    948                             {
    949                                 au4_ilf_across_tile_slice_enable[5] = 0;
    950                                 au4_ilf_across_tile_slice_enable[4] = 0;
    951                             }
    952                             else
    953                             {
    954                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
    955                                 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
    956                             }
    957                             au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
    958                             au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
    959                             au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
    960                             au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
    961                             au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
    962 
    963                             /*
    964                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
    965                              * of the pixel having a greater address is checked. Accordingly, set the availability flags.
    966                              * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
    967                              * the respective pixel's flags are checked
    968                              */
    969                             for(i = 0; i < 8; i++)
    970                             {
    971                                 /*Sets the edges that lie on the slice/tile boundary*/
    972                                 if(au4_idx_tl[i] != idx_tl)
    973                                 {
    974                                     au1_tile_slice_boundary[i] = 1;
    975                                 }
    976                                 else
    977                                 {
    978                                     au4_ilf_across_tile_slice_enable[i] = 1;
    979                                 }
    980                             }
    981 
    982                             ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
    983                         }
    984 
    985                         if(ps_pps->i1_tiles_enabled_flag)
    986                         {
    987                             /* Calculate availability flags at tile boundary */
    988                             if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
    989                             {
    990                                 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
    991                                 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
    992                                 {
    993                                     /*Set the boundary arrays*/
    994                                     /*Calculate tile indices for neighbor pixels*/
    995                                     idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
    996                                     au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
    997                                     au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
    998                                     au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
    999                                     au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1000                                     au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1001 
   1002                                     if((0 == (1 << log2_ctb_size) - sao_wd_luma))
   1003                                     {
   1004                                         if(ps_sao_ctxt->i4_ctb_x == 1)
   1005                                         {
   1006                                             au4_idx_tl[6] = -1;
   1007                                             au4_idx_tl[4] = -1;
   1008                                         }
   1009                                         else
   1010                                         {
   1011                                             au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1012                                         }
   1013                                         if(ps_sao_ctxt->i4_ctb_y == 1)
   1014                                         {
   1015                                             au4_idx_tl[5] = -1;
   1016                                             au4_idx_tl[4] = -1;
   1017                                         }
   1018                                         else
   1019                                         {
   1020                                             au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1021                                             au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
   1022                                         }
   1023                                         au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1024                                     }
   1025                                     for(i = 0; i < 8; i++)
   1026                                     {
   1027                                         /*Sets the edges that lie on the tile boundary*/
   1028                                         if(au4_idx_tl[i] != idx_tl)
   1029                                         {
   1030                                             au1_tile_slice_boundary[i] |= 1;
   1031                                             au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
   1032                                         }
   1033                                     }
   1034                                 }
   1035                             }
   1036                         }
   1037 
   1038 
   1039                         /*Set availability flags based on tile and slice boundaries*/
   1040                         for(i = 0; i < 8; i++)
   1041                         {
   1042                             /*Sets the edges that lie on the slice/tile boundary*/
   1043                             if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   1044                             {
   1045                                 au1_avail_luma[i] = 0;
   1046                             }
   1047                         }
   1048                     }
   1049                 }
   1050 
   1051                 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
   1052                 {
   1053                     au1_avail_luma[0] = 0;
   1054                     au1_avail_luma[4] = 0;
   1055                     au1_avail_luma[6] = 0;
   1056                 }
   1057 
   1058                 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
   1059                 {
   1060                     au1_avail_luma[1] = 0;
   1061                     au1_avail_luma[5] = 0;
   1062                     au1_avail_luma[7] = 0;
   1063                 }
   1064                 //y==1 case
   1065                 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
   1066                 {
   1067                     au1_avail_luma[2] = 0;
   1068                     au1_avail_luma[4] = 0;
   1069                     au1_avail_luma[5] = 0;
   1070                 }
   1071                 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
   1072                 {
   1073                     au1_avail_luma[3] = 0;
   1074                     au1_avail_luma[6] = 0;
   1075                     au1_avail_luma[7] = 0;
   1076                 }
   1077 
   1078                 {
   1079                     au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
   1080                     u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
   1081                     ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
   1082                                                                       src_strd,
   1083                                                                       pu1_src_left_luma,
   1084                                                                       pu1_src_top_luma,
   1085                                                                       pu1_sao_src_luma_top_left_ctb,
   1086                                                                       au1_src_top_right,
   1087                                                                       &u1_sao_src_top_left_luma_bot_left,
   1088                                                                       au1_avail_luma,
   1089                                                                       ai1_offset_y,
   1090                                                                       sao_wd_luma,
   1091                                                                       sao_ht_luma);
   1092                 }
   1093             }
   1094 
   1095         }
   1096 
   1097         if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
   1098         {
   1099             if(0 == ps_sao->b3_cb_type_idx)
   1100             {
   1101                 for(row = 0; row < sao_ht_chroma; row++)
   1102                 {
   1103                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   1104                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   1105                 }
   1106                 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   1107                 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   1108 
   1109                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   1110 
   1111             }
   1112 
   1113             else if(1 == ps_sao->b3_cb_type_idx)
   1114             {
   1115                 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   1116                 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   1117                 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   1118                 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   1119 
   1120                 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   1121                 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   1122                 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   1123                 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   1124 
   1125                 if(chroma_yuv420sp_vu)
   1126                 {
   1127                     ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   1128                                                                                 src_strd,
   1129                                                                                 pu1_src_left_chroma,
   1130                                                                                 pu1_src_top_chroma,
   1131                                                                                 pu1_sao_src_chroma_top_left_ctb,
   1132                                                                                 ps_sao->b5_cr_band_pos,
   1133                                                                                 ps_sao->b5_cb_band_pos,
   1134                                                                                 ai1_offset_cr,
   1135                                                                                 ai1_offset_cb,
   1136                                                                                 sao_wd_chroma,
   1137                                                                                 sao_ht_chroma
   1138                                                                                );
   1139                 }
   1140                 else
   1141                 {
   1142                     ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   1143                                                                                 src_strd,
   1144                                                                                 pu1_src_left_chroma,
   1145                                                                                 pu1_src_top_chroma,
   1146                                                                                 pu1_sao_src_chroma_top_left_ctb,
   1147                                                                                 ps_sao->b5_cb_band_pos,
   1148                                                                                 ps_sao->b5_cr_band_pos,
   1149                                                                                 ai1_offset_cb,
   1150                                                                                 ai1_offset_cr,
   1151                                                                                 sao_wd_chroma,
   1152                                                                                 sao_ht_chroma
   1153                                                                                );
   1154                 }
   1155             }
   1156 
   1157             else // if(2 <= ps_sao->b3_cb_type_idx)
   1158             {
   1159                 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   1160                 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   1161                 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   1162                 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   1163 
   1164                 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   1165                 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   1166                 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   1167                 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   1168                 for(i = 0; i < 8; i++)
   1169                 {
   1170                     au1_avail_chroma[i] = 255;
   1171                     au1_tile_slice_boundary[i] = 0;
   1172                     au4_idx_tl[i] = 0;
   1173                     au4_ilf_across_tile_slice_enable[i] = 1;
   1174                 }
   1175                 /*In case of slices*/
   1176                 {
   1177                     if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   1178                     {
   1179                         if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
   1180                         {
   1181                             ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
   1182                             ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
   1183                         }
   1184                         else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
   1185                         {
   1186                             ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
   1187                             ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
   1188                         }
   1189                         ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
   1190                         ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
   1191 
   1192                         ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
   1193                         ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
   1194 
   1195                         ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
   1196                         ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
   1197 
   1198                         ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
   1199                         ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
   1200 
   1201                         if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   1202                         {
   1203 
   1204                             idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
   1205                             au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
   1206                             au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1207                             au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1208                             au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1209                             au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1210 
   1211                             if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
   1212                             {
   1213                                 if(ps_sao_ctxt->i4_ctb_x == 1)
   1214                                 {
   1215                                     au4_idx_tl[6] = -1;
   1216                                     au4_idx_tl[4] = -1;
   1217                                 }
   1218                                 else
   1219                                 {
   1220                                     au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1221                                 }
   1222                                 if(ps_sao_ctxt->i4_ctb_y == 1)
   1223                                 {
   1224                                     au4_idx_tl[5] = -1;
   1225                                     au4_idx_tl[4] = -1;
   1226                                 }
   1227                                 else
   1228                                 {
   1229                                     au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1230                                     au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
   1231                                 }
   1232                                 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1233                             }
   1234 
   1235                             /* Verify that the neighbor ctbs don't cross pic boundary
   1236                              * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
   1237                             if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
   1238                             {
   1239                                 au4_ilf_across_tile_slice_enable[4] = 0;
   1240                                 au4_ilf_across_tile_slice_enable[6] = 0;
   1241                             }
   1242                             else
   1243                             {
   1244                                 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
   1245                             }
   1246                             if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
   1247                             {
   1248                                 au4_ilf_across_tile_slice_enable[5] = 0;
   1249                                 au4_ilf_across_tile_slice_enable[4] = 0;
   1250                             }
   1251                             else
   1252                             {
   1253                                 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
   1254                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
   1255                             }
   1256                             au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
   1257                             au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
   1258                             au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   1259                             au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   1260                             au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   1261                             /*
   1262                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   1263                              * of the pixel having a greater address is checked. Accordingly, set the availability flags
   1264                              */
   1265                             for(i = 0; i < 8; i++)
   1266                             {
   1267                                 /*Sets the edges that lie on the slice/tile boundary*/
   1268                                 if(au4_idx_tl[i] != idx_tl)
   1269                                 {
   1270                                     au1_tile_slice_boundary[i] = 1;
   1271                                 }
   1272                                 else
   1273                                 {
   1274                                     au4_ilf_across_tile_slice_enable[i] = 1;
   1275                                 }
   1276                             }
   1277 
   1278                             /*Reset indices*/
   1279                             for(i = 0; i < 8; i++)
   1280                             {
   1281                                 au4_idx_tl[i] = 0;
   1282                             }
   1283                         }
   1284                         if(ps_pps->i1_tiles_enabled_flag)
   1285                         {
   1286                             /* Calculate availability flags at slice boundary */
   1287                             /* Calculate availability flags at tile boundary */
   1288                             {
   1289                                 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   1290                                 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   1291                                 {
   1292                                     /*Set the boundary arrays*/
   1293                                     /*Calculate tile indices for neighbor pixels*/
   1294                                     idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
   1295                                     au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
   1296                                     au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1297                                     au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1298                                     au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1299                                     au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1300 
   1301                                     if((0 == (1 << log2_ctb_size) - sao_wd_luma))
   1302                                     {
   1303                                         if(ps_sao_ctxt->i4_ctb_x == 1)
   1304                                         {
   1305                                             au4_idx_tl[6] = -1;
   1306                                             au4_idx_tl[4] = -1;
   1307                                         }
   1308                                         else
   1309                                         {
   1310                                             au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1311                                         }
   1312                                         if(ps_sao_ctxt->i4_ctb_y == 1)
   1313                                         {
   1314                                             au4_idx_tl[5] = -1;
   1315                                             au4_idx_tl[4] = -1;
   1316                                         }
   1317                                         else
   1318                                         {
   1319                                             au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1320                                             au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
   1321                                         }
   1322                                         au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1323                                     }
   1324                                     for(i = 0; i < 8; i++)
   1325                                     {
   1326                                         /*Sets the edges that lie on the tile boundary*/
   1327                                         if(au4_idx_tl[i] != idx_tl)
   1328                                         {
   1329                                             au1_tile_slice_boundary[i] |= 1;
   1330                                             au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
   1331                                         }
   1332                                     }
   1333                                 }
   1334                             }
   1335                         }
   1336 
   1337                         for(i = 0; i < 8; i++)
   1338                         {
   1339                             /*Sets the edges that lie on the slice/tile boundary*/
   1340                             if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   1341                             {
   1342                                 au1_avail_chroma[i] = 0;
   1343                             }
   1344                         }
   1345                     }
   1346                 }
   1347 
   1348                 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
   1349                 {
   1350                     au1_avail_chroma[0] = 0;
   1351                     au1_avail_chroma[4] = 0;
   1352                     au1_avail_chroma[6] = 0;
   1353                 }
   1354                 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
   1355                 {
   1356                     au1_avail_chroma[1] = 0;
   1357                     au1_avail_chroma[5] = 0;
   1358                     au1_avail_chroma[7] = 0;
   1359                 }
   1360 
   1361                 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
   1362                 {
   1363                     au1_avail_chroma[2] = 0;
   1364                     au1_avail_chroma[4] = 0;
   1365                     au1_avail_chroma[5] = 0;
   1366                 }
   1367                 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
   1368                 {
   1369                     au1_avail_chroma[3] = 0;
   1370                     au1_avail_chroma[6] = 0;
   1371                     au1_avail_chroma[7] = 0;
   1372                 }
   1373 
   1374                 {
   1375                     au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
   1376                     au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
   1377                     au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
   1378                     au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
   1379                     if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
   1380                     {
   1381                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
   1382                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
   1383                     }
   1384 
   1385                     if(chroma_yuv420sp_vu)
   1386                     {
   1387                         ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   1388                                                                              src_strd,
   1389                                                                              pu1_src_left_chroma,
   1390                                                                              pu1_src_top_chroma,
   1391                                                                              pu1_sao_src_chroma_top_left_ctb,
   1392                                                                              au1_src_top_right,
   1393                                                                              au1_sao_src_top_left_chroma_bot_left,
   1394                                                                              au1_avail_chroma,
   1395                                                                              ai1_offset_cr,
   1396                                                                              ai1_offset_cb,
   1397                                                                              sao_wd_chroma,
   1398                                                                              sao_ht_chroma);
   1399                     }
   1400                     else
   1401                     {
   1402                         ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   1403                                                                              src_strd,
   1404                                                                              pu1_src_left_chroma,
   1405                                                                              pu1_src_top_chroma,
   1406                                                                              pu1_sao_src_chroma_top_left_ctb,
   1407                                                                              au1_src_top_right,
   1408                                                                              au1_sao_src_top_left_chroma_bot_left,
   1409                                                                              au1_avail_chroma,
   1410                                                                              ai1_offset_cb,
   1411                                                                              ai1_offset_cr,
   1412                                                                              sao_wd_chroma,
   1413                                                                              sao_ht_chroma);
   1414                     }
   1415                 }
   1416             }
   1417         }
   1418 
   1419         pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
   1420         pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
   1421         ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
   1422     }
   1423 
   1424 
   1425     /* Top CTB */
   1426     if((ps_sao_ctxt->i4_ctb_y > 0))
   1427     {
   1428         WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
   1429         WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
   1430         WORD32 sao_ht_luma = SAO_SHIFT_CTB;
   1431         WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
   1432 
   1433         WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
   1434         WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
   1435         WORD32 au4_idx_t[8], idx_t;
   1436 
   1437         WORD32 remaining_cols;
   1438 
   1439         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
   1440         if(remaining_cols <= SAO_SHIFT_CTB)
   1441         {
   1442             sao_wd_luma += remaining_cols;
   1443         }
   1444         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
   1445         if(remaining_cols <= 2 * SAO_SHIFT_CTB)
   1446         {
   1447             sao_wd_chroma += remaining_cols;
   1448         }
   1449 
   1450         pu1_src_luma -= (sao_ht_luma * src_strd);
   1451         pu1_src_chroma -= (sao_ht_chroma * src_strd);
   1452         ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
   1453         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
   1454         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
   1455         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
   1456         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
   1457 
   1458         if(0 != sao_wd_luma)
   1459         {
   1460             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
   1461             {
   1462                 if(0 == ps_sao->b3_y_type_idx)
   1463                 {
   1464                     /* Update left, top and top-left */
   1465                     for(row = 0; row < sao_ht_luma; row++)
   1466                     {
   1467                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   1468                     }
   1469                     pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   1470 
   1471                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   1472 
   1473                 }
   1474 
   1475                 else if(1 == ps_sao->b3_y_type_idx)
   1476                 {
   1477                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   1478                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   1479                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   1480                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   1481 
   1482                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
   1483                                                                               src_strd,
   1484                                                                               pu1_src_left_luma,
   1485                                                                               pu1_src_top_luma,
   1486                                                                               pu1_sao_src_luma_top_left_ctb,
   1487                                                                               ps_sao->b5_y_band_pos,
   1488                                                                               ai1_offset_y,
   1489                                                                               sao_wd_luma,
   1490                                                                               sao_ht_luma
   1491                                                                              );
   1492                 }
   1493 
   1494                 else // if(2 <= ps_sao->b3_y_type_idx)
   1495                 {
   1496                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   1497                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   1498                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   1499                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   1500 
   1501                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
   1502                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
   1503                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
   1504 
   1505                     for(i = 0; i < 8; i++)
   1506                     {
   1507 
   1508                         au4_ilf_across_tile_slice_enable[i] = 1;
   1509                     }
   1510                     /******************************************************************
   1511                      * Derive the Top CTB's neighbor pixels' slice indices.
   1512                      *
   1513                      *               T_T
   1514                      *          ____________
   1515                      *         |    |       |
   1516                      *         | T_L|  T    |T_R
   1517                      *         |    | ______|____
   1518                      *         |    |  T_D  |    |
   1519                      *         |    |       |    |
   1520                      *         |____|_______|    |
   1521                      *              |            |
   1522                      *              |            |
   1523                      *              |____________|
   1524                      *
   1525                      *****************************************************************/
   1526 
   1527                     /*In case of slices*/
   1528                     {
   1529                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   1530                         {
   1531 
   1532                             ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
   1533                             ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
   1534 
   1535                             ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
   1536                             ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
   1537 
   1538                             ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
   1539                             ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
   1540 
   1541                             ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
   1542                             ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
   1543 
   1544                             ctbx_t = ps_sao_ctxt->i4_ctb_x;
   1545                             ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
   1546 
   1547                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   1548                             {
   1549                                 /*Calculate neighbor ctb slice indices*/
   1550                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   1551                                 {
   1552                                     au4_idx_t[0] = -1;
   1553                                     au4_idx_t[6] = -1;
   1554                                     au4_idx_t[4] = -1;
   1555                                 }
   1556                                 else
   1557                                 {
   1558                                     au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
   1559                                     au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1560                                 }
   1561                                 idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
   1562                                 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
   1563                                 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
   1564                                 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1565 
   1566                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
   1567                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   1568                                 {
   1569                                     au4_ilf_across_tile_slice_enable[4] = 0;
   1570                                     au4_ilf_across_tile_slice_enable[6] = 0;
   1571                                     au4_ilf_across_tile_slice_enable[0] = 0;
   1572                                 }
   1573                                 else
   1574                                 {
   1575                                     au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1576                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
   1577                                 }
   1578 
   1579 
   1580 
   1581                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1582                                 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1583                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   1584                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   1585                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   1586                                 /*
   1587                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   1588                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   1589                                  */
   1590 
   1591                                 for(i = 0; i < 8; i++)
   1592                                 {
   1593                                     /*Sets the edges that lie on the slice/tile boundary*/
   1594                                     if(au4_idx_t[i] != idx_t)
   1595                                     {
   1596                                         au1_tile_slice_boundary[i] = 1;
   1597                                         /*Check for slice flag at such boundaries*/
   1598                                     }
   1599                                     else
   1600                                     {
   1601                                         au4_ilf_across_tile_slice_enable[i] = 1;
   1602                                     }
   1603                                 }
   1604                                 /*Reset indices*/
   1605                                 for(i = 0; i < 8; i++)
   1606                                 {
   1607                                     au4_idx_t[i] = 0;
   1608                                 }
   1609                             }
   1610 
   1611                             if(ps_pps->i1_tiles_enabled_flag)
   1612                             {
   1613                                 /* Calculate availability flags at tile boundary */
   1614                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   1615                                 {
   1616                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   1617                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   1618                                     {
   1619                                         /*Calculate neighbor ctb tile indices*/
   1620                                         if(0 == ps_sao_ctxt->i4_ctb_x)
   1621                                         {
   1622                                             au4_idx_t[0] = -1;
   1623                                             au4_idx_t[6] = -1;
   1624                                             au4_idx_t[4] = -1;
   1625                                         }
   1626                                         else
   1627                                         {
   1628                                             au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
   1629                                             au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1630                                         }
   1631                                         idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
   1632                                         au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
   1633                                         au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
   1634                                         au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1635 
   1636                                         for(i = 0; i < 8; i++)
   1637                                         {
   1638                                             /*Sets the edges that lie on the tile boundary*/
   1639                                             if(au4_idx_t[i] != idx_t)
   1640                                             {
   1641                                                 au1_tile_slice_boundary[i] |= 1;
   1642                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
   1643                                             }
   1644                                         }
   1645                                     }
   1646                                 }
   1647                             }
   1648 
   1649                             for(i = 0; i < 8; i++)
   1650                             {
   1651                                 /*Sets the edges that lie on the slice/tile boundary*/
   1652                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   1653                                 {
   1654                                     au1_avail_luma[i] = 0;
   1655                                 }
   1656                             }
   1657                         }
   1658                     }
   1659 
   1660 
   1661                     if(0 == ps_sao_ctxt->i4_ctb_x)
   1662                     {
   1663                         au1_avail_luma[0] = 0;
   1664                         au1_avail_luma[4] = 0;
   1665                         au1_avail_luma[6] = 0;
   1666                     }
   1667 
   1668                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
   1669                     {
   1670                         au1_avail_luma[1] = 0;
   1671                         au1_avail_luma[5] = 0;
   1672                         au1_avail_luma[7] = 0;
   1673                     }
   1674 
   1675                     if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
   1676                     {
   1677                         au1_avail_luma[2] = 0;
   1678                         au1_avail_luma[4] = 0;
   1679                         au1_avail_luma[5] = 0;
   1680                     }
   1681 
   1682                     if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
   1683                     {
   1684                         au1_avail_luma[3] = 0;
   1685                         au1_avail_luma[6] = 0;
   1686                         au1_avail_luma[7] = 0;
   1687                     }
   1688 
   1689                     {
   1690                         au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
   1691                         u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
   1692                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
   1693                                                                           src_strd,
   1694                                                                           pu1_src_left_luma,
   1695                                                                           pu1_src_top_luma,
   1696                                                                           pu1_sao_src_luma_top_left_ctb,
   1697                                                                           au1_src_top_right,
   1698                                                                           &u1_sao_src_top_left_luma_bot_left,
   1699                                                                           au1_avail_luma,
   1700                                                                           ai1_offset_y,
   1701                                                                           sao_wd_luma,
   1702                                                                           sao_ht_luma);
   1703                     }
   1704                 }
   1705             }
   1706         }
   1707 
   1708         if(0 != sao_wd_chroma)
   1709         {
   1710             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
   1711             {
   1712                 if(0 == ps_sao->b3_cb_type_idx)
   1713                 {
   1714 
   1715                     for(row = 0; row < sao_ht_chroma; row++)
   1716                     {
   1717                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   1718                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   1719                     }
   1720                     pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   1721                     pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   1722 
   1723                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   1724 
   1725                 }
   1726 
   1727                 else if(1 == ps_sao->b3_cb_type_idx)
   1728                 {
   1729                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   1730                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   1731                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   1732                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   1733 
   1734                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   1735                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   1736                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   1737                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   1738 
   1739                     if(chroma_yuv420sp_vu)
   1740                     {
   1741                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   1742                                                                                     src_strd,
   1743                                                                                     pu1_src_left_chroma,
   1744                                                                                     pu1_src_top_chroma,
   1745                                                                                     pu1_sao_src_chroma_top_left_ctb,
   1746                                                                                     ps_sao->b5_cr_band_pos,
   1747                                                                                     ps_sao->b5_cb_band_pos,
   1748                                                                                     ai1_offset_cr,
   1749                                                                                     ai1_offset_cb,
   1750                                                                                     sao_wd_chroma,
   1751                                                                                     sao_ht_chroma
   1752                                                                                    );
   1753                     }
   1754                     else
   1755                     {
   1756                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   1757                                                                                     src_strd,
   1758                                                                                     pu1_src_left_chroma,
   1759                                                                                     pu1_src_top_chroma,
   1760                                                                                     pu1_sao_src_chroma_top_left_ctb,
   1761                                                                                     ps_sao->b5_cb_band_pos,
   1762                                                                                     ps_sao->b5_cr_band_pos,
   1763                                                                                     ai1_offset_cb,
   1764                                                                                     ai1_offset_cr,
   1765                                                                                     sao_wd_chroma,
   1766                                                                                     sao_ht_chroma
   1767                                                                                    );
   1768                     }
   1769                 }
   1770                 else // if(2 <= ps_sao->b3_cb_type_idx)
   1771                 {
   1772                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   1773                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   1774                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   1775                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   1776 
   1777                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   1778                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   1779                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   1780                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   1781 
   1782                     for(i = 0; i < 8; i++)
   1783                     {
   1784                         au1_avail_chroma[i] = 255;
   1785                         au1_tile_slice_boundary[i] = 0;
   1786                         au4_idx_t[i] = 0;
   1787                         au4_ilf_across_tile_slice_enable[i] = 1;
   1788                     }
   1789 
   1790                     {
   1791                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   1792                         {
   1793                             ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
   1794                             ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
   1795 
   1796                             ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
   1797                             ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
   1798 
   1799                             ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
   1800                             ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
   1801 
   1802                             ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
   1803                             ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
   1804 
   1805                             ctbx_t = ps_sao_ctxt->i4_ctb_x;
   1806                             ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
   1807 
   1808                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   1809                             {
   1810                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   1811                                 {
   1812                                     au4_idx_t[0] = -1;
   1813                                     au4_idx_t[6] = -1;
   1814                                     au4_idx_t[4] = -1;
   1815                                 }
   1816                                 else
   1817                                 {
   1818                                     au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
   1819                                     au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1820                                 }
   1821                                 idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
   1822                                 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
   1823                                 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
   1824                                 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1825 
   1826                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
   1827 
   1828                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   1829                                 {
   1830                                     au4_ilf_across_tile_slice_enable[4] = 0;
   1831                                     au4_ilf_across_tile_slice_enable[6] = 0;
   1832                                     au4_ilf_across_tile_slice_enable[0] = 0;
   1833                                 }
   1834                                 else
   1835                                 {
   1836                                     au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1837                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
   1838                                 }
   1839 
   1840                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1841                                 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1842                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   1843                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   1844                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   1845                                 /*
   1846                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   1847                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   1848                                  */
   1849                                 for(i = 0; i < 8; i++)
   1850                                 {
   1851                                     /*Sets the edges that lie on the slice/tile boundary*/
   1852                                     if(au4_idx_t[i] != idx_t)
   1853                                     {
   1854                                         au1_tile_slice_boundary[i] = 1;
   1855                                     }
   1856                                     else
   1857                                     {
   1858                                         /*Indicates that the neighbour belongs to same/dependent slice*/
   1859                                         au4_ilf_across_tile_slice_enable[i] = 1;
   1860                                     }
   1861                                 }
   1862                                 /*Reset indices*/
   1863                                 for(i = 0; i < 8; i++)
   1864                                 {
   1865                                     au4_idx_t[i] = 0;
   1866                                 }
   1867                             }
   1868                             if(ps_pps->i1_tiles_enabled_flag)
   1869                             {
   1870                                 /* Calculate availability flags at tile boundary */
   1871                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   1872                                 {
   1873                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   1874                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   1875                                     {
   1876                                         /*Calculate neighbor ctb tile indices*/
   1877                                         if(0 == ps_sao_ctxt->i4_ctb_x)
   1878                                         {
   1879                                             au4_idx_t[0] = -1;
   1880                                             au4_idx_t[6] = -1;
   1881                                             au4_idx_t[4] = -1;
   1882                                         }
   1883                                         else
   1884                                         {
   1885                                             au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
   1886                                             au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1887                                         }
   1888                                         idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
   1889                                         au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
   1890                                         au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
   1891                                         au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1892 
   1893                                         for(i = 0; i < 8; i++)
   1894                                         {
   1895                                             /*Sets the edges that lie on the tile boundary*/
   1896                                             if(au4_idx_t[i] != idx_t)
   1897                                             {
   1898                                                 au1_tile_slice_boundary[i] |= 1;
   1899                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
   1900                                             }
   1901                                         }
   1902                                     }
   1903                                 }
   1904                             }
   1905                             for(i = 0; i < 8; i++)
   1906                             {
   1907                                 /*Sets the edges that lie on the slice/tile boundary*/
   1908                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   1909                                 {
   1910                                     au1_avail_chroma[i] = 0;
   1911                                 }
   1912                             }
   1913 
   1914                         }
   1915                     }
   1916                     if(0 == ps_sao_ctxt->i4_ctb_x)
   1917                     {
   1918                         au1_avail_chroma[0] = 0;
   1919                         au1_avail_chroma[4] = 0;
   1920                         au1_avail_chroma[6] = 0;
   1921                     }
   1922 
   1923                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
   1924                     {
   1925                         au1_avail_chroma[1] = 0;
   1926                         au1_avail_chroma[5] = 0;
   1927                         au1_avail_chroma[7] = 0;
   1928                     }
   1929 
   1930                     if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
   1931                     {
   1932                         au1_avail_chroma[2] = 0;
   1933                         au1_avail_chroma[4] = 0;
   1934                         au1_avail_chroma[5] = 0;
   1935                     }
   1936 
   1937                     if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
   1938                     {
   1939                         au1_avail_chroma[3] = 0;
   1940                         au1_avail_chroma[6] = 0;
   1941                         au1_avail_chroma[7] = 0;
   1942                     }
   1943 
   1944                     {
   1945                         au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
   1946                         au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
   1947                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
   1948                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
   1949 
   1950                         if(chroma_yuv420sp_vu)
   1951                         {
   1952                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   1953                                                                                  src_strd,
   1954                                                                                  pu1_src_left_chroma,
   1955                                                                                  pu1_src_top_chroma,
   1956                                                                                  pu1_sao_src_chroma_top_left_ctb,
   1957                                                                                  au1_src_top_right,
   1958                                                                                  au1_sao_src_top_left_chroma_bot_left,
   1959                                                                                  au1_avail_chroma,
   1960                                                                                  ai1_offset_cr,
   1961                                                                                  ai1_offset_cb,
   1962                                                                                  sao_wd_chroma,
   1963                                                                                  sao_ht_chroma);
   1964                         }
   1965                         else
   1966                         {
   1967                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   1968                                                                                  src_strd,
   1969                                                                                  pu1_src_left_chroma,
   1970                                                                                  pu1_src_top_chroma,
   1971                                                                                  pu1_sao_src_chroma_top_left_ctb,
   1972                                                                                  au1_src_top_right,
   1973                                                                                  au1_sao_src_top_left_chroma_bot_left,
   1974                                                                                  au1_avail_chroma,
   1975                                                                                  ai1_offset_cb,
   1976                                                                                  ai1_offset_cr,
   1977                                                                                  sao_wd_chroma,
   1978                                                                                  sao_ht_chroma);
   1979                         }
   1980                     }
   1981 
   1982                 }
   1983             }
   1984         }
   1985 
   1986         pu1_src_luma += sao_ht_luma * src_strd;
   1987         pu1_src_chroma += sao_ht_chroma * src_strd;
   1988         ps_sao += (ps_sps->i2_pic_wd_in_ctb);
   1989     }
   1990 
   1991     /* Left CTB */
   1992     if(ps_sao_ctxt->i4_ctb_x > 0)
   1993     {
   1994         WORD32 sao_wd_luma = SAO_SHIFT_CTB;
   1995         WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
   1996         WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
   1997         WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
   1998 
   1999         WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
   2000         WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
   2001         WORD32 au4_idx_l[8], idx_l;
   2002 
   2003         WORD32 remaining_rows;
   2004         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
   2005         if(remaining_rows <= SAO_SHIFT_CTB)
   2006         {
   2007             sao_ht_luma += remaining_rows;
   2008         }
   2009         remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
   2010         if(remaining_rows <= SAO_SHIFT_CTB)
   2011         {
   2012             sao_ht_chroma += remaining_rows;
   2013         }
   2014 
   2015         pu1_src_luma -= sao_wd_luma;
   2016         pu1_src_chroma -= sao_wd_chroma;
   2017         ps_sao -= 1;
   2018         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
   2019         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
   2020         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
   2021         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
   2022 
   2023 
   2024         if(0 != sao_ht_luma)
   2025         {
   2026             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
   2027             {
   2028                 if(0 == ps_sao->b3_y_type_idx)
   2029                 {
   2030                     /* Update left, top and top-left */
   2031                     for(row = 0; row < sao_ht_luma; row++)
   2032                     {
   2033                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   2034                     }
   2035                     /*Update in next location*/
   2036                     pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   2037 
   2038                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   2039 
   2040                 }
   2041 
   2042                 else if(1 == ps_sao->b3_y_type_idx)
   2043                 {
   2044                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   2045                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   2046                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   2047                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   2048 
   2049                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
   2050                                                                               src_strd,
   2051                                                                               pu1_src_left_luma,
   2052                                                                               pu1_src_top_luma,
   2053                                                                               pu1_sao_src_top_left_luma_curr_ctb,
   2054                                                                               ps_sao->b5_y_band_pos,
   2055                                                                               ai1_offset_y,
   2056                                                                               sao_wd_luma,
   2057                                                                               sao_ht_luma
   2058                                                                              );
   2059                 }
   2060 
   2061                 else // if(2 <= ps_sao->b3_y_type_idx)
   2062                 {
   2063                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   2064                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   2065                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   2066                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   2067 
   2068                     for(i = 0; i < 8; i++)
   2069                     {
   2070                         au1_avail_luma[i] = 255;
   2071                         au1_tile_slice_boundary[i] = 0;
   2072                         au4_idx_l[i] = 0;
   2073                         au4_ilf_across_tile_slice_enable[i] = 1;
   2074                     }
   2075                     /******************************************************************
   2076                      * Derive the Left CTB's neighbour pixels' slice indices.
   2077                      *
   2078                      *
   2079                      *          ____________
   2080                      *         |    |       |
   2081                      *         | L_T|       |
   2082                      *         |____|_______|____
   2083                      *         |    |       |    |
   2084                      *     L_L |  L |  L_R  |    |
   2085                      *         |____|_______|    |
   2086                      *              |            |
   2087                      *          L_D |            |
   2088                      *              |____________|
   2089                      *
   2090                      *****************************************************************/
   2091 
   2092                     /*In case of slices or tiles*/
   2093                     {
   2094                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2095                         {
   2096                             ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
   2097                             ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
   2098 
   2099                             ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
   2100                             ctby_l_l = ps_sao_ctxt->i4_ctb_y;
   2101 
   2102                             ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
   2103                             ctby_l_r = ps_sao_ctxt->i4_ctb_y;
   2104 
   2105                             ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
   2106                             ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
   2107 
   2108                             ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
   2109                             ctby_l = ps_sao_ctxt->i4_ctb_y;
   2110 
   2111                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   2112                             {
   2113                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2114                                 {
   2115                                     au4_idx_l[2] = -1;
   2116                                     au4_idx_l[4] = -1;
   2117                                     au4_idx_l[5] = -1;
   2118                                 }
   2119                                 else
   2120                                 {
   2121                                     au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
   2122                                     au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
   2123                                 }
   2124                                 idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
   2125                                 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
   2126                                 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
   2127                                 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
   2128 
   2129                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
   2130                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2131                                 {
   2132                                     au4_ilf_across_tile_slice_enable[2] = 0;
   2133                                     au4_ilf_across_tile_slice_enable[4] = 0;
   2134                                     au4_ilf_across_tile_slice_enable[5] = 0;
   2135                                 }
   2136                                 else
   2137                                 {
   2138                                     au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
   2139                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
   2140 
   2141                                 }
   2142                                 //TODO: ILF flag checks for [0] and [6] are missing.
   2143                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   2144                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   2145                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   2146                                 /*
   2147                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   2148                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   2149                                  */
   2150                                 for(i = 0; i < 8; i++)
   2151                                 {
   2152                                     /*Sets the edges that lie on the slice/tile boundary*/
   2153                                     if(au4_idx_l[i] != idx_l)
   2154                                     {
   2155                                         au1_tile_slice_boundary[i] = 1;
   2156                                     }
   2157                                     else
   2158                                     {
   2159                                         au4_ilf_across_tile_slice_enable[i] = 1;
   2160                                     }
   2161                                 }
   2162                                 /*Reset indices*/
   2163                                 for(i = 0; i < 8; i++)
   2164                                 {
   2165                                     au4_idx_l[i] = 0;
   2166                                 }
   2167                             }
   2168 
   2169                             if(ps_pps->i1_tiles_enabled_flag)
   2170                             {
   2171                                 /* Calculate availability flags at tile boundary */
   2172                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   2173                                 {
   2174                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   2175                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   2176                                     {
   2177                                         if(0 == ps_sao_ctxt->i4_ctb_y)
   2178                                         {
   2179                                             au4_idx_l[2] = -1;
   2180                                             au4_idx_l[4] = -1;
   2181                                             au4_idx_l[5] = -1;
   2182                                         }
   2183                                         else
   2184                                         {
   2185                                             au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
   2186                                             au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
   2187                                         }
   2188 
   2189                                         idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
   2190                                         au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
   2191                                         au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
   2192                                         au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
   2193 
   2194                                         for(i = 0; i < 8; i++)
   2195                                         {
   2196                                             /*Sets the edges that lie on the slice/tile boundary*/
   2197                                             if(au4_idx_l[i] != idx_l)
   2198                                             {
   2199                                                 au1_tile_slice_boundary[i] |= 1;
   2200                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
   2201                                             }
   2202                                         }
   2203                                     }
   2204                                 }
   2205                             }
   2206 
   2207                             for(i = 0; i < 8; i++)
   2208                             {
   2209                                 /*Sets the edges that lie on the slice/tile boundary*/
   2210                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   2211                                 {
   2212                                     au1_avail_luma[i] = 0;
   2213                                 }
   2214                             }
   2215                         }
   2216                     }
   2217                     if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
   2218                     {
   2219                         au1_avail_luma[0] = 0;
   2220                         au1_avail_luma[4] = 0;
   2221                         au1_avail_luma[6] = 0;
   2222                     }
   2223                     if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
   2224                     {
   2225                         au1_avail_luma[1] = 0;
   2226                         au1_avail_luma[5] = 0;
   2227                         au1_avail_luma[7] = 0;
   2228                     }
   2229 
   2230                     if(0 == ps_sao_ctxt->i4_ctb_y)
   2231                     {
   2232                         au1_avail_luma[2] = 0;
   2233                         au1_avail_luma[4] = 0;
   2234                         au1_avail_luma[5] = 0;
   2235                     }
   2236 
   2237                     if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
   2238                     {
   2239                         au1_avail_luma[3] = 0;
   2240                         au1_avail_luma[6] = 0;
   2241                         au1_avail_luma[7] = 0;
   2242                     }
   2243 
   2244                     {
   2245                         au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
   2246                         u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
   2247                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
   2248                                                                           src_strd,
   2249                                                                           pu1_src_left_luma,
   2250                                                                           pu1_src_top_luma,
   2251                                                                           pu1_sao_src_top_left_luma_curr_ctb,
   2252                                                                           au1_src_top_right,
   2253                                                                           &u1_sao_src_top_left_luma_bot_left,
   2254                                                                           au1_avail_luma,
   2255                                                                           ai1_offset_y,
   2256                                                                           sao_wd_luma,
   2257                                                                           sao_ht_luma);
   2258                     }
   2259 
   2260                 }
   2261             }
   2262         }
   2263 
   2264         if(0 != sao_ht_chroma)
   2265         {
   2266             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
   2267             {
   2268                 if(0 == ps_sao->b3_cb_type_idx)
   2269                 {
   2270                     for(row = 0; row < sao_ht_chroma; row++)
   2271                     {
   2272                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   2273                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   2274                     }
   2275                     pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   2276                     pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   2277 
   2278                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   2279                 }
   2280 
   2281                 else if(1 == ps_sao->b3_cb_type_idx)
   2282                 {
   2283                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   2284                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   2285                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   2286                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   2287 
   2288                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   2289                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   2290                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   2291                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   2292 
   2293                     if(chroma_yuv420sp_vu)
   2294                     {
   2295                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   2296                                                                                     src_strd,
   2297                                                                                     pu1_src_left_chroma,
   2298                                                                                     pu1_src_top_chroma,
   2299                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
   2300                                                                                     ps_sao->b5_cr_band_pos,
   2301                                                                                     ps_sao->b5_cb_band_pos,
   2302                                                                                     ai1_offset_cr,
   2303                                                                                     ai1_offset_cb,
   2304                                                                                     sao_wd_chroma,
   2305                                                                                     sao_ht_chroma
   2306                                                                                    );
   2307                     }
   2308                     else
   2309                     {
   2310                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   2311                                                                                     src_strd,
   2312                                                                                     pu1_src_left_chroma,
   2313                                                                                     pu1_src_top_chroma,
   2314                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
   2315                                                                                     ps_sao->b5_cb_band_pos,
   2316                                                                                     ps_sao->b5_cr_band_pos,
   2317                                                                                     ai1_offset_cb,
   2318                                                                                     ai1_offset_cr,
   2319                                                                                     sao_wd_chroma,
   2320                                                                                     sao_ht_chroma
   2321                                                                                    );
   2322                     }
   2323                 }
   2324 
   2325                 else // if(2 <= ps_sao->b3_cb_type_idx)
   2326                 {
   2327                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   2328                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   2329                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   2330                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   2331 
   2332                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   2333                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   2334                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   2335                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   2336 
   2337                     for(i = 0; i < 8; i++)
   2338                     {
   2339                         au1_avail_chroma[i] = 255;
   2340                         au1_tile_slice_boundary[i] = 0;
   2341                         au4_idx_l[i] = 0;
   2342                         au4_ilf_across_tile_slice_enable[i] = 1;
   2343                     }
   2344                     /*In case of slices or tiles*/
   2345                     {
   2346                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2347                         {
   2348                             ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
   2349                             ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
   2350 
   2351                             ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
   2352                             ctby_l_l = ps_sao_ctxt->i4_ctb_y;
   2353 
   2354                             ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
   2355                             ctby_l_r = ps_sao_ctxt->i4_ctb_y;
   2356 
   2357                             ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
   2358                             ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
   2359 
   2360                             ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
   2361                             ctby_l = ps_sao_ctxt->i4_ctb_y;
   2362 
   2363                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   2364                             {
   2365                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2366                                 {
   2367                                     au4_idx_l[2] = -1;
   2368                                     au4_idx_l[4] = -1;
   2369                                     au4_idx_l[5] = -1;
   2370                                 }
   2371                                 else
   2372                                 {
   2373                                     au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
   2374                                     au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
   2375                                 }
   2376                                 idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
   2377                                 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
   2378                                 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
   2379                                 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
   2380 
   2381                                 /*Verify that the neighbour ctbs don't cross pic boundary.*/
   2382                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2383                                 {
   2384                                     au4_ilf_across_tile_slice_enable[2] = 0;
   2385                                     au4_ilf_across_tile_slice_enable[4] = 0;
   2386                                     au4_ilf_across_tile_slice_enable[5] = 0;
   2387                                 }
   2388                                 else
   2389                                 {
   2390                                     au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
   2391                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
   2392                                 }
   2393                                 //  au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
   2394                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   2395                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   2396                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   2397                                 /*
   2398                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   2399                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   2400                                  */
   2401                                 for(i = 0; i < 8; i++)
   2402                                 {
   2403                                     /*Sets the edges that lie on the slice/tile boundary*/
   2404                                     if(au4_idx_l[i] != idx_l)
   2405                                     {
   2406                                         au1_tile_slice_boundary[i] = 1;
   2407                                     }
   2408                                     else
   2409                                     {
   2410                                         au4_ilf_across_tile_slice_enable[i] = 1;
   2411                                     }
   2412                                 }
   2413                                 /*Reset indices*/
   2414                                 for(i = 0; i < 8; i++)
   2415                                 {
   2416                                     au4_idx_l[i] = 0;
   2417                                 }
   2418                             }
   2419                             if(ps_pps->i1_tiles_enabled_flag)
   2420                             {
   2421                                 /* Calculate availability flags at tile boundary */
   2422                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   2423                                 {
   2424                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   2425                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   2426                                     {
   2427                                         if(0 == ps_sao_ctxt->i4_ctb_y)
   2428                                         {
   2429                                             au4_idx_l[2] = -1;
   2430                                             au4_idx_l[4] = -1;
   2431                                             au4_idx_l[5] = -1;
   2432                                         }
   2433                                         else
   2434                                         {
   2435                                             au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
   2436                                             au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
   2437                                         }
   2438 
   2439                                         idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
   2440                                         au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
   2441                                         au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
   2442                                         au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
   2443 
   2444                                         for(i = 0; i < 8; i++)
   2445                                         {
   2446                                             /*Sets the edges that lie on the slice/tile boundary*/
   2447                                             if(au4_idx_l[i] != idx_l)
   2448                                             {
   2449                                                 au1_tile_slice_boundary[i] |= 1;
   2450                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
   2451                                             }
   2452                                         }
   2453                                     }
   2454                                 }
   2455                             }
   2456                             for(i = 0; i < 8; i++)
   2457                             {
   2458                                 /*Sets the edges that lie on the slice/tile boundary*/
   2459                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   2460                                 {
   2461                                     au1_avail_chroma[i] = 0;
   2462                                 }
   2463                             }
   2464                         }
   2465                     }
   2466                     if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
   2467                     {
   2468                         au1_avail_chroma[0] = 0;
   2469                         au1_avail_chroma[4] = 0;
   2470                         au1_avail_chroma[6] = 0;
   2471                     }
   2472 
   2473                     if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
   2474                     {
   2475                         au1_avail_chroma[1] = 0;
   2476                         au1_avail_chroma[5] = 0;
   2477                         au1_avail_chroma[7] = 0;
   2478                     }
   2479 
   2480                     if(0 == ps_sao_ctxt->i4_ctb_y)
   2481                     {
   2482                         au1_avail_chroma[2] = 0;
   2483                         au1_avail_chroma[4] = 0;
   2484                         au1_avail_chroma[5] = 0;
   2485                     }
   2486 
   2487                     if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
   2488                     {
   2489                         au1_avail_chroma[3] = 0;
   2490                         au1_avail_chroma[6] = 0;
   2491                         au1_avail_chroma[7] = 0;
   2492                     }
   2493 
   2494                     {
   2495                         au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
   2496                         au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
   2497                         au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
   2498                         au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
   2499                         //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
   2500                         //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
   2501                         if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
   2502                         {
   2503                             au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
   2504                             au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
   2505                         }
   2506 
   2507 
   2508                         if(chroma_yuv420sp_vu)
   2509                         {
   2510                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   2511                                                                                  src_strd,
   2512                                                                                  pu1_src_left_chroma,
   2513                                                                                  pu1_src_top_chroma,
   2514                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
   2515                                                                                  au1_src_top_right,
   2516                                                                                  au1_src_bot_left,
   2517                                                                                  au1_avail_chroma,
   2518                                                                                  ai1_offset_cr,
   2519                                                                                  ai1_offset_cb,
   2520                                                                                  sao_wd_chroma,
   2521                                                                                  sao_ht_chroma);
   2522                         }
   2523                         else
   2524                         {
   2525                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   2526                                                                                  src_strd,
   2527                                                                                  pu1_src_left_chroma,
   2528                                                                                  pu1_src_top_chroma,
   2529                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
   2530                                                                                  au1_src_top_right,
   2531                                                                                  au1_src_bot_left,
   2532                                                                                  au1_avail_chroma,
   2533                                                                                  ai1_offset_cb,
   2534                                                                                  ai1_offset_cr,
   2535                                                                                  sao_wd_chroma,
   2536                                                                                  sao_ht_chroma);
   2537                         }
   2538                     }
   2539 
   2540                 }
   2541             }
   2542 
   2543         }
   2544         pu1_src_luma += sao_wd_luma;
   2545         pu1_src_chroma += sao_wd_chroma;
   2546         ps_sao += 1;
   2547     }
   2548 
   2549 
   2550     /* Current CTB */
   2551     {
   2552         WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
   2553         WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
   2554         WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
   2555         WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
   2556         WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
   2557         WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
   2558         WORD32 au4_idx_c[8], idx_c;
   2559 
   2560         WORD32 remaining_rows;
   2561         WORD32 remaining_cols;
   2562 
   2563         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
   2564         if(remaining_cols <= SAO_SHIFT_CTB)
   2565         {
   2566             sao_wd_luma += remaining_cols;
   2567         }
   2568         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
   2569         if(remaining_cols <= 2 * SAO_SHIFT_CTB)
   2570         {
   2571             sao_wd_chroma += remaining_cols;
   2572         }
   2573 
   2574         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
   2575         if(remaining_rows <= SAO_SHIFT_CTB)
   2576         {
   2577             sao_ht_luma += remaining_rows;
   2578         }
   2579         remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
   2580         if(remaining_rows <= SAO_SHIFT_CTB)
   2581         {
   2582             sao_ht_chroma += remaining_rows;
   2583         }
   2584 
   2585         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
   2586         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
   2587         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
   2588         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
   2589 
   2590         if((0 != sao_wd_luma) && (0 != sao_ht_luma))
   2591         {
   2592             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
   2593             {
   2594                 if(0 == ps_sao->b3_y_type_idx)
   2595                 {
   2596                     /* Update left, top and top-left */
   2597                     for(row = 0; row < sao_ht_luma; row++)
   2598                     {
   2599                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   2600                     }
   2601                     pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   2602 
   2603                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   2604 
   2605                     pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
   2606 
   2607                 }
   2608 
   2609                 else if(1 == ps_sao->b3_y_type_idx)
   2610                 {
   2611                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   2612                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   2613                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   2614                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   2615 
   2616                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
   2617                                                                               src_strd,
   2618                                                                               pu1_src_left_luma,
   2619                                                                               pu1_src_top_luma,
   2620                                                                               pu1_sao_src_top_left_luma_curr_ctb,
   2621                                                                               ps_sao->b5_y_band_pos,
   2622                                                                               ai1_offset_y,
   2623                                                                               sao_wd_luma,
   2624                                                                               sao_ht_luma
   2625                                                                              );
   2626                 }
   2627 
   2628                 else // if(2 <= ps_sao->b3_y_type_idx)
   2629                 {
   2630                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   2631                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   2632                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   2633                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   2634 
   2635                     for(i = 0; i < 8; i++)
   2636                     {
   2637                         au1_avail_luma[i] = 255;
   2638                         au1_tile_slice_boundary[i] = 0;
   2639                         au4_idx_c[i] = 0;
   2640                         au4_ilf_across_tile_slice_enable[i] = 1;
   2641                     }
   2642                     /******************************************************************
   2643                      * Derive the slice indices of the current CTB's neighbouring pixels.
   2644                      *
   2645                      *
   2646                      *          ____________
   2647                      *         |    |       |
   2648                      *         |    | C_T   |
   2649                      *         |____|_______|____
   2650                      *         |    |       |    |
   2651                      *         | C_L|   C   | C_R|
   2652                      *         |____|_______|    |
   2653                      *              |  C_D       |
   2654                      *              |            |
   2655                      *              |____________|
   2656                      *
   2657                      *****************************************************************/
   2658 
   2659                     /*In case of slices*/
   2660                     {
   2661                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2662                         {
   2663                             ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
   2664                             ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
   2665 
   2666                             ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
   2667                             ctby_c_l = ps_sao_ctxt->i4_ctb_y;
   2668 
   2669                             ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
   2670                             ctby_c_r = ps_sao_ctxt->i4_ctb_y;
   2671 
   2672                             ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
   2673                             ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
   2674 
   2675                             ctbx_c = ps_sao_ctxt->i4_ctb_x;
   2676                             ctby_c = ps_sao_ctxt->i4_ctb_y;
   2677 
   2678                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   2679                             {
   2680                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   2681                                 {
   2682                                     au4_idx_c[6] = -1;
   2683                                     au4_idx_c[0] = -1;
   2684                                     au4_idx_c[4] = -1;
   2685                                 }
   2686                                 else
   2687                                 {
   2688                                     au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
   2689                                 }
   2690 
   2691                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2692                                 {
   2693                                     au4_idx_c[2] = -1;
   2694                                     au4_idx_c[5] = -1;
   2695                                     au4_idx_c[4] = -1;
   2696                                 }
   2697                                 else
   2698                                 {
   2699                                     au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
   2700                                     au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
   2701                                 }
   2702                                 idx_c   = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
   2703                                 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
   2704                                 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
   2705 
   2706                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   2707                                 {
   2708                                     au4_ilf_across_tile_slice_enable[6] = 0;
   2709                                     au4_ilf_across_tile_slice_enable[0] = 0;
   2710                                     au4_ilf_across_tile_slice_enable[4] = 0;
   2711                                 }
   2712                                 else
   2713                                 {
   2714                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
   2715                                     au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
   2716                                 }
   2717                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2718                                 {
   2719                                     au4_ilf_across_tile_slice_enable[2] = 0;
   2720                                     au4_ilf_across_tile_slice_enable[4] = 0;
   2721                                     au4_ilf_across_tile_slice_enable[5] = 0;
   2722                                 }
   2723                                 else
   2724                                 {
   2725                                     au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
   2726                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
   2727                                 }
   2728                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   2729                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   2730                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   2731 
   2732                                 /*
   2733                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   2734                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   2735                                  */
   2736                                 for(i = 0; i < 8; i++)
   2737                                 {
   2738                                     /*Sets the edges that lie on the slice/tile boundary*/
   2739                                     if(au4_idx_c[i] != idx_c)
   2740                                     {
   2741                                         au1_tile_slice_boundary[i] = 1;
   2742                                     }
   2743                                     else
   2744                                     {
   2745                                         au4_ilf_across_tile_slice_enable[i] = 1;
   2746                                     }
   2747                                 }
   2748                                 /*Reset indices*/
   2749                                 for(i = 0; i < 8; i++)
   2750                                 {
   2751                                     au4_idx_c[i] = 0;
   2752                                 }
   2753                             }
   2754 
   2755                             if(ps_pps->i1_tiles_enabled_flag)
   2756                             {
   2757                                 /* Calculate availability flags at tile boundary */
   2758                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   2759                                 {
   2760                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   2761                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   2762                                     {
   2763                                         if(0 == ps_sao_ctxt->i4_ctb_x)
   2764                                         {
   2765                                             au4_idx_c[6] = -1;
   2766                                             au4_idx_c[0] = -1;
   2767                                             au4_idx_c[4] = -1;
   2768                                         }
   2769                                         else
   2770                                         {
   2771                                             au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
   2772                                         }
   2773 
   2774                                         if(0 == ps_sao_ctxt->i4_ctb_y)
   2775                                         {
   2776                                             au4_idx_c[2] = -1;
   2777                                             au4_idx_c[5] = -1;
   2778                                             au4_idx_c[4] = -1;
   2779                                         }
   2780                                         else
   2781                                         {
   2782                                             au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
   2783                                             au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
   2784                                         }
   2785                                         idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
   2786                                         au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
   2787                                         au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
   2788 
   2789                                         for(i = 0; i < 8; i++)
   2790                                         {
   2791                                             /*Sets the edges that lie on the slice/tile boundary*/
   2792                                             if(au4_idx_c[i] != idx_c)
   2793                                             {
   2794                                                 au1_tile_slice_boundary[i] |= 1;
   2795                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
   2796                                             }
   2797                                         }
   2798                                     }
   2799                                 }
   2800                             }
   2801 
   2802                             for(i = 0; i < 8; i++)
   2803                             {
   2804                                 /*Marks a neighbour as unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
   2805                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   2806                                 {
   2807                                     au1_avail_luma[i] = 0;
   2808                                 }
   2809                             }
   2810 
   2811                         }
   2812                     }
   2813                     if(0 == ps_sao_ctxt->i4_ctb_x)
   2814                     {
   2815                         au1_avail_luma[0] = 0;
   2816                         au1_avail_luma[4] = 0;
   2817                         au1_avail_luma[6] = 0;
   2818                     }
   2819 
   2820                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
   2821                     {
   2822                         au1_avail_luma[1] = 0;
   2823                         au1_avail_luma[5] = 0;
   2824                         au1_avail_luma[7] = 0;
   2825                     }
   2826 
   2827                     if(0 == ps_sao_ctxt->i4_ctb_y)
   2828                     {
   2829                         au1_avail_luma[2] = 0;
   2830                         au1_avail_luma[4] = 0;
   2831                         au1_avail_luma[5] = 0;
   2832                     }
   2833 
   2834                     if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
   2835                     {
   2836                         au1_avail_luma[3] = 0;
   2837                         au1_avail_luma[6] = 0;
   2838                         au1_avail_luma[7] = 0;
   2839                     }
   2840 
   2841                     {
   2842                         au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
   2843                         u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
   2844 
   2845                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
   2846                                                                           src_strd,
   2847                                                                           pu1_src_left_luma,
   2848                                                                           pu1_src_top_luma,
   2849                                                                           pu1_sao_src_top_left_luma_curr_ctb,
   2850                                                                           au1_src_top_right,
   2851                                                                           &u1_sao_src_top_left_luma_bot_left,
   2852                                                                           au1_avail_luma,
   2853                                                                           ai1_offset_y,
   2854                                                                           sao_wd_luma,
   2855                                                                           sao_ht_luma);
   2856                     }
   2857                     pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
   2858                     pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
   2859                 }
   2860             }
   2861         }
   2862 
   2863         if((0 != sao_wd_chroma) && (0 != sao_ht_chroma))
   2864         {
   2865             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
   2866             {
   2867                 if(0 == ps_sao->b3_cb_type_idx)
   2868                 {
   2869                     for(row = 0; row < sao_ht_chroma; row++)
   2870                     {
   2871                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   2872                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   2873                     }
   2874                     pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   2875                     pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   2876 
   2877                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   2878 
   2879                     pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
   2880                     pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
   2881                 }
   2882 
   2883                 else if(1 == ps_sao->b3_cb_type_idx)
   2884                 {
   2885                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   2886                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   2887                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   2888                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   2889 
   2890                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   2891                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   2892                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   2893                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   2894 
   2895                     if(chroma_yuv420sp_vu)
   2896                     {
   2897                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   2898                                                                                     src_strd,
   2899                                                                                     pu1_src_left_chroma,
   2900                                                                                     pu1_src_top_chroma,
   2901                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
   2902                                                                                     ps_sao->b5_cr_band_pos,
   2903                                                                                     ps_sao->b5_cb_band_pos,
   2904                                                                                     ai1_offset_cr,
   2905                                                                                     ai1_offset_cb,
   2906                                                                                     sao_wd_chroma,
   2907                                                                                     sao_ht_chroma
   2908                                                                                    );
   2909                     }
   2910                     else
   2911                     {
   2912                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   2913                                                                                     src_strd,
   2914                                                                                     pu1_src_left_chroma,
   2915                                                                                     pu1_src_top_chroma,
   2916                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
   2917                                                                                     ps_sao->b5_cb_band_pos,
   2918                                                                                     ps_sao->b5_cr_band_pos,
   2919                                                                                     ai1_offset_cb,
   2920                                                                                     ai1_offset_cr,
   2921                                                                                     sao_wd_chroma,
   2922                                                                                     sao_ht_chroma
   2923                                                                                    );
   2924                     }
   2925                 }
   2926 
   2927                 else // if(2 <= ps_sao->b3_cb_type_idx)
   2928                 {
   2929                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   2930                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   2931                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   2932                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   2933 
   2934                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   2935                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   2936                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   2937                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   2938 
   2939                     for(i = 0; i < 8; i++)
   2940                     {
   2941                         au1_avail_chroma[i] = 255;
   2942                         au1_tile_slice_boundary[i] = 0;
   2943                         au4_idx_c[i] = 0;
   2944                         au4_ilf_across_tile_slice_enable[i] = 1;
   2945                     }
   2946                     {
   2947                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2948                         {
   2949                             ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
   2950                             ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
   2951 
   2952                             ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
   2953                             ctby_c_l = ps_sao_ctxt->i4_ctb_y;
   2954 
   2955                             ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
   2956                             ctby_c_r = ps_sao_ctxt->i4_ctb_y;
   2957 
   2958                             ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
   2959                             ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
   2960 
   2961                             ctbx_c = ps_sao_ctxt->i4_ctb_x;
   2962                             ctby_c = ps_sao_ctxt->i4_ctb_y;
   2963 
   2964                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   2965                             {
   2966                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   2967                                 {
   2968                                     au4_idx_c[0] = -1;
   2969                                     au4_idx_c[4] = -1;
   2970                                     au4_idx_c[6] = -1;
   2971                                 }
   2972                                 else
   2973                                 {
   2974                                     au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
   2975                                 }
   2976 
   2977                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2978                                 {
   2979                                     au4_idx_c[2] = -1;
   2980                                     au4_idx_c[4] = -1;
   2981                                     au4_idx_c[5] = -1;
   2982                                 }
   2983                                 else
   2984                                 {
   2985                                     au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
   2986                                     au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
   2987                                 }
   2988                                 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
   2989                                 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
   2990                                 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
   2991 
   2992                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   2993                                 {
   2994                                     au4_ilf_across_tile_slice_enable[0] = 0;
   2995                                     au4_ilf_across_tile_slice_enable[4] = 0;
   2996                                     au4_ilf_across_tile_slice_enable[6] = 0;
   2997                                 }
   2998                                 else
   2999                                 {
   3000                                     au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
   3001                                     au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
   3002                                 }
   3003 
   3004                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   3005                                 {
   3006                                     au4_ilf_across_tile_slice_enable[2] = 0;
   3007                                     au4_ilf_across_tile_slice_enable[4] = 0;
   3008                                     au4_ilf_across_tile_slice_enable[5] = 0;
   3009                                 }
   3010                                 else
   3011                                 {
   3012                                     au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
   3013                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
   3014                                 }
   3015 
   3016                                 au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   3017                                 au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   3018                                 au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   3019 
   3020                                 /*
   3021                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   3022                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   3023                                  */
   3024                                 for(i = 0; i < 8; i++)
   3025                                 {
   3026                                     /*Sets the edges that lie on the slice/tile boundary*/
   3027                                     if(au4_idx_c[i] != idx_c)
   3028                                     {
   3029                                         au1_tile_slice_boundary[i] = 1;
   3030                                     }
   3031                                     else
   3032                                     {
   3033                                         au4_ilf_across_tile_slice_enable[i] = 1;
   3034                                     }
   3035                                 }
   3036                                 /*Reset indices*/
   3037                                 for(i = 0; i < 8; i++)
   3038                                 {
   3039                                     au4_idx_c[i] = 0;
   3040                                 }
   3041                             }
   3042 
   3043                             if(ps_pps->i1_tiles_enabled_flag)
   3044                             {
   3045                                 /* Calculate availability flags at tile boundary */
   3046                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   3047                                 {
   3048                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   3049                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   3050                                     {
   3051                                         if(0 == ps_sao_ctxt->i4_ctb_x)
   3052                                         {
   3053                                             au4_idx_c[6] = -1;
   3054                                             au4_idx_c[0] = -1;
   3055                                             au4_idx_c[4] = -1;
   3056                                         }
   3057                                         else
   3058                                         {
   3059                                             au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
   3060                                         }
   3061 
   3062                                         if(0 == ps_sao_ctxt->i4_ctb_y)
   3063                                         {
   3064                                             au4_idx_c[2] = -1;
   3065                                             au4_idx_c[5] = -1;
   3066                                             au4_idx_c[4] = -1;
   3067                                         }
   3068                                         else
   3069                                         {
   3070                                             au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
   3071                                             au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
   3072                                         }
   3073                                         idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
   3074                                         au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
   3075                                         au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
   3076 
   3077                                         for(i = 0; i < 8; i++)
   3078                                         {
   3079                                             /*Sets the edges that lie on the slice/tile boundary*/
   3080                                             if(au4_idx_c[i] != idx_c)
   3081                                             {
   3082                                                 au1_tile_slice_boundary[i] |= 1;
   3083                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
   3084                                             }
   3085                                         }
   3086                                     }
   3087                                 }
   3088                             }
   3089 
   3090                             for(i = 0; i < 8; i++)
   3091                             {
   3092                                 /* Mark a neighbor as unavailable when it lies across a slice/tile boundary and filtering across that boundary is not enabled */
   3093                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   3094                                 {
   3095                                     au1_avail_chroma[i] = 0;
   3096                                 }
   3097                             }
   3098                         }
   3099                     }
   3100 
   3101                     if(0 == ps_sao_ctxt->i4_ctb_x)
   3102                     {
   3103                         au1_avail_chroma[0] = 0;
   3104                         au1_avail_chroma[4] = 0;
   3105                         au1_avail_chroma[6] = 0;
   3106                     }
   3107 
   3108                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
   3109                     {
   3110                         au1_avail_chroma[1] = 0;
   3111                         au1_avail_chroma[5] = 0;
   3112                         au1_avail_chroma[7] = 0;
   3113                     }
   3114 
   3115                     if(0 == ps_sao_ctxt->i4_ctb_y)
   3116                     {
   3117                         au1_avail_chroma[2] = 0;
   3118                         au1_avail_chroma[4] = 0;
   3119                         au1_avail_chroma[5] = 0;
   3120                     }
   3121 
   3122                     if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
   3123                     {
   3124                         au1_avail_chroma[3] = 0;
   3125                         au1_avail_chroma[6] = 0;
   3126                         au1_avail_chroma[7] = 0;
   3127                     }
   3128 
   3129                     {
   3130                         au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
   3131                         au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
   3132 
   3133                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
   3134                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
   3135 
   3136                         if(chroma_yuv420sp_vu)
   3137                         {
   3138                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   3139                                                                                  src_strd,
   3140                                                                                  pu1_src_left_chroma,
   3141                                                                                  pu1_src_top_chroma,
   3142                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
   3143                                                                                  au1_src_top_right,
   3144                                                                                  au1_sao_src_top_left_chroma_bot_left,
   3145                                                                                  au1_avail_chroma,
   3146                                                                                  ai1_offset_cr,
   3147                                                                                  ai1_offset_cb,
   3148                                                                                  sao_wd_chroma,
   3149                                                                                  sao_ht_chroma);
   3150                         }
   3151                         else
   3152                         {
   3153                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   3154                                                                                  src_strd,
   3155                                                                                  pu1_src_left_chroma,
   3156                                                                                  pu1_src_top_chroma,
   3157                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
   3158                                                                                  au1_src_top_right,
   3159                                                                                  au1_sao_src_top_left_chroma_bot_left,
   3160                                                                                  au1_avail_chroma,
   3161                                                                                  ai1_offset_cb,
   3162                                                                                  ai1_offset_cr,
   3163                                                                                  sao_wd_chroma,
   3164                                                                                  sao_ht_chroma);
   3165                         }
   3166                     }
   3167 
   3168                 }
   3169                 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
   3170                 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
   3171 
   3172                 pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
   3173                 pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
   3174             }
   3175 
   3176         }
   3177     }
   3178 
   3179 
   3180 
   3181 
   3182 /* For blocks whose no-loop-filter flag bit is set, copy the backed up values back into the picture */
   3183     {
   3184         /* Luma */
   3185         if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag && no_loop_filter_enabled_luma)
   3186         {
   3187             UWORD32 u4_no_loop_filter_flag;
   3188             WORD32 loop_filter_bit_pos;
   3189             WORD32 log2_min_cu = 3;
   3190             WORD32 min_cu = (1 << log2_min_cu);
   3191             UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
   3192             WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
   3193             WORD32 sao_blk_wd = ctb_size;
   3194             WORD32 remaining_rows;
   3195             WORD32 remaining_cols;
   3196 
   3197             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
   3198             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
   3199             if(remaining_rows <= SAO_SHIFT_CTB)
   3200                 sao_blk_ht += remaining_rows;
   3201             if(remaining_cols <= SAO_SHIFT_CTB)
   3202                 sao_blk_wd += remaining_cols;
   3203 
   3204             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
   3205             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
   3206 
   3207             pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
   3208 
   3209             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
   3210                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
   3211             if(ps_sao_ctxt->i4_ctb_x > 0)
   3212                 loop_filter_bit_pos -= 1;
   3213 
   3214             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
   3215                             (loop_filter_bit_pos >> 3);
   3216 
   3217             for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
   3218                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
   3219             {
   3220                 WORD32 tmp_wd = sao_blk_wd;
   3221 
   3222                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
   3223                                 (loop_filter_bit_pos & 7);
   3224                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
   3225 
   3226                 if(u4_no_loop_filter_flag)
   3227                 {
   3228                     while(tmp_wd > 0)
   3229                     {
   3230                         if(CTZ(u4_no_loop_filter_flag))
   3231                         {
   3232                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3233                             pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3234                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
   3235                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
   3236                         }
   3237                         else
   3238                         {
   3239                             for(row = 0; row < min_cu; row++)
   3240                             {
   3241                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
   3242                                 {
   3243                                     pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
   3244                                 }
   3245                             }
   3246                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3247                             pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3248                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
   3249                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
   3250                         }
   3251                     }
   3252 
   3253                     pu1_src_tmp_luma -= sao_blk_wd;
   3254                     pu1_src_backup_luma -= sao_blk_wd;
   3255                 }
   3256 
   3257                 pu1_src_tmp_luma += (src_strd << log2_min_cu);
   3258                 pu1_src_backup_luma += (backup_strd << log2_min_cu);
   3259             }
   3260         }
   3261 
   3262         /* Chroma */
   3263         if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag && no_loop_filter_enabled_chroma)
   3264         {
   3265             UWORD32 u4_no_loop_filter_flag;
   3266             WORD32 loop_filter_bit_pos;
   3267             WORD32 log2_min_cu = 3;
   3268             WORD32 min_cu = (1 << log2_min_cu);
   3269             UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
   3270             WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
   3271             WORD32 sao_blk_wd = ctb_size;
   3272             WORD32 remaining_rows;
   3273             WORD32 remaining_cols;
   3274 
   3275             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
   3276             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
   3277             if(remaining_rows <= 2 * SAO_SHIFT_CTB)
   3278                 sao_blk_ht += remaining_rows;
   3279             if(remaining_cols <= 2 * SAO_SHIFT_CTB)
   3280                 sao_blk_wd += remaining_cols;
   3281 
   3282             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
   3283             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
   3284 
   3285             pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
   3286 
   3287             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
   3288                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
   3289             if(ps_sao_ctxt->i4_ctb_x > 0)
   3290                 loop_filter_bit_pos -= 2;
   3291 
   3292             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
   3293                             (loop_filter_bit_pos >> 3);
   3294 
   3295             for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
   3296                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
   3297             {
   3298                 WORD32 tmp_wd = sao_blk_wd;
   3299 
   3300                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
   3301                                 (loop_filter_bit_pos & 7);
   3302                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
   3303 
   3304                 if(u4_no_loop_filter_flag)
   3305                 {
   3306                     while(tmp_wd > 0)
   3307                     {
   3308                         if(CTZ(u4_no_loop_filter_flag))
   3309                         {
   3310                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3311                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3312                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
   3313                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
   3314                         }
   3315                         else
   3316                         {
   3317                             for(row = 0; row < min_cu / 2; row++)
   3318                             {
   3319                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
   3320                                 {
   3321                                     pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
   3322                                 }
   3323                             }
   3324 
   3325                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3326                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3327                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
   3328                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
   3329                         }
   3330                     }
   3331 
   3332                     pu1_src_tmp_chroma -= sao_blk_wd;
   3333                     pu1_src_backup_chroma -= sao_blk_wd;
   3334                 }
   3335 
   3336                 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
   3337                 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
   3338             }
   3339         }
   3340     }
   3341 
   3342 }
   3343 
   3344