Home | History | Annotate | Download | only in decoder
      1 /******************************************************************************
      2 *
      3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 *
      5 * Licensed under the Apache License, Version 2.0 (the "License");
      6 * you may not use this file except in compliance with the License.
      7 * You may obtain a copy of the License at:
      8 *
      9 * http://www.apache.org/licenses/LICENSE-2.0
     10 *
     11 * Unless required by applicable law or agreed to in writing, software
     12 * distributed under the License is distributed on an "AS IS" BASIS,
     13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 * See the License for the specific language governing permissions and
     15 * limitations under the License.
     16 *
     17 ******************************************************************************/
     18 /**
     19  *******************************************************************************
     20  * @file
     21  *  ihevc_sao.c
     22  *
     23  * @brief
     24  *  Contains function definitions for sample adaptive offset process
     25  *
     26  * @author
     27  *  Srinivas T
     28  *
     29  * @par List of Functions:
     30  *
     31  * @remarks
     32  *  None
     33  *
     34  *******************************************************************************
     35  */
     36 
     37 #include <stdio.h>
     38 #include <stddef.h>
     39 #include <stdlib.h>
     40 #include <string.h>
     41 #include <assert.h>
     42 
     43 #include "ihevc_typedefs.h"
     44 #include "iv.h"
     45 #include "ivd.h"
     46 #include "ihevcd_cxa.h"
     47 #include "ithread.h"
     48 
     49 #include "ihevc_defs.h"
     50 #include "ihevc_debug.h"
     51 #include "ihevc_defs.h"
     52 #include "ihevc_structs.h"
     53 #include "ihevc_macros.h"
     54 #include "ihevc_platform_macros.h"
     55 #include "ihevc_cabac_tables.h"
     56 #include "ihevc_sao.h"
     57 #include "ihevc_mem_fns.h"
     58 
     59 #include "ihevc_error.h"
     60 #include "ihevc_common_tables.h"
     61 
     62 #include "ihevcd_trace.h"
     63 #include "ihevcd_defs.h"
     64 #include "ihevcd_function_selector.h"
     65 #include "ihevcd_structs.h"
     66 #include "ihevcd_error.h"
     67 #include "ihevcd_nal.h"
     68 #include "ihevcd_bitstream.h"
     69 #include "ihevcd_job_queue.h"
     70 #include "ihevcd_utils.h"
     71 
     72 #include "ihevc_deblk.h"
     73 #include "ihevc_deblk_tables.h"
     74 #include "ihevcd_profile.h"
     75 #include "ihevcd_sao.h"
     76 #include "ihevcd_debug.h"
     77 
     78 #define SAO_SHIFT_CTB    8
     79 
     80 /**
     81  * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions)
     82  */
     83 void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
     84 {
     85     codec_t *ps_codec = ps_sao_ctxt->ps_codec;
     86     UWORD8 *pu1_src_luma;
     87     UWORD8 *pu1_src_chroma;
     88     WORD32 src_strd;
     89     WORD32 ctb_size;
     90     WORD32 log2_ctb_size;
     91     sps_t *ps_sps;
     92     sao_t *ps_sao;
     93     WORD32 row, col;
     94     UWORD8 au1_avail_luma[8];
     95     UWORD8 au1_avail_chroma[8];
     96     WORD32 i;
     97     UWORD8 *pu1_src_top_luma;
     98     UWORD8 *pu1_src_top_chroma;
     99     UWORD8 *pu1_src_left_luma;
    100     UWORD8 *pu1_src_left_chroma;
    101     UWORD8 au1_src_top_right[2];
    102     UWORD8 au1_src_bot_left[2];
    103     UWORD8 *pu1_no_loop_filter_flag;
    104     WORD32 loop_filter_strd;
    105 
    106     WORD8 ai1_offset_y[5];
    107     WORD8 ai1_offset_cb[5];
    108     WORD8 ai1_offset_cr[5];
    109 
    110     PROFILE_DISABLE_SAO();
    111 
    112     ai1_offset_y[0] = 0;
    113     ai1_offset_cb[0] = 0;
    114     ai1_offset_cr[0] = 0;
    115 
    116     ps_sps = ps_sao_ctxt->ps_sps;
    117     log2_ctb_size = ps_sps->i1_log2_ctb_size;
    118     ctb_size = (1 << log2_ctb_size);
    119     src_strd = ps_sao_ctxt->ps_codec->i4_strd;
    120     pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
    121     pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
    122 
    123     ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
    124     loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
    125 
    126     /* Current CTB */
    127     {
    128         WORD32 sao_wd_luma;
    129         WORD32 sao_wd_chroma;
    130         WORD32 sao_ht_luma;
    131         WORD32 sao_ht_chroma;
    132 
    133         WORD32 remaining_rows;
    134         WORD32 remaining_cols;
    135 
    136         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
    137         sao_wd_luma = MIN(ctb_size, remaining_cols);
    138         sao_wd_chroma = MIN(ctb_size, remaining_cols);
    139 
    140         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
    141         sao_ht_luma = MIN(ctb_size, remaining_rows);
    142         sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
    143 
    144         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
    145         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
    146         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
    147         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
    148 
    149         pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
    150                         ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
    151                         ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
    152 
    153         ai1_offset_y[1] = ps_sao->b4_y_offset_1;
    154         ai1_offset_y[2] = ps_sao->b4_y_offset_2;
    155         ai1_offset_y[3] = ps_sao->b4_y_offset_3;
    156         ai1_offset_y[4] = ps_sao->b4_y_offset_4;
    157 
    158         ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
    159         ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
    160         ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
    161         ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
    162 
    163         ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
    164         ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
    165         ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
    166         ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
    167 
    168         for(i = 0; i < 8; i++)
    169         {
    170             au1_avail_luma[i] = 255;
    171             au1_avail_chroma[i] = 255;
    172         }
    173 
    174 
    175         if(0 == ps_sao_ctxt->i4_ctb_x)
    176         {
    177             au1_avail_luma[0] = 0;
    178             au1_avail_luma[4] = 0;
    179             au1_avail_luma[6] = 0;
    180 
    181             au1_avail_chroma[0] = 0;
    182             au1_avail_chroma[4] = 0;
    183             au1_avail_chroma[6] = 0;
    184         }
    185 
    186         if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
    187         {
    188             au1_avail_luma[1] = 0;
    189             au1_avail_luma[5] = 0;
    190             au1_avail_luma[7] = 0;
    191 
    192             au1_avail_chroma[1] = 0;
    193             au1_avail_chroma[5] = 0;
    194             au1_avail_chroma[7] = 0;
    195         }
    196 
    197         if(0 == ps_sao_ctxt->i4_ctb_y)
    198         {
    199             au1_avail_luma[2] = 0;
    200             au1_avail_luma[4] = 0;
    201             au1_avail_luma[5] = 0;
    202 
    203             au1_avail_chroma[2] = 0;
    204             au1_avail_chroma[4] = 0;
    205             au1_avail_chroma[5] = 0;
    206         }
    207 
    208         if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
    209         {
    210             au1_avail_luma[3] = 0;
    211             au1_avail_luma[6] = 0;
    212             au1_avail_luma[7] = 0;
    213 
    214             au1_avail_chroma[3] = 0;
    215             au1_avail_chroma[6] = 0;
    216             au1_avail_chroma[7] = 0;
    217         }
    218 
    219 
    220         if(0 == ps_sao->b3_y_type_idx)
    221         {
    222             /* Update left, top and top-left */
    223             for(row = 0; row < sao_ht_luma; row++)
    224             {
    225                 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
    226             }
    227             ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
    228 
    229             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
    230 
    231         }
    232         else
    233         {
    234             UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
    235             UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
    236             WORD32 tmp_strd = MAX_CTB_SIZE + 2;
    237             WORD32 no_loop_filter_enabled = 0;
    238 
    239             /* Check the loop filter flags and copy the original values for back up */
    240             {
    241                 UWORD32 u4_no_loop_filter_flag;
    242                 WORD32 min_cu = 8;
    243                 UWORD8 *pu1_src_tmp = pu1_src_luma;
    244 
    245                 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
    246                 {
    247                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
    248                                     ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
    249                     u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
    250 
    251                     if(u4_no_loop_filter_flag)
    252                     {
    253                         WORD32 tmp_wd = sao_wd_luma;
    254                         no_loop_filter_enabled = 1;
    255                         while(tmp_wd > 0)
    256                         {
    257                             if(CTZ(u4_no_loop_filter_flag))
    258                             {
    259                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    260                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    261                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    262                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
    263                             }
    264                             else
    265                             {
    266                                 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
    267                                 {
    268                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
    269                                     {
    270                                         pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
    271                                     }
    272                                 }
    273 
    274                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    275                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    276                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    277                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
    278                             }
    279                         }
    280 
    281                         pu1_src_tmp -= sao_wd_luma;
    282                     }
    283 
    284                     pu1_src_tmp += min_cu * src_strd;
    285                     pu1_src_copy += min_cu * tmp_strd;
    286                 }
    287             }
    288 
    289             if(1 == ps_sao->b3_y_type_idx)
    290             {
    291                 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
    292                                                                           src_strd,
    293                                                                           pu1_src_left_luma,
    294                                                                           pu1_src_top_luma,
    295                                                                           ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
    296                                                                           ps_sao->b5_y_band_pos,
    297                                                                           ai1_offset_y,
    298                                                                           sao_wd_luma,
    299                                                                           sao_ht_luma);
    300             }
    301             else // if(2 <= ps_sao->b3_y_type_idx)
    302             {
    303                 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
    304                 au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
    305                 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
    306                                                                   src_strd,
    307                                                                   pu1_src_left_luma,
    308                                                                   pu1_src_top_luma,
    309                                                                   ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
    310                                                                   au1_src_top_right,
    311                                                                   au1_src_bot_left,
    312                                                                   au1_avail_luma,
    313                                                                   ai1_offset_y,
    314                                                                   sao_wd_luma,
    315                                                                   sao_ht_luma);
    316             }
    317 
    318             /* Check the loop filter flags and copy the original values back if they are set */
    319             if(no_loop_filter_enabled)
    320             {
    321                 UWORD32 u4_no_loop_filter_flag;
    322                 WORD32 min_cu = 8;
    323                 UWORD8 *pu1_src_tmp = pu1_src_luma;
    324 
    325                 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
    326                 {
    327                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
    328                     u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
    329 
    330                     if(u4_no_loop_filter_flag)
    331                     {
    332                         WORD32 tmp_wd = sao_wd_luma;
    333                         while(tmp_wd > 0)
    334                         {
    335                             if(CTZ(u4_no_loop_filter_flag))
    336                             {
    337                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    338                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    339                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    340                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
    341                             }
    342                             else
    343                             {
    344                                 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
    345                                 {
    346                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
    347                                     {
    348                                         pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
    349                                     }
    350                                 }
    351 
    352                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    353                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    354                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    355                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
    356                             }
    357                         }
    358 
    359                         pu1_src_tmp -= sao_wd_luma;
    360                     }
    361 
    362                     pu1_src_tmp += min_cu * src_strd;
    363                     pu1_src_copy += min_cu * tmp_strd;
    364                 }
    365             }
    366 
    367         }
    368 
    369         if(0 == ps_sao->b3_cb_type_idx)
    370         {
    371             for(row = 0; row < sao_ht_chroma; row++)
    372             {
    373                 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
    374                 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
    375             }
    376             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
    377             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
    378 
    379             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
    380         }
    381         else
    382         {
    383             UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
    384             UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
    385             WORD32 tmp_strd = MAX_CTB_SIZE + 4;
    386             WORD32 no_loop_filter_enabled = 0;
    387 
    388             /* Check the loop filter flags and copy the original values for back up */
    389             {
    390                 UWORD32 u4_no_loop_filter_flag;
    391                 WORD32 min_cu = 4;
    392                 UWORD8 *pu1_src_tmp = pu1_src_chroma;
    393 
    394                 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
    395                 {
    396                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
    397                     u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
    398 
    399                     if(u4_no_loop_filter_flag)
    400                     {
    401                         WORD32 tmp_wd = sao_wd_chroma;
    402                         no_loop_filter_enabled = 1;
    403                         while(tmp_wd > 0)
    404                         {
    405                             if(CTZ(u4_no_loop_filter_flag))
    406                             {
    407                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    408                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    409                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    410                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
    411                             }
    412                             else
    413                             {
    414                                 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
    415                                 {
    416                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
    417                                     {
    418                                         pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
    419                                     }
    420                                 }
    421 
    422                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    423                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    424                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    425                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
    426                             }
    427                         }
    428 
    429                         pu1_src_tmp -= sao_wd_chroma;
    430                     }
    431 
    432                     pu1_src_tmp += min_cu * src_strd;
    433                     pu1_src_copy += min_cu * tmp_strd;
    434                 }
    435             }
    436 
    437             if(1 == ps_sao->b3_cb_type_idx)
    438             {
    439                 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
    440                                                                             src_strd,
    441                                                                             pu1_src_left_chroma,
    442                                                                             pu1_src_top_chroma,
    443                                                                             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
    444                                                                             ps_sao->b5_cb_band_pos,
    445                                                                             ps_sao->b5_cr_band_pos,
    446                                                                             ai1_offset_cb,
    447                                                                             ai1_offset_cr,
    448                                                                             sao_wd_chroma,
    449                                                                             sao_ht_chroma
    450                                                                            );
    451             }
    452             else // if(2 <= ps_sao->b3_cb_type_idx)
    453             {
    454                 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
    455                 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
    456                 au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
    457                 au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
    458                 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
    459                                                                      src_strd,
    460                                                                      pu1_src_left_chroma,
    461                                                                      pu1_src_top_chroma,
    462                                                                      ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
    463                                                                      au1_src_top_right,
    464                                                                      au1_src_bot_left,
    465                                                                      au1_avail_chroma,
    466                                                                      ai1_offset_cb,
    467                                                                      ai1_offset_cr,
    468                                                                      sao_wd_chroma,
    469                                                                      sao_ht_chroma);
    470             }
    471 
    472             /* Check the loop filter flags and copy the original values back if they are set */
    473             if(no_loop_filter_enabled)
    474             {
    475                 UWORD32 u4_no_loop_filter_flag;
    476                 WORD32 min_cu = 4;
    477                 UWORD8 *pu1_src_tmp = pu1_src_chroma;
    478 
    479                 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
    480                 {
    481                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
    482                     u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
    483 
    484                     if(u4_no_loop_filter_flag)
    485                     {
    486                         WORD32 tmp_wd = sao_wd_chroma;
    487                         while(tmp_wd > 0)
    488                         {
    489                             if(CTZ(u4_no_loop_filter_flag))
    490                             {
    491                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    492                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    493                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    494                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
    495                             }
    496                             else
    497                             {
    498                                 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
    499                                 {
    500                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
    501                                     {
    502                                         pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
    503                                     }
    504                                 }
    505 
    506                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    507                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    508                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    509                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
    510                             }
    511                         }
    512 
    513                         pu1_src_tmp -= sao_wd_chroma;
    514                     }
    515 
    516                     pu1_src_tmp += min_cu * src_strd;
    517                     pu1_src_copy += min_cu * tmp_strd;
    518                 }
    519             }
    520 
    521         }
    522 
    523     }
    524 }
    525 
    526 void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
    527 {
    528     codec_t *ps_codec = ps_sao_ctxt->ps_codec;
    529     UWORD8 *pu1_src_luma;
    530     UWORD8 *pu1_src_chroma;
    531     WORD32 src_strd;
    532     WORD32 ctb_size;
    533     WORD32 log2_ctb_size;
    534     sps_t *ps_sps;
    535     sao_t *ps_sao;
    536     pps_t *ps_pps;
    537     slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
    538     tile_t *ps_tile;
    539     UWORD16 *pu1_slice_idx;
    540     UWORD16 *pu1_tile_idx;
    541     WORD32 row, col;
    542     UWORD8 au1_avail_luma[8];
    543     UWORD8 au1_avail_chroma[8];
    544     UWORD8 au1_tile_slice_boundary[8];
    545     UWORD8 au4_ilf_across_tile_slice_enable[8];
    546     WORD32 i;
    547     UWORD8 *pu1_src_top_luma;
    548     UWORD8 *pu1_src_top_chroma;
    549     UWORD8 *pu1_src_left_luma;
    550     UWORD8 *pu1_src_left_chroma;
    551     UWORD8 au1_src_top_right[2];
    552     UWORD8 au1_src_bot_left[2];
    553     UWORD8 *pu1_no_loop_filter_flag;
    554     UWORD8 *pu1_src_backup_luma;
    555     UWORD8 *pu1_src_backup_chroma;
    556     WORD32 backup_strd;
    557     WORD32 loop_filter_strd;
    558 
    559     WORD32 no_loop_filter_enabled_luma = 0;
    560     WORD32 no_loop_filter_enabled_chroma = 0;
    561     UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
    562     UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
    563     UWORD8 *pu1_sao_src_luma_top_left_ctb;
    564     UWORD8 *pu1_sao_src_chroma_top_left_ctb;
    565     UWORD8 *pu1_sao_src_top_left_luma_top_right;
    566     UWORD8 *pu1_sao_src_top_left_chroma_top_right;
    567     UWORD8  u1_sao_src_top_left_luma_bot_left;
    568     UWORD8  *pu1_sao_src_top_left_luma_bot_left;
    569     UWORD8 *au1_sao_src_top_left_chroma_bot_left;
    570     UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
    571 
    572     WORD8 ai1_offset_y[5];
    573     WORD8 ai1_offset_cb[5];
    574     WORD8 ai1_offset_cr[5];
    575     WORD32  chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
    576 
    577     PROFILE_DISABLE_SAO();
    578 
    579     ai1_offset_y[0] = 0;
    580     ai1_offset_cb[0] = 0;
    581     ai1_offset_cr[0] = 0;
    582 
    583     ps_sps = ps_sao_ctxt->ps_sps;
    584     ps_pps = ps_sao_ctxt->ps_pps;
    585     ps_tile = ps_sao_ctxt->ps_tile;
    586 
    587     log2_ctb_size = ps_sps->i1_log2_ctb_size;
    588     ctb_size = (1 << log2_ctb_size);
    589     src_strd = ps_sao_ctxt->ps_codec->i4_strd;
    590     ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
    591     ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
    592 
    593     pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
    594     pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
    595     pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
    596     pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
    597 
    598     /*Stores the left value for each row ctbs- Needed for column tiles*/
    599     pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
    600     pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
    601     pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
    602     pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
    603     u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
    604     pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
    605     au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
    606     pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
    607     pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
    608     pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
    609 
    610     ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
    611     loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
    612     backup_strd = 2 * MAX_CTB_SIZE;
    613 
    614     DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
    615 
    616     {
    617         /* Check the loop filter flags and copy the original values for back up */
    618         /* Luma */
    619 
    620         /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs
    621          * can belong to different slice with their own sao_enable flag */
    622         {
    623             UWORD32 u4_no_loop_filter_flag;
    624             WORD32 loop_filter_bit_pos;
    625             WORD32 log2_min_cu = 3;
    626             WORD32 min_cu = (1 << log2_min_cu);
    627             UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
    628             WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
    629             WORD32 sao_blk_wd = ctb_size;
    630             WORD32 remaining_rows;
    631             WORD32 remaining_cols;
    632 
    633             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
    634             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
    635             if(remaining_rows <= SAO_SHIFT_CTB)
    636                 sao_blk_ht += remaining_rows;
    637             if(remaining_cols <= SAO_SHIFT_CTB)
    638                 sao_blk_wd += remaining_cols;
    639 
    640             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
    641             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
    642 
    643             pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
    644 
    645             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
    646                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
    647             if(ps_sao_ctxt->i4_ctb_x > 0)
    648                 loop_filter_bit_pos -= 1;
    649 
    650             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
    651                             (loop_filter_bit_pos >> 3);
    652 
    653             for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
    654                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
    655             {
    656                 WORD32 tmp_wd = sao_blk_wd;
    657 
    658                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
    659                                 (loop_filter_bit_pos & 7);
    660                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
    661 
    662                 if(u4_no_loop_filter_flag)
    663                 {
    664                     no_loop_filter_enabled_luma = 1;
    665                     while(tmp_wd > 0)
    666                     {
    667                         if(CTZ(u4_no_loop_filter_flag))
    668                         {
    669                             pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    670                             pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    671                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
    672                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    673                         }
    674                         else
    675                         {
    676                             for(row = 0; row < min_cu; row++)
    677                             {
    678                                 for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
    679                                 {
    680                                     pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
    681                                 }
    682                             }
    683                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    684                             pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    685                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
    686                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    687                         }
    688                     }
    689 
    690                     pu1_src_tmp_luma -= sao_blk_wd;
    691                     pu1_src_backup_luma -= sao_blk_wd;
    692                 }
    693 
    694                 pu1_src_tmp_luma += (src_strd << log2_min_cu);
    695                 pu1_src_backup_luma += (backup_strd << log2_min_cu);
    696             }
    697         }
    698 
    699         /* Chroma */
    700 
    701         {
    702             UWORD32 u4_no_loop_filter_flag;
    703             WORD32 loop_filter_bit_pos;
    704             WORD32 log2_min_cu = 3;
    705             WORD32 min_cu = (1 << log2_min_cu);
    706             UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
    707             WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
    708             WORD32 sao_blk_wd = ctb_size;
    709             WORD32 remaining_rows;
    710             WORD32 remaining_cols;
    711 
    712             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
    713             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
    714             if(remaining_rows <= 2 * SAO_SHIFT_CTB)
    715                 sao_blk_ht += remaining_rows;
    716             if(remaining_cols <= 2 * SAO_SHIFT_CTB)
    717                 sao_blk_wd += remaining_cols;
    718 
    719             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
    720             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
    721 
    722             pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
    723 
    724             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
    725                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
    726             if(ps_sao_ctxt->i4_ctb_x > 0)
    727                 loop_filter_bit_pos -= 2;
    728 
    729             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
    730                             (loop_filter_bit_pos >> 3);
    731 
    732             for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
    733                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
    734             {
    735                 WORD32 tmp_wd = sao_blk_wd;
    736 
    737                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
    738                                 (loop_filter_bit_pos & 7);
    739                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
    740 
    741                 if(u4_no_loop_filter_flag)
    742                 {
    743                     no_loop_filter_enabled_chroma = 1;
    744                     while(tmp_wd > 0)
    745                     {
    746                         if(CTZ(u4_no_loop_filter_flag))
    747                         {
    748                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    749                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    750                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
    751                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    752                         }
    753                         else
    754                         {
    755                             for(row = 0; row < min_cu / 2; row++)
    756                             {
    757                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
    758                                 {
    759                                     pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
    760                                 }
    761                             }
    762 
    763                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    764                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    765                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
    766                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    767                         }
    768                     }
    769 
    770                     pu1_src_tmp_chroma -= sao_blk_wd;
    771                     pu1_src_backup_chroma -= sao_blk_wd;
    772                 }
    773 
    774                 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
    775                 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
    776             }
    777         }
    778     }
    779 
    780     DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
    781 
    782     /* Top-left CTB */
    783     if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
    784     {
    785         WORD32 sao_wd_luma = SAO_SHIFT_CTB;
    786         WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
    787         WORD32 sao_ht_luma = SAO_SHIFT_CTB;
    788         WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
    789 
    790         WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
    791         WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
    792         WORD32 au4_idx_tl[8], idx_tl;
    793 
    794         slice_header_t *ps_slice_hdr_top_left;
    795         {
    796             WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
    797                                         (ps_sao_ctxt->i4_ctb_x - 1);
    798             ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx];
    799         }
    800 
    801 
    802         pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
    803         pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
    804         ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
    805         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
    806         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
    807         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
    808         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
    809 
    810         if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag)
    811         {
    812             if(0 == ps_sao->b3_y_type_idx)
    813             {
    814                 /* Update left, top and top-left */
    815                 for(row = 0; row < sao_ht_luma; row++)
    816                 {
    817                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
    818                 }
    819                 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
    820 
    821                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
    822 
    823 
    824             }
    825 
    826             else if(1 == ps_sao->b3_y_type_idx)
    827             {
    828                 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
    829                 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
    830                 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
    831                 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
    832 
    833                 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
    834                                                                           src_strd,
    835                                                                           pu1_src_left_luma,
    836                                                                           pu1_src_top_luma,
    837                                                                           pu1_sao_src_luma_top_left_ctb,
    838                                                                           ps_sao->b5_y_band_pos,
    839                                                                           ai1_offset_y,
    840                                                                           sao_wd_luma,
    841                                                                           sao_ht_luma
    842                                                                          );
    843             }
    844 
    845             else // if(2 <= ps_sao->b3_y_type_idx)
    846             {
    847                 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
    848                 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
    849                 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
    850                 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
    851 
    852                 for(i = 0; i < 8; i++)
    853                 {
    854                     au1_avail_luma[i] = 255;
    855                     au1_tile_slice_boundary[i] = 0;
    856                     au4_idx_tl[i] = 0;
    857                     au4_ilf_across_tile_slice_enable[i] = 1;
    858                 }
    859 
    860                 /******************************************************************
    861                  * Derive the  Top-left CTB's neighbor pixel's slice indices.
    862                  *
    863                  *          TL_T
    864                  *       4  _2__5________
    865                  *     0   |    |       |
    866                  *    TL_L | TL | 1 TL_R|
    867                  *         |____|_______|____
    868                  *        6|TL_D|7      |    |
    869                  *         | 3  |       |    |
    870                  *         |____|_______|    |
    871                  *              |            |
    872                  *              |            |
    873                  *              |____________|
    874                  *
    875                  *****************************************************************/
    876 
    877                 /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/
    878                 {
    879                     if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
    880                     {
    881                         {
    882                             /*Assuming that sao shift is uniform along x and y directions*/
    883                             if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
    884                             {
    885                                 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
    886                                 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
    887                             }
    888                             else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
    889                             {
    890                                 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
    891                                 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
    892                             }
    893                             ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
    894                             ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
    895 
    896                             ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
    897                             ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
    898 
    899                             ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
    900                             ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
    901 
    902                             ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
    903                             ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
    904                         }
    905 
    906                         if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
    907                         {
    908                             /*Calculate slice indices for neighbor pixels*/
    909                             idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
    910                             au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
    911                             au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
    912                             au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
    913                             au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
    914                             au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
    915 
    916                             if((0 == (1 << log2_ctb_size) - sao_wd_luma))
    917                             {
    918                                 if(ps_sao_ctxt->i4_ctb_x == 1)
    919                                 {
    920                                     au4_idx_tl[6] = -1;
    921                                     au4_idx_tl[4] = -1;
    922                                 }
    923                                 else
    924                                 {
    925                                     au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
    926                                 }
    927                                 if(ps_sao_ctxt->i4_ctb_y == 1)
    928                                 {
    929                                     au4_idx_tl[5] = -1;
    930                                     au4_idx_tl[4] = -1;
    931                                 }
    932                                 else
    933                                 {
    934                                     au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
    935                                     au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
    936                                 }
    937                                 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
    938                             }
    939 
    940                             /* Verify that the neighbor ctbs dont cross pic boundary.
    941                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
    942                              * of the pixel having a greater address is checked. Accordingly, set the availability flags.
    943                              * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
    944                              * the respective pixel's flags are checked
    945                              */
    946 
    947                             if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
    948                             {
    949                                 au4_ilf_across_tile_slice_enable[4] = 0;
    950                                 au4_ilf_across_tile_slice_enable[6] = 0;
    951                             }
    952                             else
    953                             {
    954                                 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
    955                             }
    956                             if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
    957                             {
    958                                 au4_ilf_across_tile_slice_enable[5] = 0;
    959                                 au4_ilf_across_tile_slice_enable[4] = 0;
    960                             }
    961                             else
    962                             {
    963                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
    964                                 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
    965                             }
    966                             au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
    967                             au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
    968                             au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
    969                             au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
    970                             au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
    971 
    972                             if(au4_idx_tl[5] > idx_tl)
    973                             {
    974                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
    975                             }
    976 
    977                             /*
    978                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
    979                              * of the pixel having a greater address is checked. Accordingly, set the availability flags.
    980                              * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
    981                              * the respective pixel's flags are checked
    982                              */
    983                             for(i = 0; i < 8; i++)
    984                             {
    985                                 /*Sets the edges that lie on the slice/tile boundary*/
    986                                 if(au4_idx_tl[i] != idx_tl)
    987                                 {
    988                                     au1_tile_slice_boundary[i] = 1;
    989                                 }
    990                                 else
    991                                 {
    992                                     au4_ilf_across_tile_slice_enable[i] = 1;
    993                                 }
    994                             }
    995 
    996                             ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
    997                         }
    998 
    999                         if(ps_pps->i1_tiles_enabled_flag)
   1000                         {
   1001                             /* Calculate availability flags at slice boundary */
   1002                             if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   1003                             {
   1004                                 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   1005                                 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   1006                                 {
   1007                                     /*Set the boundary arrays*/
   1008                                     /*Calculate tile indices for neighbor pixels*/
   1009                                     idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
   1010                                     au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
   1011                                     au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1012                                     au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1013                                     au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1014                                     au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1015 
   1016                                     if((0 == (1 << log2_ctb_size) - sao_wd_luma))
   1017                                     {
   1018                                         if(ps_sao_ctxt->i4_ctb_x == 1)
   1019                                         {
   1020                                             au4_idx_tl[6] = -1;
   1021                                             au4_idx_tl[4] = -1;
   1022                                         }
   1023                                         else
   1024                                         {
   1025                                             au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1026                                         }
   1027                                         if(ps_sao_ctxt->i4_ctb_y == 1)
   1028                                         {
   1029                                             au4_idx_tl[5] = -1;
   1030                                             au4_idx_tl[4] = -1;
   1031                                         }
   1032                                         else
   1033                                         {
   1034                                             au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1035                                             au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
   1036                                         }
   1037                                         au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1038                                     }
   1039                                     for(i = 0; i < 8; i++)
   1040                                     {
   1041                                         /*Sets the edges that lie on the tile boundary*/
   1042                                         if(au4_idx_tl[i] != idx_tl)
   1043                                         {
   1044                                             au1_tile_slice_boundary[i] |= 1;
   1045                                             au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
   1046                                         }
   1047                                     }
   1048                                 }
   1049                             }
   1050                         }
   1051 
   1052 
   1053                         /*Set availability flags based on tile and slice boundaries*/
   1054                         for(i = 0; i < 8; i++)
   1055                         {
   1056                             /*Sets the edges that lie on the slice/tile boundary*/
   1057                             if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   1058                             {
   1059                                 au1_avail_luma[i] = 0;
   1060                             }
   1061                         }
   1062                     }
   1063                 }
   1064 
   1065                 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
   1066                 {
   1067                     au1_avail_luma[0] = 0;
   1068                     au1_avail_luma[4] = 0;
   1069                     au1_avail_luma[6] = 0;
   1070                 }
   1071 
   1072                 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
   1073                 {
   1074                     au1_avail_luma[1] = 0;
   1075                     au1_avail_luma[5] = 0;
   1076                     au1_avail_luma[7] = 0;
   1077                 }
   1078                 //y==1 case
   1079                 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
   1080                 {
   1081                     au1_avail_luma[2] = 0;
   1082                     au1_avail_luma[4] = 0;
   1083                     au1_avail_luma[5] = 0;
   1084                 }
   1085                 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
   1086                 {
   1087                     au1_avail_luma[3] = 0;
   1088                     au1_avail_luma[6] = 0;
   1089                     au1_avail_luma[7] = 0;
   1090                 }
   1091 
   1092                 {
   1093                     au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
   1094                     u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
   1095                     ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
   1096                                                                       src_strd,
   1097                                                                       pu1_src_left_luma,
   1098                                                                       pu1_src_top_luma,
   1099                                                                       pu1_sao_src_luma_top_left_ctb,
   1100                                                                       au1_src_top_right,
   1101                                                                       &u1_sao_src_top_left_luma_bot_left,
   1102                                                                       au1_avail_luma,
   1103                                                                       ai1_offset_y,
   1104                                                                       sao_wd_luma,
   1105                                                                       sao_ht_luma);
   1106                 }
   1107             }
   1108 
   1109         }
   1110         else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   1111         {
   1112             /* Update left, top and top-left */
   1113             for(row = 0; row < sao_ht_luma; row++)
   1114             {
   1115                 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   1116             }
   1117             pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   1118 
   1119             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   1120         }
   1121 
   1122         if(ps_slice_hdr_top_left->i1_slice_sao_chroma_flag)
   1123         {
   1124             if(0 == ps_sao->b3_cb_type_idx)
   1125             {
   1126                 for(row = 0; row < sao_ht_chroma; row++)
   1127                 {
   1128                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   1129                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   1130                 }
   1131                 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   1132                 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   1133 
   1134                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   1135 
   1136             }
   1137 
   1138             else if(1 == ps_sao->b3_cb_type_idx)
   1139             {
   1140                 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   1141                 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   1142                 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   1143                 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   1144 
   1145                 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   1146                 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   1147                 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   1148                 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   1149 
   1150                 if(chroma_yuv420sp_vu)
   1151                 {
   1152                     ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   1153                                                                                 src_strd,
   1154                                                                                 pu1_src_left_chroma,
   1155                                                                                 pu1_src_top_chroma,
   1156                                                                                 pu1_sao_src_chroma_top_left_ctb,
   1157                                                                                 ps_sao->b5_cr_band_pos,
   1158                                                                                 ps_sao->b5_cb_band_pos,
   1159                                                                                 ai1_offset_cr,
   1160                                                                                 ai1_offset_cb,
   1161                                                                                 sao_wd_chroma,
   1162                                                                                 sao_ht_chroma
   1163                                                                                );
   1164                 }
   1165                 else
   1166                 {
   1167                     ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   1168                                                                                 src_strd,
   1169                                                                                 pu1_src_left_chroma,
   1170                                                                                 pu1_src_top_chroma,
   1171                                                                                 pu1_sao_src_chroma_top_left_ctb,
   1172                                                                                 ps_sao->b5_cb_band_pos,
   1173                                                                                 ps_sao->b5_cr_band_pos,
   1174                                                                                 ai1_offset_cb,
   1175                                                                                 ai1_offset_cr,
   1176                                                                                 sao_wd_chroma,
   1177                                                                                 sao_ht_chroma
   1178                                                                                );
   1179                 }
   1180             }
   1181 
   1182             else // if(2 <= ps_sao->b3_cb_type_idx)
   1183             {
   1184                 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   1185                 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   1186                 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   1187                 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   1188 
   1189                 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   1190                 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   1191                 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   1192                 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   1193                 for(i = 0; i < 8; i++)
   1194                 {
   1195                     au1_avail_chroma[i] = 255;
   1196                     au1_tile_slice_boundary[i] = 0;
   1197                     au4_idx_tl[i] = 0;
   1198                     au4_ilf_across_tile_slice_enable[i] = 1;
   1199                 }
   1200                 /*In case of slices*/
   1201                 {
   1202                     if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   1203                     {
   1204                         if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
   1205                         {
   1206                             ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
   1207                             ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
   1208                         }
   1209                         else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
   1210                         {
   1211                             ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
   1212                             ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
   1213                         }
   1214                         ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
   1215                         ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
   1216 
   1217                         ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
   1218                         ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
   1219 
   1220                         ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
   1221                         ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
   1222 
   1223                         ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
   1224                         ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
   1225 
   1226                         if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   1227                         {
   1228 
   1229                             idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
   1230                             au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
   1231                             au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1232                             au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1233                             au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1234                             au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1235 
   1236                             if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
   1237                             {
   1238                                 if(ps_sao_ctxt->i4_ctb_x == 1)
   1239                                 {
   1240                                     au4_idx_tl[6] = -1;
   1241                                     au4_idx_tl[4] = -1;
   1242                                 }
   1243                                 else
   1244                                 {
   1245                                     au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1246                                 }
   1247                                 if(ps_sao_ctxt->i4_ctb_y == 1)
   1248                                 {
   1249                                     au4_idx_tl[5] = -1;
   1250                                     au4_idx_tl[4] = -1;
   1251                                 }
   1252                                 else
   1253                                 {
   1254                                     au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1255                                     au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
   1256                                 }
   1257                                 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1258                             }
   1259 
   1260                             /* Verify that the neighbor ctbs don't cross pic boundary
   1261                              * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
   1262                             if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
   1263                             {
   1264                                 au4_ilf_across_tile_slice_enable[4] = 0;
   1265                                 au4_ilf_across_tile_slice_enable[6] = 0;
   1266                             }
   1267                             else
   1268                             {
   1269                                 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
   1270                             }
   1271                             if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
   1272                             {
   1273                                 au4_ilf_across_tile_slice_enable[5] = 0;
   1274                                 au4_ilf_across_tile_slice_enable[4] = 0;
   1275                             }
   1276                             else
   1277                             {
   1278                                 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
   1279                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
   1280                             }
   1281                             au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
   1282                             au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
   1283                             au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   1284                             au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   1285                             au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   1286                             /*
   1287                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   1288                              * of the pixel having a greater address is checked. Accordingly, set the availability flags
   1289                              */
   1290                             for(i = 0; i < 8; i++)
   1291                             {
   1292                                 /*Sets the edges that lie on the slice/tile boundary*/
   1293                                 if(au4_idx_tl[i] != idx_tl)
   1294                                 {
   1295                                     au1_tile_slice_boundary[i] = 1;
   1296                                 }
   1297                                 else
   1298                                 {
   1299                                     au4_ilf_across_tile_slice_enable[i] = 1;
   1300                                 }
   1301                             }
   1302 
   1303                             /*Reset indices*/
   1304                             for(i = 0; i < 8; i++)
   1305                             {
   1306                                 au4_idx_tl[i] = 0;
   1307                             }
   1308                         }
   1309                         if(ps_pps->i1_tiles_enabled_flag)
   1310                         {
   1311                             /* Calculate availability flags at tile boundary */
   1312                             if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   1313                             {
   1314                                 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   1315                                 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   1316                                 {
   1317                                     /*Set the boundary arrays*/
   1318                                     /*Calculate tile indices for neighbor pixels*/
   1319                                     idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
   1320                                     au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
   1321                                     au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1322                                     au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1323                                     au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1324                                     au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1325 
   1326                                     if((0 == (1 << log2_ctb_size) - sao_wd_luma))
   1327                                     {
   1328                                         if(ps_sao_ctxt->i4_ctb_x == 1)
   1329                                         {
   1330                                             au4_idx_tl[6] = -1;
   1331                                             au4_idx_tl[4] = -1;
   1332                                         }
   1333                                         else
   1334                                         {
   1335                                             au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1336                                         }
   1337                                         if(ps_sao_ctxt->i4_ctb_y == 1)
   1338                                         {
   1339                                             au4_idx_tl[5] = -1;
   1340                                             au4_idx_tl[4] = -1;
   1341                                         }
   1342                                         else
   1343                                         {
   1344                                             au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1345                                             au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
   1346                                         }
   1347                                         au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1348                                     }
   1349                                     for(i = 0; i < 8; i++)
   1350                                     {
   1351                                         /*Sets the edges that lie on the tile boundary*/
   1352                                         if(au4_idx_tl[i] != idx_tl)
   1353                                         {
   1354                                             au1_tile_slice_boundary[i] |= 1;
   1355                                             au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
   1356                                         }
   1357                                     }
   1358                                 }
   1359                             }
   1360                         }
   1361 
   1362                         for(i = 0; i < 8; i++)
   1363                         {
   1364                             /*Sets the edges that lie on the slice/tile boundary*/
   1365                             if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   1366                             {
   1367                                 au1_avail_chroma[i] = 0;
   1368                             }
   1369                         }
   1370                     }
   1371                 }
   1372 
   1373                 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
   1374                 {
   1375                     au1_avail_chroma[0] = 0;
   1376                     au1_avail_chroma[4] = 0;
   1377                     au1_avail_chroma[6] = 0;
   1378                 }
   1379                 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
   1380                 {
   1381                     au1_avail_chroma[1] = 0;
   1382                     au1_avail_chroma[5] = 0;
   1383                     au1_avail_chroma[7] = 0;
   1384                 }
   1385 
   1386                 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
   1387                 {
   1388                     au1_avail_chroma[2] = 0;
   1389                     au1_avail_chroma[4] = 0;
   1390                     au1_avail_chroma[5] = 0;
   1391                 }
   1392                 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
   1393                 {
   1394                     au1_avail_chroma[3] = 0;
   1395                     au1_avail_chroma[6] = 0;
   1396                     au1_avail_chroma[7] = 0;
   1397                 }
   1398 
   1399                 {
   1400                     au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
   1401                     au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
   1402                     au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
   1403                     au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
   1404                     if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
   1405                     {
   1406                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
   1407                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
   1408                     }
   1409 
   1410                     if(chroma_yuv420sp_vu)
   1411                     {
   1412                         ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   1413                                                                              src_strd,
   1414                                                                              pu1_src_left_chroma,
   1415                                                                              pu1_src_top_chroma,
   1416                                                                              pu1_sao_src_chroma_top_left_ctb,
   1417                                                                              au1_src_top_right,
   1418                                                                              au1_sao_src_top_left_chroma_bot_left,
   1419                                                                              au1_avail_chroma,
   1420                                                                              ai1_offset_cr,
   1421                                                                              ai1_offset_cb,
   1422                                                                              sao_wd_chroma,
   1423                                                                              sao_ht_chroma);
   1424                     }
   1425                     else
   1426                     {
   1427                         ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   1428                                                                              src_strd,
   1429                                                                              pu1_src_left_chroma,
   1430                                                                              pu1_src_top_chroma,
   1431                                                                              pu1_sao_src_chroma_top_left_ctb,
   1432                                                                              au1_src_top_right,
   1433                                                                              au1_sao_src_top_left_chroma_bot_left,
   1434                                                                              au1_avail_chroma,
   1435                                                                              ai1_offset_cb,
   1436                                                                              ai1_offset_cr,
   1437                                                                              sao_wd_chroma,
   1438                                                                              sao_ht_chroma);
   1439                     }
   1440                 }
   1441             }
   1442         }
   1443         else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   1444         {
   1445             for(row = 0; row < sao_ht_chroma; row++)
   1446             {
   1447                 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   1448                 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   1449             }
   1450             pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   1451             pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   1452 
   1453             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   1454         }
   1455 
   1456         pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
   1457         pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
   1458         ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
   1459     }
   1460 
   1461 
   1462     /* Top CTB */
   1463     if((ps_sao_ctxt->i4_ctb_y > 0))
   1464     {
   1465         WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
   1466         WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
   1467         WORD32 sao_ht_luma = SAO_SHIFT_CTB;
   1468         WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
   1469 
   1470         WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
   1471         WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
   1472         WORD32 au4_idx_t[8], idx_t;
   1473 
   1474         WORD32 remaining_cols;
   1475 
   1476         slice_header_t *ps_slice_hdr_top;
   1477         {
   1478             WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
   1479                                         (ps_sao_ctxt->i4_ctb_x);
   1480             ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx];
   1481         }
   1482 
   1483         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
   1484         if(remaining_cols <= SAO_SHIFT_CTB)
   1485         {
   1486             sao_wd_luma += remaining_cols;
   1487         }
   1488         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
   1489         if(remaining_cols <= 2 * SAO_SHIFT_CTB)
   1490         {
   1491             sao_wd_chroma += remaining_cols;
   1492         }
   1493 
   1494         pu1_src_luma -= (sao_ht_luma * src_strd);
   1495         pu1_src_chroma -= (sao_ht_chroma * src_strd);
   1496         ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
   1497         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
   1498         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
   1499         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
   1500         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
   1501 
   1502         if(0 != sao_wd_luma)
   1503         {
   1504             if(ps_slice_hdr_top->i1_slice_sao_luma_flag)
   1505             {
   1506                 if(0 == ps_sao->b3_y_type_idx)
   1507                 {
   1508                     /* Update left, top and top-left */
   1509                     for(row = 0; row < sao_ht_luma; row++)
   1510                     {
   1511                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   1512                     }
   1513                     pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   1514 
   1515                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   1516 
   1517                 }
   1518 
   1519                 else if(1 == ps_sao->b3_y_type_idx)
   1520                 {
   1521                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   1522                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   1523                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   1524                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   1525 
   1526                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
   1527                                                                               src_strd,
   1528                                                                               pu1_src_left_luma,
   1529                                                                               pu1_src_top_luma,
   1530                                                                               pu1_sao_src_luma_top_left_ctb,
   1531                                                                               ps_sao->b5_y_band_pos,
   1532                                                                               ai1_offset_y,
   1533                                                                               sao_wd_luma,
   1534                                                                               sao_ht_luma
   1535                                                                              );
   1536                 }
   1537 
   1538                 else // if(2 <= ps_sao->b3_y_type_idx)
   1539                 {
   1540                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   1541                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   1542                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   1543                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   1544 
   1545                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
   1546                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
   1547                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
   1548 
   1549                     for(i = 0; i < 8; i++)
   1550                     {
   1551 
   1552                         au4_ilf_across_tile_slice_enable[i] = 1;
   1553                     }
   1554                     /******************************************************************
   1555                      * Derive the Top CTB's neighbor pixel's slice indices.
   1556                      *
   1557                      *               T_T
   1558                      *          ____________
   1559                      *         |    |       |
   1560                      *         | T_L|  T    |T_R
   1561                      *         |    | ______|____
   1562                      *         |    |  T_D  |    |
   1563                      *         |    |       |    |
   1564                      *         |____|_______|    |
   1565                      *              |            |
   1566                      *              |            |
   1567                      *              |____________|
   1568                      *
   1569                      *****************************************************************/
   1570 
   1571                     /*In case of slices*/
   1572                     {
   1573                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   1574                         {
   1575 
   1576                             ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
   1577                             ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
   1578 
   1579                             ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
   1580                             ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
   1581 
   1582                             ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
   1583                             ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
   1584 
   1585                             ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
   1586                             ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
   1587 
   1588                             ctbx_t = ps_sao_ctxt->i4_ctb_x;
   1589                             ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
   1590 
   1591                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   1592                             {
   1593                                 /*Calculate neighbor ctb slice indices*/
   1594                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   1595                                 {
   1596                                     au4_idx_t[0] = -1;
   1597                                     au4_idx_t[6] = -1;
   1598                                     au4_idx_t[4] = -1;
   1599                                 }
   1600                                 else
   1601                                 {
   1602                                     au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
   1603                                     au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1604                                 }
   1605                                 idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
   1606                                 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
   1607                                 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
   1608                                 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1609 
   1610                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
   1611                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   1612                                 {
   1613                                     au4_ilf_across_tile_slice_enable[4] = 0;
   1614                                     au4_ilf_across_tile_slice_enable[6] = 0;
   1615                                     au4_ilf_across_tile_slice_enable[0] = 0;
   1616                                 }
   1617                                 else
   1618                                 {
   1619                                     au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1620                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
   1621                                 }
   1622 
   1623 
   1624 
   1625                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1626                                 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1627                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   1628                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   1629                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   1630 
   1631                                 if(au4_idx_t[6] < idx_t)
   1632                                 {
   1633                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1634                                 }
   1635 
   1636                                 /*
   1637                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   1638                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   1639                                  */
   1640 
   1641                                 for(i = 0; i < 8; i++)
   1642                                 {
   1643                                     /*Sets the edges that lie on the slice/tile boundary*/
   1644                                     if(au4_idx_t[i] != idx_t)
   1645                                     {
   1646                                         au1_tile_slice_boundary[i] = 1;
   1647                                         /*Check for slice flag at such boundaries*/
   1648                                     }
   1649                                     else
   1650                                     {
   1651                                         au4_ilf_across_tile_slice_enable[i] = 1;
   1652                                     }
   1653                                 }
   1654                                 /*Reset indices*/
   1655                                 for(i = 0; i < 8; i++)
   1656                                 {
   1657                                     au4_idx_t[i] = 0;
   1658                                 }
   1659                             }
   1660 
   1661                             if(ps_pps->i1_tiles_enabled_flag)
   1662                             {
   1663                                 /* Calculate availability flags at tile boundary */
   1664                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   1665                                 {
   1666                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   1667                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   1668                                     {
   1669                                         /*Calculate neighbor ctb tile indices*/
   1670                                         if(0 == ps_sao_ctxt->i4_ctb_x)
   1671                                         {
   1672                                             au4_idx_t[0] = -1;
   1673                                             au4_idx_t[6] = -1;
   1674                                             au4_idx_t[4] = -1;
   1675                                         }
   1676                                         else
   1677                                         {
   1678                                             au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
   1679                                             au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1680                                         }
   1681                                         idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
   1682                                         au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
   1683                                         au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
   1684                                         au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1685 
   1686                                         for(i = 0; i < 8; i++)
   1687                                         {
   1688                                             /*Sets the edges that lie on the tile boundary*/
   1689                                             if(au4_idx_t[i] != idx_t)
   1690                                             {
   1691                                                 au1_tile_slice_boundary[i] |= 1;
   1692                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
   1693                                             }
   1694                                         }
   1695                                     }
   1696                                 }
   1697                             }
   1698 
   1699                             for(i = 0; i < 8; i++)
   1700                             {
   1701                                 /*Sets the edges that lie on the slice/tile boundary*/
   1702                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   1703                                 {
   1704                                     au1_avail_luma[i] = 0;
   1705                                 }
   1706                             }
   1707                         }
   1708                     }
   1709 
   1710 
   1711                     if(0 == ps_sao_ctxt->i4_ctb_x)
   1712                     {
   1713                         au1_avail_luma[0] = 0;
   1714                         au1_avail_luma[4] = 0;
   1715                         au1_avail_luma[6] = 0;
   1716                     }
   1717 
   1718                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
   1719                     {
   1720                         au1_avail_luma[1] = 0;
   1721                         au1_avail_luma[5] = 0;
   1722                         au1_avail_luma[7] = 0;
   1723                     }
   1724 
   1725                     if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
   1726                     {
   1727                         au1_avail_luma[2] = 0;
   1728                         au1_avail_luma[4] = 0;
   1729                         au1_avail_luma[5] = 0;
   1730                     }
   1731 
   1732                     if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
   1733                     {
   1734                         au1_avail_luma[3] = 0;
   1735                         au1_avail_luma[6] = 0;
   1736                         au1_avail_luma[7] = 0;
   1737                     }
   1738 
   1739                     {
   1740                         au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
   1741                         u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
   1742                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
   1743                                                                           src_strd,
   1744                                                                           pu1_src_left_luma,
   1745                                                                           pu1_src_top_luma,
   1746                                                                           pu1_sao_src_luma_top_left_ctb,
   1747                                                                           au1_src_top_right,
   1748                                                                           &u1_sao_src_top_left_luma_bot_left,
   1749                                                                           au1_avail_luma,
   1750                                                                           ai1_offset_y,
   1751                                                                           sao_wd_luma,
   1752                                                                           sao_ht_luma);
   1753                     }
   1754                 }
   1755             }
   1756             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   1757             {
   1758                 /* Update left, top and top-left */
   1759                 for(row = 0; row < sao_ht_luma; row++)
   1760                 {
   1761                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   1762                 }
   1763                 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   1764 
   1765                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   1766             }
   1767         }
   1768 
   1769         if(0 != sao_wd_chroma)
   1770         {
   1771             if(ps_slice_hdr_top->i1_slice_sao_chroma_flag)
   1772             {
   1773                 if(0 == ps_sao->b3_cb_type_idx)
   1774                 {
   1775 
   1776                     for(row = 0; row < sao_ht_chroma; row++)
   1777                     {
   1778                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   1779                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   1780                     }
   1781                     pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   1782                     pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   1783 
   1784                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   1785 
   1786                 }
   1787 
   1788                 else if(1 == ps_sao->b3_cb_type_idx)
   1789                 {
   1790                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   1791                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   1792                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   1793                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   1794 
   1795                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   1796                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   1797                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   1798                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   1799 
   1800                     if(chroma_yuv420sp_vu)
   1801                     {
   1802                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   1803                                                                                     src_strd,
   1804                                                                                     pu1_src_left_chroma,
   1805                                                                                     pu1_src_top_chroma,
   1806                                                                                     pu1_sao_src_chroma_top_left_ctb,
   1807                                                                                     ps_sao->b5_cr_band_pos,
   1808                                                                                     ps_sao->b5_cb_band_pos,
   1809                                                                                     ai1_offset_cr,
   1810                                                                                     ai1_offset_cb,
   1811                                                                                     sao_wd_chroma,
   1812                                                                                     sao_ht_chroma
   1813                                                                                    );
   1814                     }
   1815                     else
   1816                     {
   1817                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   1818                                                                                     src_strd,
   1819                                                                                     pu1_src_left_chroma,
   1820                                                                                     pu1_src_top_chroma,
   1821                                                                                     pu1_sao_src_chroma_top_left_ctb,
   1822                                                                                     ps_sao->b5_cb_band_pos,
   1823                                                                                     ps_sao->b5_cr_band_pos,
   1824                                                                                     ai1_offset_cb,
   1825                                                                                     ai1_offset_cr,
   1826                                                                                     sao_wd_chroma,
   1827                                                                                     sao_ht_chroma
   1828                                                                                    );
   1829                     }
   1830                 }
   1831                 else // if(2 <= ps_sao->b3_cb_type_idx)
   1832                 {
   1833                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   1834                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   1835                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   1836                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   1837 
   1838                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   1839                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   1840                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   1841                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   1842 
   1843                     for(i = 0; i < 8; i++)
   1844                     {
   1845                         au1_avail_chroma[i] = 255;
   1846                         au1_tile_slice_boundary[i] = 0;
   1847                         au4_idx_t[i] = 0;
   1848                         au4_ilf_across_tile_slice_enable[i] = 1;
   1849                     }
   1850 
   1851                     {
   1852                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   1853                         {
   1854                             ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
   1855                             ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
   1856 
   1857                             ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
   1858                             ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
   1859 
   1860                             ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
   1861                             ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
   1862 
   1863                             ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
   1864                             ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
   1865 
   1866                             ctbx_t = ps_sao_ctxt->i4_ctb_x;
   1867                             ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
   1868 
   1869                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   1870                             {
   1871                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   1872                                 {
   1873                                     au4_idx_t[0] = -1;
   1874                                     au4_idx_t[6] = -1;
   1875                                     au4_idx_t[4] = -1;
   1876                                 }
   1877                                 else
   1878                                 {
   1879                                     au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
   1880                                     au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1881                                 }
   1882                                 idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
   1883                                 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
   1884                                 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
   1885                                 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1886 
   1887                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
   1888 
   1889                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   1890                                 {
   1891                                     au4_ilf_across_tile_slice_enable[4] = 0;
   1892                                     au4_ilf_across_tile_slice_enable[6] = 0;
   1893                                     au4_ilf_across_tile_slice_enable[0] = 0;
   1894                                 }
   1895                                 else
   1896                                 {
   1897                                     au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1898                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
   1899                                 }
   1900 
   1901                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag;
   1902                                 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1903                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   1904                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   1905                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   1906 
   1907                                 if(idx_t > au4_idx_t[6])
   1908                                 {
   1909                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1910                                 }
   1911 
   1912                                 /*
   1913                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   1914                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   1915                                  */
   1916                                 for(i = 0; i < 8; i++)
   1917                                 {
   1918                                     /*Sets the edges that lie on the slice/tile boundary*/
   1919                                     if(au4_idx_t[i] != idx_t)
   1920                                     {
   1921                                         au1_tile_slice_boundary[i] = 1;
   1922                                     }
   1923                                     else
   1924                                     {
   1925                                         /*Indicates that the neighbour belongs to same/dependent slice*/
   1926                                         au4_ilf_across_tile_slice_enable[i] = 1;
   1927                                     }
   1928                                 }
   1929                                 /*Reset indices*/
   1930                                 for(i = 0; i < 8; i++)
   1931                                 {
   1932                                     au4_idx_t[i] = 0;
   1933                                 }
   1934                             }
   1935                             if(ps_pps->i1_tiles_enabled_flag)
   1936                             {
   1937                                 /* Calculate availability flags at tile boundary */
   1938                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   1939                                 {
   1940                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   1941                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   1942                                     {
   1943                                         /*Calculate neighbor ctb tile indices*/
   1944                                         if(0 == ps_sao_ctxt->i4_ctb_x)
   1945                                         {
   1946                                             au4_idx_t[0] = -1;
   1947                                             au4_idx_t[6] = -1;
   1948                                             au4_idx_t[4] = -1;
   1949                                         }
   1950                                         else
   1951                                         {
   1952                                             au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
   1953                                             au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1954                                         }
   1955                                         idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
   1956                                         au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
   1957                                         au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
   1958                                         au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1959 
   1960                                         for(i = 0; i < 8; i++)
   1961                                         {
   1962                                             /*Sets the edges that lie on the tile boundary*/
   1963                                             if(au4_idx_t[i] != idx_t)
   1964                                             {
   1965                                                 au1_tile_slice_boundary[i] |= 1;
   1966                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
   1967                                             }
   1968                                         }
   1969                                     }
   1970                                 }
   1971                             }
   1972                             for(i = 0; i < 8; i++)
   1973                             {
   1974                                 /*Sets the edges that lie on the slice/tile boundary*/
   1975                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   1976                                 {
   1977                                     au1_avail_chroma[i] = 0;
   1978                                 }
   1979                             }
   1980 
   1981                         }
   1982                     }
   1983                     if(0 == ps_sao_ctxt->i4_ctb_x)
   1984                     {
   1985                         au1_avail_chroma[0] = 0;
   1986                         au1_avail_chroma[4] = 0;
   1987                         au1_avail_chroma[6] = 0;
   1988                     }
   1989 
   1990                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
   1991                     {
   1992                         au1_avail_chroma[1] = 0;
   1993                         au1_avail_chroma[5] = 0;
   1994                         au1_avail_chroma[7] = 0;
   1995                     }
   1996 
   1997                     if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
   1998                     {
   1999                         au1_avail_chroma[2] = 0;
   2000                         au1_avail_chroma[4] = 0;
   2001                         au1_avail_chroma[5] = 0;
   2002                     }
   2003 
   2004                     if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
   2005                     {
   2006                         au1_avail_chroma[3] = 0;
   2007                         au1_avail_chroma[6] = 0;
   2008                         au1_avail_chroma[7] = 0;
   2009                     }
   2010 
   2011                     {
   2012                         au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
   2013                         au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
   2014                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
   2015                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
   2016 
   2017                         if(chroma_yuv420sp_vu)
   2018                         {
   2019                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   2020                                                                                  src_strd,
   2021                                                                                  pu1_src_left_chroma,
   2022                                                                                  pu1_src_top_chroma,
   2023                                                                                  pu1_sao_src_chroma_top_left_ctb,
   2024                                                                                  au1_src_top_right,
   2025                                                                                  au1_sao_src_top_left_chroma_bot_left,
   2026                                                                                  au1_avail_chroma,
   2027                                                                                  ai1_offset_cr,
   2028                                                                                  ai1_offset_cb,
   2029                                                                                  sao_wd_chroma,
   2030                                                                                  sao_ht_chroma);
   2031                         }
   2032                         else
   2033                         {
   2034                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   2035                                                                                  src_strd,
   2036                                                                                  pu1_src_left_chroma,
   2037                                                                                  pu1_src_top_chroma,
   2038                                                                                  pu1_sao_src_chroma_top_left_ctb,
   2039                                                                                  au1_src_top_right,
   2040                                                                                  au1_sao_src_top_left_chroma_bot_left,
   2041                                                                                  au1_avail_chroma,
   2042                                                                                  ai1_offset_cb,
   2043                                                                                  ai1_offset_cr,
   2044                                                                                  sao_wd_chroma,
   2045                                                                                  sao_ht_chroma);
   2046                         }
   2047                     }
   2048 
   2049                 }
   2050             }
   2051             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2052             {
   2053                 for(row = 0; row < sao_ht_chroma; row++)
   2054                 {
   2055                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   2056                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   2057                 }
   2058                 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   2059                 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   2060 
   2061                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   2062             }
   2063         }
   2064 
   2065         pu1_src_luma += sao_ht_luma * src_strd;
   2066         pu1_src_chroma += sao_ht_chroma * src_strd;
   2067         ps_sao += (ps_sps->i2_pic_wd_in_ctb);
   2068     }
   2069 
   2070     /* Left CTB */
   2071     if(ps_sao_ctxt->i4_ctb_x > 0)
   2072     {
   2073         WORD32 sao_wd_luma = SAO_SHIFT_CTB;
   2074         WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
   2075         WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
   2076         WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
   2077 
   2078         WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
   2079         WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
   2080         WORD32 au4_idx_l[8], idx_l;
   2081 
   2082         WORD32 remaining_rows;
   2083         slice_header_t *ps_slice_hdr_left;
   2084         {
   2085             WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb +
   2086                                         (ps_sao_ctxt->i4_ctb_x - 1);
   2087             ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx];
   2088         }
   2089 
   2090         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
   2091         if(remaining_rows <= SAO_SHIFT_CTB)
   2092         {
   2093             sao_ht_luma += remaining_rows;
   2094         }
   2095         remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
   2096         if(remaining_rows <= SAO_SHIFT_CTB)
   2097         {
   2098             sao_ht_chroma += remaining_rows;
   2099         }
   2100 
   2101         pu1_src_luma -= sao_wd_luma;
   2102         pu1_src_chroma -= sao_wd_chroma;
   2103         ps_sao -= 1;
   2104         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
   2105         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
   2106         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
   2107         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
   2108 
   2109 
   2110         if(0 != sao_ht_luma)
   2111         {
   2112             if(ps_slice_hdr_left->i1_slice_sao_luma_flag)
   2113             {
   2114                 if(0 == ps_sao->b3_y_type_idx)
   2115                 {
   2116                     /* Update left, top and top-left */
   2117                     for(row = 0; row < sao_ht_luma; row++)
   2118                     {
   2119                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   2120                     }
   2121                     /*Update in next location*/
   2122                     pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   2123 
   2124                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   2125 
   2126                 }
   2127 
   2128                 else if(1 == ps_sao->b3_y_type_idx)
   2129                 {
   2130                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   2131                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   2132                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   2133                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   2134 
   2135                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
   2136                                                                               src_strd,
   2137                                                                               pu1_src_left_luma,
   2138                                                                               pu1_src_top_luma,
   2139                                                                               pu1_sao_src_top_left_luma_curr_ctb,
   2140                                                                               ps_sao->b5_y_band_pos,
   2141                                                                               ai1_offset_y,
   2142                                                                               sao_wd_luma,
   2143                                                                               sao_ht_luma
   2144                                                                              );
   2145                 }
   2146 
   2147                 else // if(2 <= ps_sao->b3_y_type_idx)
   2148                 {
   2149                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   2150                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   2151                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   2152                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   2153 
   2154                     for(i = 0; i < 8; i++)
   2155                     {
   2156                         au1_avail_luma[i] = 255;
   2157                         au1_tile_slice_boundary[i] = 0;
   2158                         au4_idx_l[i] = 0;
   2159                         au4_ilf_across_tile_slice_enable[i] = 1;
   2160                     }
   2161                     /******************************************************************
   2162                      * Derive the Left CTB's neighbour pixel's slice indices.
   2163                      *
   2164                      *
   2165                      *          ____________
   2166                      *         |    |       |
   2167                      *         | L_T|       |
   2168                      *         |____|_______|____
   2169                      *         |    |       |    |
   2170                      *     L_L |  L |  L_R  |    |
   2171                      *         |____|_______|    |
   2172                      *              |            |
   2173                      *          L_D |            |
   2174                      *              |____________|
   2175                      *
   2176                      *****************************************************************/
   2177 
   2178                     /*In case of slices or tiles*/
   2179                     {
   2180                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2181                         {
   2182                             ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
   2183                             ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
   2184 
   2185                             ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
   2186                             ctby_l_l = ps_sao_ctxt->i4_ctb_y;
   2187 
   2188                             ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
   2189                             ctby_l_r = ps_sao_ctxt->i4_ctb_y;
   2190 
   2191                             ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
   2192                             ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
   2193 
   2194                             ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
   2195                             ctby_l = ps_sao_ctxt->i4_ctb_y;
   2196 
   2197                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   2198                             {
   2199                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2200                                 {
   2201                                     au4_idx_l[2] = -1;
   2202                                     au4_idx_l[4] = -1;
   2203                                     au4_idx_l[5] = -1;
   2204                                 }
   2205                                 else
   2206                                 {
   2207                                     au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
   2208                                     au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
   2209                                 }
   2210                                 idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
   2211                                 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
   2212                                 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
   2213                                 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
   2214 
   2215                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
   2216                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2217                                 {
   2218                                     au4_ilf_across_tile_slice_enable[2] = 0;
   2219                                     au4_ilf_across_tile_slice_enable[4] = 0;
   2220                                     au4_ilf_across_tile_slice_enable[5] = 0;
   2221                                 }
   2222                                 else
   2223                                 {
   2224                                     au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
   2225                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
   2226 
   2227                                 }
   2228                                 //TODO: ILF flag checks for [0] and [6] is missing.
   2229                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   2230                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   2231                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   2232 
   2233                                 if(idx_l < au4_idx_l[5])
   2234                                 {
   2235                                     au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
   2236                                 }
   2237 
   2238                                 /*
   2239                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   2240                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   2241                                  */
   2242                                 for(i = 0; i < 8; i++)
   2243                                 {
   2244                                     /*Sets the edges that lie on the slice/tile boundary*/
   2245                                     if(au4_idx_l[i] != idx_l)
   2246                                     {
   2247                                         au1_tile_slice_boundary[i] = 1;
   2248                                     }
   2249                                     else
   2250                                     {
   2251                                         au4_ilf_across_tile_slice_enable[i] = 1;
   2252                                     }
   2253                                 }
   2254                                 /*Reset indices*/
   2255                                 for(i = 0; i < 8; i++)
   2256                                 {
   2257                                     au4_idx_l[i] = 0;
   2258                                 }
   2259                             }
   2260 
   2261                             if(ps_pps->i1_tiles_enabled_flag)
   2262                             {
   2263                                 /* Calculate availability flags at tile boundary */
   2264                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   2265                                 {
   2266                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   2267                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   2268                                     {
   2269                                         if(0 == ps_sao_ctxt->i4_ctb_y)
   2270                                         {
   2271                                             au4_idx_l[2] = -1;
   2272                                             au4_idx_l[4] = -1;
   2273                                             au4_idx_l[5] = -1;
   2274                                         }
   2275                                         else
   2276                                         {
   2277                                             au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
   2278                                             au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
   2279                                         }
   2280 
   2281                                         idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
   2282                                         au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
   2283                                         au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
   2284                                         au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
   2285 
   2286                                         for(i = 0; i < 8; i++)
   2287                                         {
   2288                                             /*Sets the edges that lie on the slice/tile boundary*/
   2289                                             if(au4_idx_l[i] != idx_l)
   2290                                             {
   2291                                                 au1_tile_slice_boundary[i] |= 1;
   2292                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
   2293                                             }
   2294                                         }
   2295                                     }
   2296                                 }
   2297                             }
   2298 
   2299                             for(i = 0; i < 8; i++)
   2300                             {
   2301                                 /*Sets the edges that lie on the slice/tile boundary*/
   2302                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   2303                                 {
   2304                                     au1_avail_luma[i] = 0;
   2305                                 }
   2306                             }
   2307                         }
   2308                     }
   2309                     if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
   2310                     {
   2311                         au1_avail_luma[0] = 0;
   2312                         au1_avail_luma[4] = 0;
   2313                         au1_avail_luma[6] = 0;
   2314                     }
   2315                     if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
   2316                     {
   2317                         au1_avail_luma[1] = 0;
   2318                         au1_avail_luma[5] = 0;
   2319                         au1_avail_luma[7] = 0;
   2320                     }
   2321 
   2322                     if(0 == ps_sao_ctxt->i4_ctb_y)
   2323                     {
   2324                         au1_avail_luma[2] = 0;
   2325                         au1_avail_luma[4] = 0;
   2326                         au1_avail_luma[5] = 0;
   2327                     }
   2328 
   2329                     if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
   2330                     {
   2331                         au1_avail_luma[3] = 0;
   2332                         au1_avail_luma[6] = 0;
   2333                         au1_avail_luma[7] = 0;
   2334                     }
   2335 
   2336                     {
   2337                         au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
   2338                         u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
   2339                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
   2340                                                                           src_strd,
   2341                                                                           pu1_src_left_luma,
   2342                                                                           pu1_src_top_luma,
   2343                                                                           pu1_sao_src_top_left_luma_curr_ctb,
   2344                                                                           au1_src_top_right,
   2345                                                                           &u1_sao_src_top_left_luma_bot_left,
   2346                                                                           au1_avail_luma,
   2347                                                                           ai1_offset_y,
   2348                                                                           sao_wd_luma,
   2349                                                                           sao_ht_luma);
   2350                     }
   2351 
   2352                 }
   2353             }
   2354             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2355             {
   2356                 /* Update left, top and top-left */
   2357                 for(row = 0; row < sao_ht_luma; row++)
   2358                 {
   2359                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   2360                 }
   2361                 /*Update in next location*/
   2362                 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   2363 
   2364                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   2365             }
   2366         }
   2367 
   2368         if(0 != sao_ht_chroma)
   2369         {
   2370             if(ps_slice_hdr_left->i1_slice_sao_chroma_flag)
   2371             {
   2372                 if(0 == ps_sao->b3_cb_type_idx)
   2373                 {
   2374                     for(row = 0; row < sao_ht_chroma; row++)
   2375                     {
   2376                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   2377                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   2378                     }
   2379                     pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   2380                     pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   2381 
   2382                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   2383                 }
   2384 
   2385                 else if(1 == ps_sao->b3_cb_type_idx)
   2386                 {
   2387                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   2388                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   2389                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   2390                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   2391 
   2392                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   2393                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   2394                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   2395                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   2396 
   2397                     if(chroma_yuv420sp_vu)
   2398                     {
   2399                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   2400                                                                                     src_strd,
   2401                                                                                     pu1_src_left_chroma,
   2402                                                                                     pu1_src_top_chroma,
   2403                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
   2404                                                                                     ps_sao->b5_cr_band_pos,
   2405                                                                                     ps_sao->b5_cb_band_pos,
   2406                                                                                     ai1_offset_cr,
   2407                                                                                     ai1_offset_cb,
   2408                                                                                     sao_wd_chroma,
   2409                                                                                     sao_ht_chroma
   2410                                                                                    );
   2411                     }
   2412                     else
   2413                     {
   2414                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   2415                                                                                     src_strd,
   2416                                                                                     pu1_src_left_chroma,
   2417                                                                                     pu1_src_top_chroma,
   2418                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
   2419                                                                                     ps_sao->b5_cb_band_pos,
   2420                                                                                     ps_sao->b5_cr_band_pos,
   2421                                                                                     ai1_offset_cb,
   2422                                                                                     ai1_offset_cr,
   2423                                                                                     sao_wd_chroma,
   2424                                                                                     sao_ht_chroma
   2425                                                                                    );
   2426                     }
   2427                 }
   2428 
   2429                 else // if(2 <= ps_sao->b3_cb_type_idx)
   2430                 {
   2431                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   2432                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   2433                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   2434                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   2435 
   2436                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   2437                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   2438                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   2439                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   2440 
   2441                     for(i = 0; i < 8; i++)
   2442                     {
   2443                         au1_avail_chroma[i] = 255;
   2444                         au1_tile_slice_boundary[i] = 0;
   2445                         au4_idx_l[i] = 0;
   2446                         au4_ilf_across_tile_slice_enable[i] = 1;
   2447                     }
   2448                     /*In case of slices or tiles*/
   2449                     {
   2450                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2451                         {
   2452                             ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
   2453                             ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
   2454 
   2455                             ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
   2456                             ctby_l_l = ps_sao_ctxt->i4_ctb_y;
   2457 
   2458                             ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
   2459                             ctby_l_r = ps_sao_ctxt->i4_ctb_y;
   2460 
   2461                             ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
   2462                             ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
   2463 
   2464                             ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
   2465                             ctby_l = ps_sao_ctxt->i4_ctb_y;
   2466 
   2467                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   2468                             {
   2469                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2470                                 {
   2471                                     au4_idx_l[2] = -1;
   2472                                     au4_idx_l[4] = -1;
   2473                                     au4_idx_l[5] = -1;
   2474                                 }
   2475                                 else
   2476                                 {
   2477                                     au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
   2478                                     au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
   2479                                 }
   2480                                 idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
   2481                                 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
   2482                                 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
   2483                                 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
   2484 
   2485                                 /*Verify that the neighbour ctbs dont cross pic boundary.*/
   2486                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2487                                 {
   2488                                     au4_ilf_across_tile_slice_enable[2] = 0;
   2489                                     au4_ilf_across_tile_slice_enable[4] = 0;
   2490                                     au4_ilf_across_tile_slice_enable[5] = 0;
   2491                                 }
   2492                                 else
   2493                                 {
   2494                                     au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
   2495                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
   2496                                 }
   2497 
   2498                                 if(au4_idx_l[5] > idx_l)
   2499                                 {
   2500                                     au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
   2501                                 }
   2502 
   2503                                 //  au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
   2504                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   2505                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   2506                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   2507                                 /*
   2508                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   2509                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   2510                                  */
   2511                                 for(i = 0; i < 8; i++)
   2512                                 {
   2513                                     /*Sets the edges that lie on the slice/tile boundary*/
   2514                                     if(au4_idx_l[i] != idx_l)
   2515                                     {
   2516                                         au1_tile_slice_boundary[i] = 1;
   2517                                     }
   2518                                     else
   2519                                     {
   2520                                         au4_ilf_across_tile_slice_enable[i] = 1;
   2521                                     }
   2522                                 }
   2523                                 /*Reset indices*/
   2524                                 for(i = 0; i < 8; i++)
   2525                                 {
   2526                                     au4_idx_l[i] = 0;
   2527                                 }
   2528                             }
   2529                             if(ps_pps->i1_tiles_enabled_flag)
   2530                             {
   2531                                 /* Calculate availability flags at tile boundary */
   2532                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   2533                                 {
   2534                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   2535                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   2536                                     {
   2537                                         if(0 == ps_sao_ctxt->i4_ctb_y)
   2538                                         {
   2539                                             au4_idx_l[2] = -1;
   2540                                             au4_idx_l[4] = -1;
   2541                                             au4_idx_l[5] = -1;
   2542                                         }
   2543                                         else
   2544                                         {
   2545                                             au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
   2546                                             au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
   2547                                         }
   2548 
   2549                                         idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
   2550                                         au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
   2551                                         au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
   2552                                         au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
   2553 
   2554                                         for(i = 0; i < 8; i++)
   2555                                         {
   2556                                             /*Sets the edges that lie on the slice/tile boundary*/
   2557                                             if(au4_idx_l[i] != idx_l)
   2558                                             {
   2559                                                 au1_tile_slice_boundary[i] |= 1;
   2560                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
   2561                                             }
   2562                                         }
   2563                                     }
   2564                                 }
   2565                             }
   2566                             for(i = 0; i < 8; i++)
   2567                             {
   2568                                 /* Mark a neighbour as unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled */
   2569                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   2570                                 {
   2571                                     au1_avail_chroma[i] = 0;
   2572                                 }
   2573                             }
   2574                         }
   2575                     }
   2576                     if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
   2577                     {
   2578                         au1_avail_chroma[0] = 0;
   2579                         au1_avail_chroma[4] = 0;
   2580                         au1_avail_chroma[6] = 0;
   2581                     }
   2582 
   2583                     if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
   2584                     {
   2585                         au1_avail_chroma[1] = 0;
   2586                         au1_avail_chroma[5] = 0;
   2587                         au1_avail_chroma[7] = 0;
   2588                     }
   2589 
   2590                     if(0 == ps_sao_ctxt->i4_ctb_y)
   2591                     {
   2592                         au1_avail_chroma[2] = 0;
   2593                         au1_avail_chroma[4] = 0;
   2594                         au1_avail_chroma[5] = 0;
   2595                     }
   2596 
   2597                     if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
   2598                     {
   2599                         au1_avail_chroma[3] = 0;
   2600                         au1_avail_chroma[6] = 0;
   2601                         au1_avail_chroma[7] = 0;
   2602                     }
   2603 
   2604                     {
   2605                         au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
   2606                         au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
   2607                         au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
   2608                         au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
   2609                         //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
   2610                         //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
   2611                         if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
   2612                         {
   2613                             au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
   2614                             au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
   2615                         }
   2616 
   2617 
   2618                         if(chroma_yuv420sp_vu)
   2619                         {
   2620                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   2621                                                                                  src_strd,
   2622                                                                                  pu1_src_left_chroma,
   2623                                                                                  pu1_src_top_chroma,
   2624                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
   2625                                                                                  au1_src_top_right,
   2626                                                                                  au1_src_bot_left,
   2627                                                                                  au1_avail_chroma,
   2628                                                                                  ai1_offset_cr,
   2629                                                                                  ai1_offset_cb,
   2630                                                                                  sao_wd_chroma,
   2631                                                                                  sao_ht_chroma);
   2632                         }
   2633                         else
   2634                         {
   2635                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   2636                                                                                  src_strd,
   2637                                                                                  pu1_src_left_chroma,
   2638                                                                                  pu1_src_top_chroma,
   2639                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
   2640                                                                                  au1_src_top_right,
   2641                                                                                  au1_src_bot_left,
   2642                                                                                  au1_avail_chroma,
   2643                                                                                  ai1_offset_cb,
   2644                                                                                  ai1_offset_cr,
   2645                                                                                  sao_wd_chroma,
   2646                                                                                  sao_ht_chroma);
   2647                         }
   2648                     }
   2649 
   2650                 }
   2651             }
   2652             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2653             {
   2654                 for(row = 0; row < sao_ht_chroma; row++)
   2655                 {
   2656                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   2657                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   2658                 }
   2659                 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   2660                 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   2661 
   2662                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   2663             }
   2664 
   2665         }
   2666         pu1_src_luma += sao_wd_luma;
   2667         pu1_src_chroma += sao_wd_chroma;
   2668         ps_sao += 1;
   2669     }
   2670 
   2671 
   2672     /* Current CTB */
   2673     {
   2674         WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
   2675         WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
   2676         WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
   2677         WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
   2678         WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
   2679         WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
   2680         WORD32 au4_idx_c[8], idx_c;
   2681 
   2682         WORD32 remaining_rows;
   2683         WORD32 remaining_cols;
   2684 
   2685         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
   2686         if(remaining_cols <= SAO_SHIFT_CTB)
   2687         {
   2688             sao_wd_luma += remaining_cols;
   2689         }
   2690         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
   2691         if(remaining_cols <= 2 * SAO_SHIFT_CTB)
   2692         {
   2693             sao_wd_chroma += remaining_cols;
   2694         }
   2695 
   2696         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
   2697         if(remaining_rows <= SAO_SHIFT_CTB)
   2698         {
   2699             sao_ht_luma += remaining_rows;
   2700         }
   2701         remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
   2702         if(remaining_rows <= SAO_SHIFT_CTB)
   2703         {
   2704             sao_ht_chroma += remaining_rows;
   2705         }
   2706 
   2707         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
   2708         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
   2709         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
   2710         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
   2711 
   2712         if((0 != sao_wd_luma) && (0 != sao_ht_luma))
   2713         {
   2714             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
   2715             {
   2716                 if(0 == ps_sao->b3_y_type_idx)
   2717                 {
   2718                     /* Update left, top and top-left */
   2719                     for(row = 0; row < sao_ht_luma; row++)
   2720                     {
   2721                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   2722                     }
   2723                     pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   2724 
   2725                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   2726 
   2727                     pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
   2728 
   2729                 }
   2730 
   2731                 else if(1 == ps_sao->b3_y_type_idx)
   2732                 {
   2733                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   2734                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   2735                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   2736                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   2737 
   2738                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
   2739                                                                               src_strd,
   2740                                                                               pu1_src_left_luma,
   2741                                                                               pu1_src_top_luma,
   2742                                                                               pu1_sao_src_top_left_luma_curr_ctb,
   2743                                                                               ps_sao->b5_y_band_pos,
   2744                                                                               ai1_offset_y,
   2745                                                                               sao_wd_luma,
   2746                                                                               sao_ht_luma
   2747                                                                              );
   2748                 }
   2749 
   2750                 else // if(2 <= ps_sao->b3_y_type_idx)
   2751                 {
   2752                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   2753                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   2754                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   2755                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   2756 
   2757                     for(i = 0; i < 8; i++)
   2758                     {
   2759                         au1_avail_luma[i] = 255;
   2760                         au1_tile_slice_boundary[i] = 0;
   2761                         au4_idx_c[i] = 0;
   2762                         au4_ilf_across_tile_slice_enable[i] = 1;
   2763                     }
   2764                     /******************************************************************
   2765                      * Derive the current CTB's neighbour pixel's slice indices.
   2766                      *
   2767                      *
   2768                      *          ____________
   2769                      *         |    |       |
   2770                      *         |    | C_T   |
   2771                      *         |____|_______|____
   2772                      *         |    |       |    |
   2773                      *         | C_L|   C   | C_R|
   2774                      *         |____|_______|    |
   2775                      *              |  C_D       |
   2776                      *              |            |
   2777                      *              |____________|
   2778                      *
   2779                      *****************************************************************/
   2780 
   2781                     /* In case of slices and/or tiles */
   2782                     {
   2783                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2784                         {
   2785                             ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
   2786                             ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
   2787 
   2788                             ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
   2789                             ctby_c_l = ps_sao_ctxt->i4_ctb_y;
   2790 
   2791                             ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
   2792                             ctby_c_r = ps_sao_ctxt->i4_ctb_y;
   2793 
   2794                             ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
   2795                             ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
   2796 
   2797                             ctbx_c = ps_sao_ctxt->i4_ctb_x;
   2798                             ctby_c = ps_sao_ctxt->i4_ctb_y;
   2799 
   2800                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   2801                             {
   2802                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   2803                                 {
   2804                                     au4_idx_c[6] = -1;
   2805                                     au4_idx_c[0] = -1;
   2806                                     au4_idx_c[4] = -1;
   2807                                 }
   2808                                 else
   2809                                 {
   2810                                     au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
   2811                                 }
   2812 
   2813                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2814                                 {
   2815                                     au4_idx_c[2] = -1;
   2816                                     au4_idx_c[5] = -1;
   2817                                     au4_idx_c[4] = -1;
   2818                                 }
   2819                                 else
   2820                                 {
   2821                                     au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
   2822                                     au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
   2823                                 }
   2824                                 idx_c   = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
   2825                                 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
   2826                                 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
   2827 
   2828                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   2829                                 {
   2830                                     au4_ilf_across_tile_slice_enable[6] = 0;
   2831                                     au4_ilf_across_tile_slice_enable[0] = 0;
   2832                                     au4_ilf_across_tile_slice_enable[4] = 0;
   2833                                 }
   2834                                 else
   2835                                 {
   2836                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
   2837                                     au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
   2838                                 }
   2839                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2840                                 {
   2841                                     au4_ilf_across_tile_slice_enable[2] = 0;
   2842                                     au4_ilf_across_tile_slice_enable[4] = 0;
   2843                                     au4_ilf_across_tile_slice_enable[5] = 0;
   2844                                 }
   2845                                 else
   2846                                 {
   2847                                     au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
   2848                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
   2849                                 }
   2850                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   2851                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   2852                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   2853 
   2854                                 if(au4_idx_c[6] < idx_c)
   2855                                 {
   2856                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
   2857                                 }
   2858 
   2859                                 /*
   2860                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   2861                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   2862                                  */
   2863                                 for(i = 0; i < 8; i++)
   2864                                 {
   2865                                     /*Sets the edges that lie on the slice/tile boundary*/
   2866                                     if(au4_idx_c[i] != idx_c)
   2867                                     {
   2868                                         au1_tile_slice_boundary[i] = 1;
   2869                                     }
   2870                                     else
   2871                                     {
   2872                                         au4_ilf_across_tile_slice_enable[i] = 1;
   2873                                     }
   2874                                 }
   2875                                 /*Reset indices*/
   2876                                 for(i = 0; i < 8; i++)
   2877                                 {
   2878                                     au4_idx_c[i] = 0;
   2879                                 }
   2880                             }
   2881 
   2882                             if(ps_pps->i1_tiles_enabled_flag)
   2883                             {
   2884                                 /* Calculate availability flags at tile boundary */
   2885                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   2886                                 {
   2887                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   2888                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   2889                                     {
   2890                                         if(0 == ps_sao_ctxt->i4_ctb_x)
   2891                                         {
   2892                                             au4_idx_c[6] = -1;
   2893                                             au4_idx_c[0] = -1;
   2894                                             au4_idx_c[4] = -1;
   2895                                         }
   2896                                         else
   2897                                         {
   2898                                             au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
   2899                                         }
   2900 
   2901                                         if(0 == ps_sao_ctxt->i4_ctb_y)
   2902                                         {
   2903                                             au4_idx_c[2] = -1;
   2904                                             au4_idx_c[5] = -1;
   2905                                             au4_idx_c[4] = -1;
   2906                                         }
   2907                                         else
   2908                                         {
   2909                                             au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
   2910                                             au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
   2911                                         }
   2912                                         idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
   2913                                         au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
   2914                                         au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
   2915 
   2916                                         for(i = 0; i < 8; i++)
   2917                                         {
   2918                                             /*Sets the edges that lie on the slice/tile boundary*/
   2919                                             if(au4_idx_c[i] != idx_c)
   2920                                             {
   2921                                                 au1_tile_slice_boundary[i] |= 1;
   2922                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
   2923                                             }
   2924                                         }
   2925                                     }
   2926                                 }
   2927                             }
   2928 
   2929                             for(i = 0; i < 8; i++)
   2930                             {
   2931                                 /* Mark a neighbour as unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled */
   2932                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   2933                                 {
   2934                                     au1_avail_luma[i] = 0;
   2935                                 }
   2936                             }
   2937 
   2938                         }
   2939                     }
   2940                     if(0 == ps_sao_ctxt->i4_ctb_x)
   2941                     {
   2942                         au1_avail_luma[0] = 0;
   2943                         au1_avail_luma[4] = 0;
   2944                         au1_avail_luma[6] = 0;
   2945                     }
   2946 
   2947                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
   2948                     {
   2949                         au1_avail_luma[1] = 0;
   2950                         au1_avail_luma[5] = 0;
   2951                         au1_avail_luma[7] = 0;
   2952                     }
   2953 
   2954                     if(0 == ps_sao_ctxt->i4_ctb_y)
   2955                     {
   2956                         au1_avail_luma[2] = 0;
   2957                         au1_avail_luma[4] = 0;
   2958                         au1_avail_luma[5] = 0;
   2959                     }
   2960 
   2961                     if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
   2962                     {
   2963                         au1_avail_luma[3] = 0;
   2964                         au1_avail_luma[6] = 0;
   2965                         au1_avail_luma[7] = 0;
   2966                     }
   2967 
   2968                     {
   2969                         au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
   2970                         u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
   2971 
   2972                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
   2973                                                                           src_strd,
   2974                                                                           pu1_src_left_luma,
   2975                                                                           pu1_src_top_luma,
   2976                                                                           pu1_sao_src_top_left_luma_curr_ctb,
   2977                                                                           au1_src_top_right,
   2978                                                                           &u1_sao_src_top_left_luma_bot_left,
   2979                                                                           au1_avail_luma,
   2980                                                                           ai1_offset_y,
   2981                                                                           sao_wd_luma,
   2982                                                                           sao_ht_luma);
   2983                     }
   2984                     pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
   2985                     pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
   2986                 }
   2987             }
   2988             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2989             {
   2990                 /* Update left, top and top-left */
   2991                 for(row = 0; row < sao_ht_luma; row++)
   2992                 {
   2993                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   2994                 }
   2995                 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   2996 
   2997                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   2998 
   2999                 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
   3000             }
   3001         }
   3002 
   3003         if((0 != sao_wd_chroma) && (0 != sao_ht_chroma))
   3004         {
   3005             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
   3006             {
   3007                 if(0 == ps_sao->b3_cb_type_idx)
   3008                 {
   3009                     for(row = 0; row < sao_ht_chroma; row++)
   3010                     {
   3011                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   3012                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   3013                     }
   3014                     pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   3015                     pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   3016 
   3017                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   3018 
   3019                     pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
   3020                     pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
   3021                 }
   3022 
   3023                 else if(1 == ps_sao->b3_cb_type_idx)
   3024                 {
   3025                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   3026                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   3027                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   3028                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   3029 
   3030                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   3031                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   3032                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   3033                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   3034 
   3035                     if(chroma_yuv420sp_vu)
   3036                     {
   3037                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   3038                                                                                     src_strd,
   3039                                                                                     pu1_src_left_chroma,
   3040                                                                                     pu1_src_top_chroma,
   3041                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
   3042                                                                                     ps_sao->b5_cr_band_pos,
   3043                                                                                     ps_sao->b5_cb_band_pos,
   3044                                                                                     ai1_offset_cr,
   3045                                                                                     ai1_offset_cb,
   3046                                                                                     sao_wd_chroma,
   3047                                                                                     sao_ht_chroma
   3048                                                                                    );
   3049                     }
   3050                     else
   3051                     {
   3052                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   3053                                                                                     src_strd,
   3054                                                                                     pu1_src_left_chroma,
   3055                                                                                     pu1_src_top_chroma,
   3056                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
   3057                                                                                     ps_sao->b5_cb_band_pos,
   3058                                                                                     ps_sao->b5_cr_band_pos,
   3059                                                                                     ai1_offset_cb,
   3060                                                                                     ai1_offset_cr,
   3061                                                                                     sao_wd_chroma,
   3062                                                                                     sao_ht_chroma
   3063                                                                                    );
   3064                     }
   3065                 }
   3066 
   3067                 else // if(2 <= ps_sao->b3_cb_type_idx)
   3068                 {
   3069                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   3070                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   3071                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   3072                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   3073 
   3074                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   3075                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   3076                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   3077                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   3078 
   3079                     for(i = 0; i < 8; i++)
   3080                     {
   3081                         au1_avail_chroma[i] = 255;
   3082                         au1_tile_slice_boundary[i] = 0;
   3083                         au4_idx_c[i] = 0;
   3084                         au4_ilf_across_tile_slice_enable[i] = 1;
   3085                     }
   3086                     {
   3087                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   3088                         {
   3089                             ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
   3090                             ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
   3091 
   3092                             ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
   3093                             ctby_c_l = ps_sao_ctxt->i4_ctb_y;
   3094 
   3095                             ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
   3096                             ctby_c_r = ps_sao_ctxt->i4_ctb_y;
   3097 
   3098                             ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
   3099                             ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
   3100 
   3101                             ctbx_c = ps_sao_ctxt->i4_ctb_x;
   3102                             ctby_c = ps_sao_ctxt->i4_ctb_y;
   3103 
   3104                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   3105                             {
   3106                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   3107                                 {
   3108                                     au4_idx_c[0] = -1;
   3109                                     au4_idx_c[4] = -1;
   3110                                     au4_idx_c[6] = -1;
   3111                                 }
   3112                                 else
   3113                                 {
   3114                                     au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
   3115                                 }
   3116 
   3117                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   3118                                 {
   3119                                     au4_idx_c[2] = -1;
   3120                                     au4_idx_c[4] = -1;
   3121                                     au4_idx_c[5] = -1;
   3122                                 }
   3123                                 else
   3124                                 {
   3125                                     au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
   3126                                     au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
   3127                                 }
   3128                                 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
   3129                                 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
   3130                                 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
   3131 
   3132                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   3133                                 {
   3134                                     au4_ilf_across_tile_slice_enable[0] = 0;
   3135                                     au4_ilf_across_tile_slice_enable[4] = 0;
   3136                                     au4_ilf_across_tile_slice_enable[6] = 0;
   3137                                 }
   3138                                 else
   3139                                 {
   3140                                     au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
   3141                                     au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
   3142                                 }
   3143 
   3144                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   3145                                 {
   3146                                     au4_ilf_across_tile_slice_enable[2] = 0;
   3147                                     au4_ilf_across_tile_slice_enable[4] = 0;
   3148                                     au4_ilf_across_tile_slice_enable[5] = 0;
   3149                                 }
   3150                                 else
   3151                                 {
   3152                                     au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
   3153                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
   3154                                 }
   3155 
   3156                                 au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   3157                                 au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   3158                                 au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   3159 
   3160                                 if(idx_c > au4_idx_c[6])
   3161                                 {
   3162                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
   3163                                 }
   3164 
   3165                                 /*
   3166                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   3167                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   3168                                  */
   3169                                 for(i = 0; i < 8; i++)
   3170                                 {
   3171                                     /*Sets the edges that lie on the slice/tile boundary*/
   3172                                     if(au4_idx_c[i] != idx_c)
   3173                                     {
   3174                                         au1_tile_slice_boundary[i] = 1;
   3175                                     }
   3176                                     else
   3177                                     {
   3178                                         au4_ilf_across_tile_slice_enable[i] = 1;
   3179                                     }
   3180                                 }
   3181                                 /*Reset indices*/
   3182                                 for(i = 0; i < 8; i++)
   3183                                 {
   3184                                     au4_idx_c[i] = 0;
   3185                                 }
   3186                             }
   3187 
   3188                             if(ps_pps->i1_tiles_enabled_flag)
   3189                             {
   3190                                 /* Calculate availability flags at slice boundary */
   3191                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   3192                                 {
   3193                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   3194                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   3195                                     {
   3196                                         if(0 == ps_sao_ctxt->i4_ctb_x)
   3197                                         {
   3198                                             au4_idx_c[6] = -1;
   3199                                             au4_idx_c[0] = -1;
   3200                                             au4_idx_c[4] = -1;
   3201                                         }
   3202                                         else
   3203                                         {
   3204                                             au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
   3205                                         }
   3206 
   3207                                         if(0 == ps_sao_ctxt->i4_ctb_y)
   3208                                         {
   3209                                             au4_idx_c[2] = -1;
   3210                                             au4_idx_c[5] = -1;
   3211                                             au4_idx_c[4] = -1;
   3212                                         }
   3213                                         else
   3214                                         {
   3215                                             au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
   3216                                             au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
   3217                                         }
   3218                                         idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
   3219                                         au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
   3220                                         au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
   3221 
   3222                                         for(i = 0; i < 8; i++)
   3223                                         {
   3224                                             /*Sets the edges that lie on the slice/tile boundary*/
   3225                                             if(au4_idx_c[i] != idx_c)
   3226                                             {
   3227                                                 au1_tile_slice_boundary[i] |= 1;
   3228                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
   3229                                             }
   3230                                         }
   3231                                     }
   3232                                 }
   3233                             }
   3234 
   3235                             for(i = 0; i < 8; i++)
   3236                             {
   3237                                 /*Sets the edges that lie on the slice/tile boundary*/
   3238                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   3239                                 {
   3240                                     au1_avail_chroma[i] = 0;
   3241                                 }
   3242                             }
   3243                         }
   3244                     }
   3245 
   3246                     if(0 == ps_sao_ctxt->i4_ctb_x)
   3247                     {
   3248                         au1_avail_chroma[0] = 0;
   3249                         au1_avail_chroma[4] = 0;
   3250                         au1_avail_chroma[6] = 0;
   3251                     }
   3252 
   3253                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
   3254                     {
   3255                         au1_avail_chroma[1] = 0;
   3256                         au1_avail_chroma[5] = 0;
   3257                         au1_avail_chroma[7] = 0;
   3258                     }
   3259 
   3260                     if(0 == ps_sao_ctxt->i4_ctb_y)
   3261                     {
   3262                         au1_avail_chroma[2] = 0;
   3263                         au1_avail_chroma[4] = 0;
   3264                         au1_avail_chroma[5] = 0;
   3265                     }
   3266 
   3267                     if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
   3268                     {
   3269                         au1_avail_chroma[3] = 0;
   3270                         au1_avail_chroma[6] = 0;
   3271                         au1_avail_chroma[7] = 0;
   3272                     }
   3273 
   3274                     {
   3275                         au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
   3276                         au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
   3277 
   3278                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
   3279                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
   3280 
   3281                         if(chroma_yuv420sp_vu)
   3282                         {
   3283                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   3284                                                                                  src_strd,
   3285                                                                                  pu1_src_left_chroma,
   3286                                                                                  pu1_src_top_chroma,
   3287                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
   3288                                                                                  au1_src_top_right,
   3289                                                                                  au1_sao_src_top_left_chroma_bot_left,
   3290                                                                                  au1_avail_chroma,
   3291                                                                                  ai1_offset_cr,
   3292                                                                                  ai1_offset_cb,
   3293                                                                                  sao_wd_chroma,
   3294                                                                                  sao_ht_chroma);
   3295                         }
   3296                         else
   3297                         {
   3298                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   3299                                                                                  src_strd,
   3300                                                                                  pu1_src_left_chroma,
   3301                                                                                  pu1_src_top_chroma,
   3302                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
   3303                                                                                  au1_src_top_right,
   3304                                                                                  au1_sao_src_top_left_chroma_bot_left,
   3305                                                                                  au1_avail_chroma,
   3306                                                                                  ai1_offset_cb,
   3307                                                                                  ai1_offset_cr,
   3308                                                                                  sao_wd_chroma,
   3309                                                                                  sao_ht_chroma);
   3310                         }
   3311                     }
   3312 
   3313                 }
   3314                 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
   3315                 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
   3316 
   3317                 pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
   3318                 pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
   3319             }
   3320             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   3321             {
   3322                 for(row = 0; row < sao_ht_chroma; row++)
   3323                 {
   3324                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   3325                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   3326                 }
   3327                 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   3328                 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   3329 
   3330                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   3331 
   3332                 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
   3333                 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
   3334             }
   3335 
   3336         }
   3337     }
   3338 
   3339 
   3340 
   3341 
   3342 /* If no loop filter is enabled copy the backed up values */
   3343     {
   3344         /* Luma */
   3345         if(no_loop_filter_enabled_luma)
   3346         {
   3347             UWORD32 u4_no_loop_filter_flag;
   3348             WORD32 loop_filter_bit_pos;
   3349             WORD32 log2_min_cu = 3;
   3350             WORD32 min_cu = (1 << log2_min_cu);
   3351             UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
   3352             WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
   3353             WORD32 sao_blk_wd = ctb_size;
   3354             WORD32 remaining_rows;
   3355             WORD32 remaining_cols;
   3356 
   3357             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
   3358             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
   3359             if(remaining_rows <= SAO_SHIFT_CTB)
   3360                 sao_blk_ht += remaining_rows;
   3361             if(remaining_cols <= SAO_SHIFT_CTB)
   3362                 sao_blk_wd += remaining_cols;
   3363 
   3364             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
   3365             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
   3366 
   3367             pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
   3368 
   3369             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
   3370                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
   3371             if(ps_sao_ctxt->i4_ctb_x > 0)
   3372                 loop_filter_bit_pos -= 1;
   3373 
   3374             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
   3375                             (loop_filter_bit_pos >> 3);
   3376 
   3377             for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
   3378                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
   3379             {
   3380                 WORD32 tmp_wd = sao_blk_wd;
   3381 
   3382                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
   3383                                 (loop_filter_bit_pos & 7);
   3384                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
   3385 
   3386                 if(u4_no_loop_filter_flag)
   3387                 {
   3388                     while(tmp_wd > 0)
   3389                     {
   3390                         if(CTZ(u4_no_loop_filter_flag))
   3391                         {
   3392                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3393                             pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3394                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
   3395                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
   3396                         }
   3397                         else
   3398                         {
   3399                             for(row = 0; row < min_cu; row++)
   3400                             {
   3401                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
   3402                                 {
   3403                                     pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
   3404                                 }
   3405                             }
   3406                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3407                             pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3408                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
   3409                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
   3410                         }
   3411                     }
   3412 
   3413                     pu1_src_tmp_luma -= sao_blk_wd;
   3414                     pu1_src_backup_luma -= sao_blk_wd;
   3415                 }
   3416 
   3417                 pu1_src_tmp_luma += (src_strd << log2_min_cu);
   3418                 pu1_src_backup_luma += (backup_strd << log2_min_cu);
   3419             }
   3420         }
   3421 
   3422         /* Chroma */
   3423         if(no_loop_filter_enabled_chroma)
   3424         {
   3425             UWORD32 u4_no_loop_filter_flag;
   3426             WORD32 loop_filter_bit_pos;
   3427             WORD32 log2_min_cu = 3;
   3428             WORD32 min_cu = (1 << log2_min_cu);
   3429             UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
   3430             WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
   3431             WORD32 sao_blk_wd = ctb_size;
   3432             WORD32 remaining_rows;
   3433             WORD32 remaining_cols;
   3434 
   3435             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
   3436             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
   3437             if(remaining_rows <= 2 * SAO_SHIFT_CTB)
   3438                 sao_blk_ht += remaining_rows;
   3439             if(remaining_cols <= 2 * SAO_SHIFT_CTB)
   3440                 sao_blk_wd += remaining_cols;
   3441 
   3442             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
   3443             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
   3444 
   3445             pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
   3446 
   3447             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
   3448                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
   3449             if(ps_sao_ctxt->i4_ctb_x > 0)
   3450                 loop_filter_bit_pos -= 2;
   3451 
   3452             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
   3453                             (loop_filter_bit_pos >> 3);
   3454 
   3455             for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
   3456                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
   3457             {
   3458                 WORD32 tmp_wd = sao_blk_wd;
   3459 
   3460                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
   3461                                 (loop_filter_bit_pos & 7);
   3462                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
   3463 
   3464                 if(u4_no_loop_filter_flag)
   3465                 {
   3466                     while(tmp_wd > 0)
   3467                     {
   3468                         if(CTZ(u4_no_loop_filter_flag))
   3469                         {
   3470                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3471                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3472                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
   3473                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
   3474                         }
   3475                         else
   3476                         {
   3477                             for(row = 0; row < min_cu / 2; row++)
   3478                             {
   3479                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
   3480                                 {
   3481                                     pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
   3482                                 }
   3483                             }
   3484 
   3485                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3486                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3487                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
   3488                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
   3489                         }
   3490                     }
   3491 
   3492                     pu1_src_tmp_chroma -= sao_blk_wd;
   3493                     pu1_src_backup_chroma -= sao_blk_wd;
   3494                 }
   3495 
   3496                 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
   3497                 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
   3498             }
   3499         }
   3500     }
   3501 
   3502 }
   3503 
   3504