Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2015 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 
     21 /**
     22 *******************************************************************************
     23 * @file
     24 *  ih264e_process.c
     25 *
     26 * @brief
     27 *  Contains functions for codec thread
     28 *
     29 * @author
     30 *  Harish
     31 *
     32 * @par List of Functions:
     33 * - ih264e_generate_sps_pps()
     34 * - ih264e_init_entropy_ctxt()
     35 * - ih264e_entropy()
     36 * - ih264e_pack_header_data()
     37 * - ih264e_update_proc_ctxt()
     38 * - ih264e_init_proc_ctxt()
     39 * - ih264e_pad_recon_buffer()
     40 * - ih264e_dblk_pad_hpel_processing_n_mbs()
     41 * - ih264e_process()
     42 * - ih264e_set_rc_pic_params()
     43 * - ih264e_update_rc_post_enc()
     44 * - ih264e_process_thread()
     45 *
     46 * @remarks
     47 *  None
     48 *
     49 *******************************************************************************
     50 */
     51 
     52 /*****************************************************************************/
     53 /* File Includes                                                             */
     54 /*****************************************************************************/
     55 
     56 /* System include files */
     57 #include <stdio.h>
     58 #include <stddef.h>
     59 #include <stdlib.h>
     60 #include <string.h>
     61 #include <limits.h>
     62 #include <assert.h>
     63 
     64 /* User include files */
     65 #include "ih264_typedefs.h"
     66 #include "iv2.h"
     67 #include "ive2.h"
     68 #include "ih264_defs.h"
     69 #include "ih264_debug.h"
     70 #include "ime_distortion_metrics.h"
     71 #include "ime_defs.h"
     72 #include "ime_structs.h"
     73 #include "ih264_error.h"
     74 #include "ih264_structs.h"
     75 #include "ih264_trans_quant_itrans_iquant.h"
     76 #include "ih264_inter_pred_filters.h"
     77 #include "ih264_mem_fns.h"
     78 #include "ih264_padding.h"
     79 #include "ih264_intra_pred_filters.h"
     80 #include "ih264_deblk_edge_filters.h"
     81 #include "ih264_cabac_tables.h"
     82 #include "ih264_platform_macros.h"
     83 #include "ih264_macros.h"
     84 #include "ih264_buf_mgr.h"
     85 #include "ih264e_error.h"
     86 #include "ih264e_bitstream.h"
     87 #include "ih264_common_tables.h"
     88 #include "ih264_list.h"
     89 #include "ih264e_defs.h"
     90 #include "irc_cntrl_param.h"
     91 #include "irc_frame_info_collector.h"
     92 #include "ih264e_rate_control.h"
     93 #include "ih264e_cabac_structs.h"
     94 #include "ih264e_structs.h"
     95 #include "ih264e_cabac.h"
     96 #include "ih264e_process.h"
     97 #include "ithread.h"
     98 #include "ih264e_intra_modes_eval.h"
     99 #include "ih264e_encode_header.h"
    100 #include "ih264e_globals.h"
    101 #include "ih264e_config.h"
    102 #include "ih264e_trace.h"
    103 #include "ih264e_statistics.h"
    104 #include "ih264_cavlc_tables.h"
    105 #include "ih264e_cavlc.h"
    106 #include "ih264e_deblk.h"
    107 #include "ih264e_me.h"
    108 #include "ih264e_debug.h"
    109 #include "ih264e_master.h"
    110 #include "ih264e_utils.h"
    111 #include "irc_mem_req_and_acq.h"
    112 #include "irc_rate_control_api.h"
    113 #include "ih264e_platform_macros.h"
    114 #include "ime_statistics.h"
    115 
    116 
    117 /*****************************************************************************/
    118 /* Function Definitions                                                      */
    119 /*****************************************************************************/
    120 
    121 /**
    122 ******************************************************************************
    123 *
    124 *  @brief This function generates sps, pps set on request
    125 *
    126 *  @par   Description
    127 *  When the encoder is set in header generation mode, the following function
    128 *  is called. This generates sps and pps headers and returns the control back
    129 *  to caller.
    130 *
    131 *  @param[in]    ps_codec
    132 *  pointer to codec context
    133 *
    134 *  @return      success or failure error code
    135 *
    136 ******************************************************************************
    137 */
    138 IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
    139 {
    140     /* choose between ping-pong process buffer set */
    141     WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
    142 
    143     /* entropy ctxt */
    144     entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
    145 
    146     /* Bitstream structure */
    147     bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
    148 
    149     /* sps */
    150     sps_t *ps_sps = NULL;
    151 
    152     /* pps */
    153     pps_t *ps_pps = NULL;
    154 
    155     /* output buff */
    156     out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
    157 
    158 
    159     /********************************************************************/
    160     /*      initialize the bit stream buffer                            */
    161     /********************************************************************/
    162     ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
    163 
    164     /********************************************************************/
    165     /*                    BEGIN HEADER GENERATION                       */
    166     /********************************************************************/
    167     /*ps_codec->i4_pps_id ++;*/
    168     ps_codec->i4_pps_id %= MAX_PPS_CNT;
    169 
    170     /*ps_codec->i4_sps_id ++;*/
    171     ps_codec->i4_sps_id %= MAX_SPS_CNT;
    172 
    173     /* populate sps header */
    174     ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
    175     ih264e_populate_sps(ps_codec, ps_sps);
    176 
    177     /* populate pps header */
    178     ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
    179     ih264e_populate_pps(ps_codec, ps_pps);
    180 
    181     ps_entropy->i4_error_code = IH264E_SUCCESS;
    182 
    183     /* generate sps */
    184     ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps);
    185 
    186     /* generate pps */
    187     ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
    188 
    189     /* queue output buffer */
    190     ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
    191 
    192     return ps_entropy->i4_error_code;
    193 }
    194 
    195 /**
    196 *******************************************************************************
    197 *
    198 * @brief   initialize entropy context.
    199 *
    200 * @par Description:
    201 *  Before invoking the call to perform to entropy coding the entropy context
    202 *  associated with the job needs to be initialized. This involves the start
    203 *  mb address, end mb address, slice index and the pointer to location at
    204 *  which the mb residue info and mb header info are packed.
    205 *
    206 * @param[in] ps_proc
    207 *  Pointer to the current process context
    208 *
    209 * @returns error status
    210 *
    211 * @remarks none
    212 *
    213 *******************************************************************************
    214 */
    215 IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
    216 {
    217     /* codec context */
    218     codec_t *ps_codec = ps_proc->ps_codec;
    219 
    220     /* entropy ctxt */
    221     entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
    222 
    223     /* start address */
    224     ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
    225 
    226     /* end address */
    227     ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
    228 
    229     /* slice index */
    230     ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
    231 
    232     /* sof */
    233     /* @ start of frame or start of a new slice, set sof flag */
    234     if (ps_entropy->i4_mb_start_add == 0)
    235     {
    236         ps_entropy->i4_sof = 1;
    237     }
    238 
    239     if (ps_entropy->i4_mb_x == 0)
    240     {
    241         /* packed mb coeff data */
    242         ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
    243                         ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
    244 
    245         /* packed mb header data */
    246         ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
    247                         ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
    248     }
    249 
    250     return IH264E_SUCCESS;
    251 }
    252 
    253 /**
    254 *******************************************************************************
    255 *
    256 * @brief entry point for entropy coding
    257 *
    258 * @par Description
    259 *  This function calls lower level functions to perform entropy coding for a
    260 *  group (n rows) of mb's. After encoding 1 row of mb's,  the function takes
    261 *  back the control, updates the ctxt and calls lower level functions again.
    262 *  This process is repeated till all the rows or group of mb's (whichever is
    263 *  minimum) are coded
    264 *
    265 * @param[in] ps_proc
    266 *  process context
    267 *
    268 * @returns  error status
    269 *
    270 * @remarks
        *  - The mbs coded are those from i4_mb_start_add up to, but not including,
        *    i4_mb_end_add of the entropy context.
        *  - When i4_sof is set, the bitstream is initialised over the output buffer
        *    of the current context set, sps and pps are written if i4_gen_header
        *    is 1, a slice header is written and i4_sof is cleared.
        *  - Before coding an mb the function yields until the matching proc map
        *    entry is non-zero; after coding it sets the matching entropy map entry.
        *  - Once every mb of the frame is coded, the slice is closed,
        *    ih264e_update_rc_post_enc() is called and the bitstream offset is
        *    stored as the byte count of the output buffer.
        *  - au4_entropy_thread_active[ctxt_sel] is cleared before returning.
        *  - The value returned is i4_error_code of the entropy context, into which
        *    the results of the lower level calls are OR-ed.
    271 *
    272 *******************************************************************************
    273 */
    274 
    275 IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
    276 {
    277     /* codec context */
    278     codec_t *ps_codec = ps_proc->ps_codec;
    279 
    280     /* entropy context */
    281     entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
    282 
    283     /* cabac context */
    284     cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
    285 
    286     /* sps */
    287     sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
    288 
    289     /* pps */
    290     pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
    291 
    292     /* slice header */
    293     slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
    294 
    295     /* slice type */
    296     WORD32 i4_slice_type = ps_proc->i4_slice_type;
    297 
    298     /* Bitstream structure */
    299     bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
    300 
    301     /* output buff (by-value copy, only filled in at start of frame) */
    302     out_buf_t s_out_buf;
    303 
    304     /* proc map */
    305     UWORD8  *pu1_proc_map;
    306 
    307     /* entropy map */
    308     UWORD8  *pu1_entropy_map_curr;
    309 
    310     /* proc base idx */
    311     WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
    312 
    313     /* temp var */
    314     WORD32 i4_wd_mbs, i4_ht_mbs;
    315     UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
    316     WORD32 bitstream_start_offset, bitstream_end_offset;
    317     /********************************************************************/
    318     /*                            BEGIN INIT                            */
    319     /********************************************************************/
    320 
    321     /* entropy encode start address */
    322     u4_mb_idx = ps_entropy->i4_mb_start_add;
    323 
    324     /* entropy encode end address */
    325     u4_mb_end_idx = ps_entropy->i4_mb_end_add;
    326 
    327     /* width in mbs */
    328     i4_wd_mbs = ps_entropy->i4_wd_mbs;
    329 
    330     /* height in mbs */
    331     i4_ht_mbs = ps_entropy->i4_ht_mbs;
    332 
    333     /* total mb cnt */
    334     u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
    335 
    336     /* proc map (row of the current mb) */
    337     pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
    338 
    339     /* entropy map (row of the current mb) */
    340     pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
    341 
    342     /********************************************************************/
    343     /* @ start of frame / slice,                                        */
    344     /*      initialize the output buffer,                               */
    345     /*      initialize the bit stream buffer,                           */
    346     /*      check if sps and pps headers have to be generated,          */
    347     /*      populate and generate slice header                          */
    348     /********************************************************************/
    349     if (ps_entropy->i4_sof)
    350     {
    351         /********************************************************************/
    352         /*      initialize the output buffer                                */
    353         /********************************************************************/
                /* NOTE(review): s_out_buf is a local copy of the codec's output */
                /* buffer entry. The u4_is_last and timestamp fields set below   */
                /* are written to this copy only and are not copied back in this */
                /* function - confirm that this is intended.                     */
    354         s_out_buf = ps_codec->as_out_buf[ctxt_sel];
    355 
    356         /* is last frame to encode */
    357         s_out_buf.u4_is_last = ps_entropy->u4_is_last;
    358 
    359         /* frame idx */
    360         s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
    361         s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
    362 
    363         /********************************************************************/
    364         /*      initialize the bit stream buffer                            */
    365         /********************************************************************/
    366         ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
    367 
    368         /********************************************************************/
    369         /*                    BEGIN HEADER GENERATION                       */
    370         /********************************************************************/
                /* sps and pps are written only when i4_gen_header is 1 */
    371         if (1 == ps_entropy->i4_gen_header)
    372         {
    373             /* generate sps */
    374             ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps);
    375 
    376             /* generate pps */
    377             ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
    378 
    379             /* reset i4_gen_header */
    380             ps_entropy->i4_gen_header = 0;
    381         }
    382 
    383         /* populate slice header */
    384         ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
    385 
    386         /* generate slice header */
    387         ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
    388                                                                   ps_pps, ps_sps);
    389 
    390         /* once start of frame / slice is done, you can reset it */
    391         /* it is the responsibility of the caller to set this flag */
    392         ps_entropy->i4_sof = 0;
    393 
                /* cabac: byte align and flush the bitstream after the slice */
                /* header, then initialise the cabac context                 */
    394         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
    395         {
    396             BITSTREAM_BYTE_ALIGN(ps_bitstrm);
    397             BITSTREAM_FLUSH(ps_bitstrm);
    398             ih264e_init_cabac_ctxt(ps_entropy);
    399         }
    400     }
    401 
    402     /* begin entropy coding for the mb set */
    403     while (u4_mb_idx < u4_mb_end_idx)
    404     {
    405         /* init ptrs/indices */
                /* i4_mb_x equal to the row width means the previous mb closed its */
                /* row: move to the next row and re-derive the per-row pointers    */
    406         if (ps_entropy->i4_mb_x == i4_wd_mbs)
    407         {
    408             ps_entropy->i4_mb_y++;
    409             ps_entropy->i4_mb_x = 0;
    410 
    411             /* packed mb coeff data */
    412             ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
    413                             ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
    414 
    415             /* packed mb header data */
    416             ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
    417                             ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
    418 
    419             /* proc map */
    420             pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
    421 
    422             /* entropy map */
    423             pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
    424         }
    425 
    426         DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
    427         ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
    428         ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
    429 
    430         /* wait until the curr mb is core coded */
    431         /* The wait for curr mb to be core coded is essential when entropy is launched
    432          * as a separate job
    433          */
                /* the proc map entry is read through a volatile pointer on every */
                /* pass; the thread yields for as long as the entry is zero       */
    434         while (1)
    435         {
    436             volatile UWORD8 *pu1_buf1;
    437             WORD32 idx = ps_entropy->i4_mb_x;
    438 
    439             pu1_buf1 = pu1_proc_map + idx;
    440             if (*pu1_buf1)
    441                 break;
    442             ithread_yield();
    443         }
    444 
    445 
    446         /* write mb layer */
                /* the writer is picked from the codec's function table by entropy */
                /* coding mode and slice type                                      */
    447         ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
    448         /* Starting bitstream offset for header in bits */
                /* sampled after the mb layer is written, so the difference taken   */
                /* at the end of this iteration covers only what is written below   */
    449         bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
    450 
    451         /* set entropy map */
    452         pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
    453 
    454         u4_mb_idx++;
    455         ps_entropy->i4_mb_x++;
    456         /* cabac: the mb just coded is not the last of its row */
                /* NOTE(review): second argument 0 presumably codes end_of_slice as */
                /* not set - confirm in ih264e_cabac_encode_terminate()             */
    457         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
    458         {
    459             if (ps_entropy->i4_mb_x < i4_wd_mbs)
    460             {
    461                 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
    462             }
    463         }
    464 
                /* end of row reached */
    465         if (ps_entropy->i4_mb_x == i4_wd_mbs)
    466         {
    467             /* if slices are enabled */
    468             if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
    469             {
    470                 /* current slice index */
    471                 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
    472 
    473                 /* slice map */
    474                 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
    475 
    476                 /* No need to open a slice at end of frame. The current slice can be closed at the time
    477                  * of signaling eof flag.
    478                  */
                        /* u4_mb_idx already addresses the next mb: a new slice is   */
                        /* opened when that mb exists and has a different slice idx  */
    479                 if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
    480                                                 != pu1_slice_idx[u4_mb_idx]))
    481                 {
    482                     if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
    483                     { /* mb skip run */
                                /* NOTE(review): the inner test of *pi4_mb_skip_run     */
                                /* repeats the condition already checked by the outer if */
    484                         if ((i4_slice_type != ISLICE)
    485                                         && *ps_entropy->pi4_mb_skip_run)
    486                         {
    487                             if (*ps_entropy->pi4_mb_skip_run)
    488                             {
    489                             PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
    490                                 *ps_entropy->pi4_mb_skip_run = 0;
    491                             }
    492                         }
    493                         /* put rbsp trailing bits for the previous slice */
    494                                  ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
    495                     }
    496                     else
    497                     {
                                /* cabac: close the previous slice (argument 1) */
    498                         ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
    499                     }
    500 
    501                     /* update slice header pointer */
    502                     i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
    503                     ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
    504                     ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
    505 
    506                     /* populate slice header */
                            /* i4_mb_start_add is moved to the first mb of the new slice */
                            /* before the header is populated; presumably it is read by  */
                            /* ih264e_populate_slice_header() - confirm                  */
    507                     ps_entropy->i4_mb_start_add = u4_mb_idx;
    508                     ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
    509                                                  ps_sps);
    510 
    511                     /* generate slice header */
    512                     ps_entropy->i4_error_code |= ih264e_generate_slice_header(
    513                                     ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
    514                     if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
    515                     {
    516                         BITSTREAM_BYTE_ALIGN(ps_bitstrm);
    517                         BITSTREAM_FLUSH(ps_bitstrm);
    518                         ih264e_init_cabac_ctxt(ps_entropy);
    519                     }
    520                 }
    521                 else
    522                 {
                            /* no new slice is opened: for cabac, unless the frame is */
                            /* complete, terminate is called with argument 0          */
    523                     if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
    524                                     && u4_mb_idx != u4_mb_cnt)
    525                     {
    526                         ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
    527                     }
    528                 }
    529             }
    530             /* Dont execute any further instructions until store synchronization took place */
    531             DATA_SYNC();
    532         }
    533 
    534         /* Ending bitstream offset for header in bits */
                /* u4_header_bits[1] accumulates for P slices, u4_header_bits[0] */
                /* for every other slice type                                    */
    535         bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
    536         ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
    537                         bitstream_end_offset - bitstream_start_offset;
    538     }
    539 
    540     /* check for eof */
    541     if (u4_mb_idx == u4_mb_cnt)
    542     {
    543         /* set end of frame flag */
    544         ps_entropy->i4_eof = 1;
    545     }
    546     else
    547     {
                /* frame not complete: for cabac without block based slices the   */
                /* terminate call for the last mb coded is made here, once        */
                /* NOTE(review): this looks like it assumes the job ended exactly */
                /* on a row boundary (inside the loop, terminate is skipped for   */
                /* the last mb of a row in this mode) - confirm against the job   */
                /* partitioning                                                    */
    548         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
    549                         && ps_codec->s_cfg.e_slice_mode
    550                                         != IVE_SLICE_MODE_BLOCKS)
    551         {
    552             ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
    553         }
    554     }
    555 
    556     if (ps_entropy->i4_eof)
    557     {
    558         if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
    559         {
    560             /* mb skip run */
                    /* NOTE(review): the inner test repeats the outer condition */
    561             if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
    562             {
    563                 if (*ps_entropy->pi4_mb_skip_run)
    564                 {
    565                     PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
    566                                  ps_entropy->i4_error_code, "mb skip run");
    567                     *ps_entropy->pi4_mb_skip_run = 0;
    568                 }
    569             }
    570             /* put rbsp trailing bits */
    571              ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
    572         }
    573         else
    574         {
                    /* cabac: close the last slice of the frame (argument 1) */
    575             ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
    576         }
    577 
    578         /* update current frame stats to rc library */
    579         {
    580             /* number of bytes to stuff */
    581             WORD32 i4_stuff_bytes;
    582 
    583             /* update (third argument is 1 when the codec's i4_poc is 0) */
    584             i4_stuff_bytes = ih264e_update_rc_post_enc(
    585                             ps_codec, ctxt_sel,
    586                             (ps_proc->ps_codec->i4_poc == 0));
    587 
    588             /* cbr rc - house keeping */
                    /* on post encode skip the bitstream offset is reset to 0; */
                    /* otherwise filler nal units are added when requested     */
    589             if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
    590             {
    591                 ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
    592             }
    593             else if (i4_stuff_bytes)
    594             {
    595                 /* add filler nal units */
    596                 ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
    597             }
    598         }
    599 
    600         /*
    601          *Frame number is to be incremented only if the current frame is a
    602          * reference frame. After each successful frame encode, we increment
    603          * frame number by 1
    604          */
    605         if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
    606                         && ps_codec->u4_is_curr_frm_ref)
    607         {
    608             ps_codec->i4_frame_num++;
    609         }
    610         /********************************************************************/
    611         /*      signal the output                                           */
    612         /********************************************************************/
                /* the final bitstream offset becomes the byte count of the */
                /* codec's output buffer entry                              */
    613         ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
    614                         ps_entropy->ps_bitstrm->u4_strm_buf_offset;
    615 
    616         DEBUG("entropy status %x", ps_entropy->i4_error_code);
    617     }
    618 
    619     /* allow threads to dequeue entropy jobs */
    620     ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
    621 
    622     return ps_entropy->i4_error_code;
    623 }
    624 
    625 /**
    626 *******************************************************************************
    627 *
    628 * @brief Packs header information of a mb in to a buffer
    629 *
    630 * @par Description:
    631 *  After deciding the mode info of a macroblock, the syntax elements
    632 *  associated with the mb are packed and stored. The entropy thread unpacks
    633 *  this buffer and generates the end bit stream.
    634 *
    635 * @param[in] ps_proc
    636 *  Pointer to the current process context
    637 *
    638 * @returns error status
    639 *
    640 * @remarks none
    641 *
    642 *******************************************************************************
    643 */
    644 IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
    645 {
    646     /* curr mb type */
    647     UWORD32 u4_mb_type = ps_proc->u4_mb_type;
    648 
    649     /* pack mb syntax layer of curr mb (used for entropy coding) */
    650     if (u4_mb_type == I4x4)
    651     {
    652         /* pointer to mb header storage space */
    653         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
    654 
    655         /* temp var */
    656         WORD32 i4, byte;
    657 
    658         /* mb type plus mode */
    659         *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
    660 
    661         /* cbp */
    662         *pu1_ptr++ = ps_proc->u4_cbp;
    663 
    664         /* mb qp delta */
    665         *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
    666 
    667         /* sub mb modes */
    668         for (i4 = 0; i4 < 16; i4 ++)
    669         {
    670             byte = 0;
    671 
    672             if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
    673                             ps_proc->au1_intra_luma_mb_4x4_modes[i4])
    674             {
    675                 byte |= 1;
    676             }
    677             else
    678             {
    679 
    680                 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
    681                                 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
    682                 {
    683                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
    684                 }
    685                 else
    686                 {
    687                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
    688                 }
    689             }
    690 
    691             i4++;
    692 
    693             if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
    694                             ps_proc->au1_intra_luma_mb_4x4_modes[i4])
    695             {
    696                 byte |= 16;
    697             }
    698             else
    699             {
    700 
    701                 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
    702                                 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
    703                 {
    704                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
    705                 }
    706                 else
    707                 {
    708                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
    709                 }
    710             }
    711 
    712             *pu1_ptr++ = byte;
    713         }
    714 
    715         /* end of mb layer */
    716         ps_proc->pv_mb_header_data = pu1_ptr;
    717     }
    718     else if (u4_mb_type == I16x16)
    719     {
    720         /* pointer to mb header storage space */
    721         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
    722 
    723         /* mb type plus mode */
    724         *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
    725 
    726         /* cbp */
    727         *pu1_ptr++ = ps_proc->u4_cbp;
    728 
    729         /* mb qp delta */
    730         *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
    731 
    732         /* end of mb layer */
    733         ps_proc->pv_mb_header_data = pu1_ptr;
    734     }
    735     else if (u4_mb_type == P16x16)
    736     {
    737         /* pointer to mb header storage space */
    738         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
    739 
    740         WORD16 *i2_mv_ptr;
    741 
    742         /* mb type plus mode */
    743         *pu1_ptr++ = u4_mb_type;
    744 
    745         /* cbp */
    746         *pu1_ptr++ = ps_proc->u4_cbp;
    747 
    748         /* mb qp delta */
    749         *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
    750 
    751         i2_mv_ptr = (WORD16 *)pu1_ptr;
    752 
    753         *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
    754 
    755         *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
    756 
    757         /* end of mb layer */
    758         ps_proc->pv_mb_header_data = i2_mv_ptr;
    759     }
    760     else if (u4_mb_type == PSKIP)
    761     {
    762         /* pointer to mb header storage space */
    763         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
    764 
    765         /* mb type plus mode */
    766         *pu1_ptr++ = u4_mb_type;
    767 
    768         /* end of mb layer */
    769         ps_proc->pv_mb_header_data = pu1_ptr;
    770     }
    771     else if(u4_mb_type == B16x16)
    772     {
    773 
    774         /* pointer to mb header storage space */
    775         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
    776 
    777         WORD16 *i2_mv_ptr;
    778 
    779         UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
    780 
    781         /* mb type plus mode */
    782         *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
    783 
    784         /* cbp */
    785         *pu1_ptr++ = ps_proc->u4_cbp;
    786 
    787         /* mb qp delta */
    788         *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
    789 
    790         /* l0 & l1 me data */
    791         i2_mv_ptr = (WORD16 *)pu1_ptr;
    792 
    793         if (u4_pred_mode != PRED_L1)
    794         {
    795             *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
    796                             - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
    797 
    798             *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
    799                             - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
    800         }
    801         if (u4_pred_mode != PRED_L0)
    802         {
    803             *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
    804                             - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
    805 
    806             *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
    807                             - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
    808         }
    809 
    810         /* end of mb layer */
    811         ps_proc->pv_mb_header_data = i2_mv_ptr;
    812 
    813     }
    814     else if(u4_mb_type == BDIRECT)
    815     {
    816         /* pointer to mb header storage space */
    817         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
    818 
    819         /* mb type plus mode */
    820         *pu1_ptr++ = u4_mb_type;
    821 
    822         /* cbp */
    823         *pu1_ptr++ = ps_proc->u4_cbp;
    824 
    825         /* mb qp delta */
    826         *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
    827 
    828         ps_proc->pv_mb_header_data = pu1_ptr;
    829 
    830     }
    831     else if(u4_mb_type == BSKIP)
    832     {
    833         UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
    834 
    835         /* pointer to mb header storage space */
    836         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
    837 
    838         /* mb type plus mode */
    839         *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
    840 
    841         /* end of mb layer */
    842         ps_proc->pv_mb_header_data = pu1_ptr;
    843     }
    844 
    845     return IH264E_SUCCESS;
    846 }
    847 
    848 /**
    849 *******************************************************************************
    850 *
    851 * @brief   Update the process context after encoding an mb. This involves preserving
    852 * the current mb information for later use and initializing the proc ctxt elements
    853 * to encode the next mb.
    854 *
    855 * @par Description:
    856 *  This function performs house keeping tasks after encoding an mb.
    857 *  After encoding an mb, various elements of the process context need to be
    858 *  updated to encode the next mb. For instance, the source, recon and reference
    859 *  pointers and the mb indices have to be adjusted to the next mb. The slice index
    860 *  is refreshed for the next mb and the previous mb qp is updated. Also, while
    861 *  encoding the next mb, the current mb info is used as part of mode prediction or mv
    862 *  prediction. Hence the current mb info has to be preserved at top/top left/left
    863 *  locations. The mb header is packed for entropy coding and, once the last mb of
    864 *  a row has been encoded, an entropy job for that row is queued.
    865 *
    866 * @param[in,out] ps_proc
    867 *  Pointer to the current process context
    868 *
    869 * @returns IH264_SUCCESS OR-ed with the status returned while queueing the entropy job
    870 *
    871 * @remarks The co-located pu pointer is advanced by the pu count of the mb just encoded
    872 *
    873 *******************************************************************************
    874 */
    875 WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
    876 {
    877     /* error status; accumulates the result of queueing the entropy job */
    878     WORD32 error_status = IH264_SUCCESS;
    879 
    880     /* codec context */
    881     codec_t *ps_codec = ps_proc->ps_codec;
    882 
    883     /* curr mb indices (i4_mb_x is incremented further down, i4_mb_y is not) */
    884     WORD32 i4_mb_x = ps_proc->i4_mb_x;
    885     WORD32 i4_mb_y = ps_proc->i4_mb_y;
    886 
    887     /* mb syntax elements of neighbors */
    888     mb_info_t *ps_left_syn =  &ps_proc->s_left_mb_syntax_ele;
    889     mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
    890     mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
    891 
    892     /* curr mb type */
    893     UWORD32 u4_mb_type = ps_proc->u4_mb_type;
    894 
    895     /* curr mb class (intra or not) */
    896     UWORD32 u4_is_intra = ps_proc->u4_is_intra;
    897 
    898     /* width in mbs */
    899     WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
    900 
    901     /* height in mbs */
    902     WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
    903 
    904     /* proc map entry of the first mb of the current row */
    905     UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
    906 
    907     /* deblk context */
    908     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
    909 
    910     /* deblk bs context */
    911     bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
    912 
    913     /* top row motion vector info */
    914     enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
    915 
    916     /* top left mb motion vector */
    917     enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
    918 
    919     /* left mb motion vector */
    920     enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
    921 
    922     /* sub mb modes of the top row, 16 entries per mb */
    923     UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
    924 
    925     /*************************************************************/
    926     /* During MV prediction, when top right mb is not available, */
    927     /* top left mb info. is used for prediction. Hence the curr  */
    928     /* top, which will be top left for the next mb needs to be   */
    929     /* preserved before updating it with curr mb info.           */
    930     /*************************************************************/
    931 
    932     /* mb type, mb class, csbp */
    933     *ps_top_left_syn = *ps_top_syn;
    934 
    935     if (ps_proc->i4_slice_type != ISLICE)
    936     {
    937         /*****************************************/
    938         /* update top left with top info results */
    939         /*****************************************/
    940         /* mv */
    941         *ps_top_left_mb_pu = *ps_top_row_pu;
    942     }
    943 
    944     /*************************************************/
    945     /* update top and left with curr mb info results */
    946     /*************************************************/
    947 
    948     /* mb type */
    949     ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
    950 
    951     /* mb class */
    952     ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
    953 
    954     /* csbp */
    955     ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
    956 
    957     /* distortion */
    958     ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
    959 
    960     if (u4_is_intra)
    961     {
    962         /* mb / sub mb modes, saved as left and top neighbor modes */
    963         if (I16x16 == u4_mb_type)
    964         {
    965             pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
    966         }
    967         else if (I4x4 == u4_mb_type)
    968         {
    969             ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
    970             ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
    971         }
    972         else if (I8x8 == u4_mb_type)
    973         {
    974             memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
    975             memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
    976         }
    977 
    978         if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
    979         {
    980             /* mv */
    981             *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
    982         }
    983         /* an intra mb is accounted as a single pu */
    984         *ps_proc->pu4_mb_pu_cnt = 1;
    985     }
    986     else
    987     {
    988         /* mv */
    989         *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
    990     }
    991 
    992     /*
    993      * Record whether the MB has been coded intra
    994      * So that future AIRs can skip it
    995      */
    996     ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
    997 
    998     /**************************************************/
    999     /* pack mb header info. for entropy coding        */
   1000     /**************************************************/
   1001     ih264e_pack_header_data(ps_proc);
   1002 
   1003     /* update previous mb qp (done after packing, which stores the qp delta) */
   1004     ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
   1005 
   1006     /* store qp */
   1007     ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
   1008 
   1009     /*
   1010      * We need to sync the cache to make sure that the nmv content of proc
   1011      * is updated to cache properly
   1012      */
   1013     DATA_SYNC();
   1014 
   1015     /* Just before finishing the row, enqueue the job in to entropy queue.
   1016      * The master thread depending on its convenience shall dequeue it and
   1017      * performs entropy.
   1018      *
   1019      * WARN !! Placing this block post proc map update can cause queuing of
   1020      * entropy jobs in out of order.
   1021      */
   1022     if (i4_mb_x == i4_wd_mbs - 1)
   1023     {
   1024         /* job structures */
   1025         job_t s_job;
   1026 
   1027         /* job class */
   1028         s_job.i4_cmd = CMD_ENTROPY;
   1029 
   1030         /* number of mbs to be processed in the current job */
   1031         s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
   1032 
   1033         /* job start index x */
   1034         s_job.i2_mb_x = 0;
   1035 
   1036         /* job start index y */
   1037         s_job.i2_mb_y = ps_proc->i4_mb_y;
   1038 
   1039         /* proc base idx */
   1040         s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
   1041 
   1042         /* queue the job */
   1043         error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
   1044         /* last mb row of the picture: terminate the entropy job queue */
   1045         if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
   1046             ih264_list_terminate(ps_codec->pv_entropy_jobq);
   1047     }
   1048 
   1049     /* update proc map */
   1050     pu1_proc_map[i4_mb_x] = 1;
   1051 
   1052     /**************************************************/
   1053     /* update proc ctxt elements for encoding next mb */
   1054     /**************************************************/
   1055     /* update indices; from here on i4_mb_x refers to the next mb */
   1056     i4_mb_x ++;
   1057     ps_proc->i4_mb_x = i4_mb_x;
   1058 
   1059     if (ps_proc->i4_mb_x == i4_wd_mbs)
   1060     {
   1061         ps_proc->i4_mb_y++;
   1062         ps_proc->i4_mb_x = 0;
   1063     }
   1064 
   1065     /* update slice index for the next mb. NOTE(review): after the last mb of the picture this reads entry wd_mbs * ht_mbs - confirm pu1_slice_idx has room for it */
   1066     ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
   1067 
   1068     /* update buffers pointers */
   1069     ps_proc->pu1_src_buf_luma += MB_SIZE;
   1070     ps_proc->pu1_rec_buf_luma += MB_SIZE;
   1071     ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
   1072     ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
   1073 
   1074     /*
   1075      * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
   1076      * the stride per MB is MB_SIZE
   1077      */
   1078     ps_proc->pu1_src_buf_chroma += MB_SIZE;
   1079     ps_proc->pu1_rec_buf_chroma += MB_SIZE;
   1080     ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
   1081     ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
   1082 
   1083 
   1084 
   1085     /* Reset cost, distortion params */
   1086     ps_proc->i4_mb_cost = INT_MAX;
   1087     ps_proc->i4_mb_distortion = SHRT_MAX;
   1088     /* advance the pu pointer by the pu count of the mb just encoded, then step the count pointer */
   1089     ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
   1090 
   1091     ps_proc->pu4_mb_pu_cnt += 1;
   1092 
   1093     /* Update colocated pu: i4_mb_x was already incremented above, so (i4_mb_x - 1) selects the count of the mb just encoded */
   1094     if (ps_proc->i4_slice_type == BSLICE)
   1095         ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt +  (i4_mb_y * ps_proc->i4_wd_mbs) + (i4_mb_x - 1));
   1096 
   1097     /* deblk ctxts */
   1098     if (ps_proc->u4_disable_deblock_level != 1)
   1099     {
   1100         /* indices */
   1101         ps_bs->i4_mb_x = ps_proc->i4_mb_x;
   1102         ps_bs->i4_mb_y = ps_proc->i4_mb_y;
   1103 
   1104 #ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
   1105         ps_deblk->i4_mb_x ++;
   1106 
   1107         ps_deblk->pu1_cur_pic_luma += MB_SIZE;
   1108         /*
   1109          * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
   1110          * the stride per MB is MB_SIZE
   1111          */
   1112         ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
   1113 #endif
   1114     }
   1115 
   1116     return error_status;
   1117 }
   1118 
   1119 /**
   1120 *******************************************************************************
   1121 *
   1122 * @brief   Initialize process context.
   1123 *
   1124 * @par Description:
   1125 *  Before dispatching the current job to process thread, the process context
   1126 *  associated with the job is initialized. Usually every job aims to encode one
   1127 *  row of mb's. Based on the row indices provided by the job, the process
   1128 *  context's buffer ptrs, slice indices and other elements that are necessary
   1129 *  during core-coding are initialized. The input rows of the job are also colour
   1130 *  converted / copied into the source buffers and padded at the right and bottom.
   1131 * @param[in,out] ps_proc
   1132 *  Pointer to the current process context
   1133 *
   1134 * @returns IH264E_SUCCESS (this function has no failure path)
   1135 *
   1136 * @remarks For 420 semi-planar input only u4_disp_wd bytes of each row are read from the input
   1137 *
   1138 *******************************************************************************
   1139 */
   1140 IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
   1141 {
   1142     /* codec context */
   1143     codec_t *ps_codec = ps_proc->ps_codec;
   1144 
   1145     /* nmb processing context */
   1146     n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
   1147 
   1148     /* indices */
   1149     WORD32 i4_mb_x, i4_mb_y;
   1150 
   1151     /* strides */
   1152     WORD32 i4_src_strd = ps_proc->i4_src_strd;
   1153     WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
   1154     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
   1155 
   1156     /* quant params */
   1157     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
   1158 
   1159     /* deblk ctxt */
   1160     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
   1161 
   1162     /* deblk bs context */
   1163     bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
   1164 
   1165     /* Pointer to mv_buffer of current frame */
   1166     mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
   1167 
   1168     /* Pointers for color space conversion */
   1169     UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
   1170 
   1171     /* Padding needed at the right / bottom (coded size minus display size) to support non standard sizes */
   1172     UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
   1173     UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
   1174     UWORD16 u2_num_rows = MB_SIZE;
   1175     WORD32 convert_uv_only;
   1176 
   1177     /********************************************************************/
   1178     /*                            BEGIN INIT                            */
   1179     /********************************************************************/
   1180 
   1181     i4_mb_x = ps_proc->i4_mb_x;
   1182     i4_mb_y = ps_proc->i4_mb_y;
   1183 
   1184     /* Number of mbs processed in one loop of process function */
   1185     ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
   1186     ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
   1187 
   1188     /* init buffer pointers; luma goes through the csc buffer when padding or 422ILE conversion is needed */
   1189     convert_uv_only = 1;
   1190     if (u4_pad_bottom_sz || u4_pad_right_sz ||
   1191         ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
   1192     {
   1193         if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
   1194             u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
   1195         ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
   1196         i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
   1197         ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
   1198         convert_uv_only = 0;
   1199     }
   1200     else
   1201     {
   1202         i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
   1203         ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
   1204     }
   1205 
   1206 
   1207     if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
   1208         ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
   1209         ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) ||
   1210         u4_pad_bottom_sz || u4_pad_right_sz)
   1211     {
   1212         if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
   1213             (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
   1214             ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
   1215 
   1216         ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
   1217         i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd;
   1218     }
   1219     else
   1220     {
   1221         i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
   1222         ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
   1223     }
   1224 
   1225     ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
   1226     ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
   1227 
   1228     /* Temporal back and forward reference buffer */
   1229     ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
   1230     ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
   1231     ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
   1232     ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
   1233 
   1234     /*
   1235      * Do color space conversion
   1236      * NOTE : We assume here that the number of MB's to process will not span multiple rows
   1237      */
   1238     switch (ps_codec->s_cfg.e_inp_color_fmt)
   1239     {
   1240         case IV_YUV_420SP_UV:
   1241         case IV_YUV_420SP_VU:
   1242             /* In case of 420 semi-planar input, copy last few rows to intermediate
   1243                buffer as chroma trans functions access one extra byte due to interleaved input.
   1244                This data will be padded if required */
   1245             if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz)
   1246             {
   1247                 WORD32 num_rows = MB_SIZE;
   1248                 UWORD8 *pu1_src;
   1249                 UWORD8 *pu1_dst;
   1250                 WORD32 i;
   1251                 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
   1252                           ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
   1253 
   1254                 pu1_dst = ps_proc->pu1_src_buf_luma;
   1255                 /* Copy only the u4_disp_wd input columns (as the 420P / 422ILE paths do); when i4_mb_x is 0 the right-edge padding further down fills the remaining columns */
   1256                 /* If padding is required, we always copy luma, if padding isn't required we never copy luma. */
   1257                 if (u4_pad_bottom_sz || u4_pad_right_sz) {
   1258                     if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
   1259                         num_rows = MB_SIZE - u4_pad_bottom_sz;
   1260                     for (i = 0; i < num_rows; i++)
   1261                     {
   1262                         memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
   1263                         pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
   1264                         pu1_dst += ps_proc->i4_src_strd;
   1265                     }
   1266                 }
   1267                 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
   1268                           ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
   1269                 pu1_dst = ps_proc->pu1_src_buf_chroma;
   1270 
   1271                 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
   1272                  * due to interleaved input. As for luma, only u4_disp_wd bytes of each row are read from the input
   1273                  */
   1274                 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
   1275                     num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
   1276                 else
   1277                     num_rows = BLK8x8SIZE;
   1278                 for (i = 0; i < num_rows; i++)
   1279                 {
   1280                     memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_disp_wd);
   1281                     pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
   1282                     pu1_dst += ps_proc->i4_src_chroma_strd;
   1283                 }
   1284 
   1285             }
   1286             break;
   1287 
   1288         case IV_YUV_420P :
   1289             pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
   1290                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
   1291 
   1292             pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
   1293                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
   1294 
   1295             pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
   1296                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
   1297 
   1298             ps_codec->pf_ih264e_conv_420p_to_420sp(
   1299                             pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
   1300                             ps_proc->pu1_src_buf_luma,
   1301                             ps_proc->pu1_src_buf_chroma, u2_num_rows,
   1302                             ps_codec->s_cfg.u4_disp_wd,
   1303                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
   1304                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
   1305                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
   1306                             ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
   1307                             convert_uv_only);
   1308             break;
   1309 
   1310         case IV_YUV_422ILE :
   1311             pu1_y_buf_base =  (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
   1312                               + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
   1313 
   1314             ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
   1315                             ps_proc->pu1_src_buf_luma,
   1316                             ps_proc->pu1_src_buf_chroma,
   1317                             ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
   1318                             ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
   1319                             ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
   1320                             ps_proc->i4_src_chroma_strd,
   1321                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
   1322             break;
   1323 
   1324         default:
   1325             break;
   1326     }
   1327     /* pad right edge (only when the job starts at mb 0) */
   1328     if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0))
   1329     {
   1330         UWORD32 u4_pad_wd, u4_pad_ht;
   1331         u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
   1332         u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
   1333         u4_pad_ht = MB_SIZE;
   1334         if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
   1335             u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
   1336 
   1337         ih264_pad_right_luma(
   1338                         ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
   1339                         ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
   1340 
   1341         ih264_pad_right_chroma(
   1342                         ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
   1343                         ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd);
   1344     }
   1345 
   1346     /* pad bottom edge (last mb row only) */
   1347     if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
   1348     {
   1349         ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
   1350                          ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
   1351 
   1352         ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
   1353                          ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2));
   1354     }
   1355 
   1356 
   1357     /* packed mb coeff data */
   1358     ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
   1359 
   1360     /* packed mb header data */
   1361     ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
   1362 
   1363     /* slice index */
   1364     ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
   1365 
   1366     /*********************************************************************/
   1367     /* ih264e_init_quant_params() routine is called at the pic init level*/
   1368     /* this would have initialized the qp.                               */
   1369     /* TODO_LATER: currently it is assumed that quant params do not      */
   1370     /* change across mb's. When they do, update ps_qp_params accordingly */
   1371     /*********************************************************************/
   1372 
   1373     /* init mv buffer ptr (start of the current mb row) */
   1374     ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
   1375                      ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
   1376 
   1377     /* Init co-located mv buffer. NOTE(review): aps_mv_buf[1] is dereferenced for every slice type - confirm it is always valid */
   1378     ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
   1379                         ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
   1380 
   1381     if (i4_mb_y == 0)
   1382     {
   1383         ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
   1384     }
   1385     else
   1386     {
   1387         ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
   1388                                     ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
   1389     }
   1390 
   1391     ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
   1392 
   1393     /* mb type */
   1394     ps_proc->u4_mb_type = I16x16;
   1395 
   1396     /* lambda */
   1397     ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
   1398 
   1399     /* mb distortion */
   1400     ps_proc->i4_mb_distortion = SHRT_MAX;
   1401 
   1402     if (i4_mb_x == 0)
   1403     {
   1404         ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
   1405 
   1406         ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
   1407 
   1408         ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
   1409 
   1410         if (i4_mb_y == 0)
   1411         {
   1412             memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
   1413         }
   1414     }
   1415 
   1416     /* mb cost */
   1417     ps_proc->i4_mb_cost = INT_MAX;
   1418 
   1419     /**********************/
   1420     /* init deblk context */
   1421     /**********************/
   1422     ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
   1423     /* deblk lags the current mb proc by 1 row */
   1424     /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
   1425     /* Hence to deblk MB 0 of row 0, you have to wait till MB 0 of row 1 is encoded. */
   1426     /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
   1427     ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
   1428 
   1429     /* buffer ptrs */
   1430     ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
   1431     ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
   1432 
   1433     /* init deblk bs context */
   1434     /* mb indices */
   1435     ps_bs->i4_mb_x = ps_proc->i4_mb_x;
   1436     ps_bs->i4_mb_y = ps_proc->i4_mb_y;
   1437 
   1438     /* init n_mb_process  context */
   1439     ps_n_mb_ctxt->i4_mb_x = 0;
   1440     ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
   1441     ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
   1442 
   1443     return IH264E_SUCCESS;
   1444 }
   1445 
   1446 /**
   1447 *******************************************************************************
   1448 *
   1449 * @brief This function performs luma & chroma padding
   1450 *
   1451 * @par Description:
   1452 *
   1453 * @param[in] ps_proc
   1454 *  Process context corresponding to the job
   1455 *
   1456 * @param[in] pu1_curr_pic_luma
   1457 *  Pointer to luma buffer
   1458 *
   1459 * @param[in] pu1_curr_pic_chroma
   1460 *  Pointer to chroma buffer
   1461 *
   1462 * @param[in] i4_mb_x
   1463 *  mb index x
   1464 *
   1465 * @param[in] i4_mb_y
   1466 *  mb index y
   1467 *
   1468 *  @param[in] i4_pad_ht
   1469 *  number of rows to be padded
   1470 *
   1471 * @returns  error status
   1472 *
   1473 * @remarks none
   1474 *
   1475 *******************************************************************************
   1476 */
   1477 IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
   1478                                        UWORD8 *pu1_curr_pic_luma,
   1479                                        UWORD8 *pu1_curr_pic_chroma,
   1480                                        WORD32 i4_mb_x,
   1481                                        WORD32 i4_mb_y,
   1482                                        WORD32 i4_pad_ht)
   1483 {
   1484     /* codec context */
   1485     codec_t *ps_codec = ps_proc->ps_codec;
   1486 
   1487     /* strides */
   1488     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
   1489 
   1490     if (i4_mb_x == 0)
   1491     {
   1492         /* padding left luma */
   1493         ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
   1494 
   1495         /* padding left chroma */
   1496         ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
   1497     }
   1498     if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
   1499     {
   1500         /* padding right luma */
   1501         ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
   1502 
   1503         /* padding right chroma */
   1504         ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
   1505 
   1506         if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
   1507         {
   1508             UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
   1509             UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
   1510 
   1511             /* padding bottom luma */
   1512             ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
   1513 
   1514             /* padding bottom chroma */
   1515             ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
   1516         }
   1517     }
   1518 
   1519     if (i4_mb_y == 0)
   1520     {
   1521         UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
   1522         UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
   1523         WORD32 wd = MB_SIZE;
   1524 
   1525         if (i4_mb_x == 0)
   1526         {
   1527             pu1_rec_luma -= PAD_LEFT;
   1528             pu1_rec_chroma -= PAD_LEFT;
   1529 
   1530             wd += PAD_LEFT;
   1531         }
   1532         if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
   1533         {
   1534             wd += PAD_RIGHT;
   1535         }
   1536 
   1537         /* padding top luma */
   1538         ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
   1539 
   1540         /* padding top chroma */
   1541         ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
   1542     }
   1543 
   1544     return IH264E_SUCCESS;
   1545 }
   1546 
   1547 
   1548 
   1549 
   1550 /**
   1551 *******************************************************************************
   1552 *
   1553 * @brief This function performs deblocking, padding and halfpel generation for
   1554 *  'n' MBs
   1555 *
   1556 * @par Description:
   1557 *
   1558 * @param[in] ps_proc
   1559 *  Process context corresponding to the job
   1560 *
   1561 * @param[in] pu1_curr_pic_luma
   1562 * Current MB being processed(Luma)
   1563 *
   1564 * @param[in] pu1_curr_pic_chroma
   1565 * Current MB being processed(Chroma)
   1566 *
   1567 * @param[in] i4_mb_x
   1568 * Column value of current MB processed
   1569 *
   1570 * @param[in] i4_mb_y
   1571 * Curent row processed
   1572 *
   1573 * @returns  error status
   1574 *
   1575 * @remarks none
   1576 *
   1577 *******************************************************************************
   1578 */
   1579 IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
   1580                                                      UWORD8 *pu1_curr_pic_luma,
   1581                                                      UWORD8 *pu1_curr_pic_chroma,
   1582                                                      WORD32 i4_mb_x,
   1583                                                      WORD32 i4_mb_y)
   1584 {
   1585     /* codec context */
   1586     codec_t *ps_codec = ps_proc->ps_codec;
   1587 
   1588     /* n_mb processing context */
   1589     n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
   1590 
   1591     /* deblk context */
   1592     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
   1593 
   1594     /* strides */
   1595     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
   1596 
   1597     /* loop variables */
   1598     WORD32 row, i, j, col;
   1599 
   1600     /* Padding Width */
   1601     UWORD32 u4_pad_wd;
   1602 
   1603     /* deblk_map of the row being deblocked */
   1604     UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
   1605 
   1606     /* deblk_map_previous row */
   1607     UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
   1608 
   1609     WORD32 u4_pad_top = 0;
   1610 
   1611     WORD32 u4_deblk_prev_row = 0;
   1612 
   1613     /* Number of mbs to be processed */
   1614     WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
   1615 
   1616     /* Number of mbs  actually processed
   1617      * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
   1618     WORD32 i4_n_mb_process_count = 0;
   1619 
   1620     UWORD8 *pu1_pad_bottom_src = NULL;
   1621 
   1622     UWORD8 *pu1_pad_src_luma = NULL;
   1623     UWORD8 *pu1_pad_src_chroma = NULL;
   1624 
   1625     if (ps_proc->u4_disable_deblock_level == 1)
   1626     {
   1627         /* If left most MB is processed, then pad left */
   1628         if (i4_mb_x == 0)
   1629         {
   1630             /* padding left luma */
   1631             ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
   1632 
   1633             /* padding left chroma */
   1634             ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
   1635         }
   1636         /*last col*/
   1637         if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
   1638         {
   1639             /* padding right luma */
   1640             ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
   1641 
   1642             /* padding right chroma */
   1643             ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
   1644         }
   1645     }
   1646 
   1647     if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
   1648     {
   1649         /* if number of mb's to be processed are less than 'N', go back.
   1650          * exception to the above clause is end of row */
   1651         if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
   1652         {
   1653             return IH264E_SUCCESS;
   1654         }
   1655         else
   1656         {
   1657             i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
   1658 
   1659             /* performing deblocking for required number of MBs */
   1660             if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
   1661             {
   1662                 u4_deblk_prev_row = 1;
   1663 
   1664                 /* checking whether the top rows are deblocked */
   1665                 for (col = 0; col < i4_n_mb_process_count; col++)
   1666                 {
   1667                     u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
   1668                 }
   1669 
   1670                 /* checking whether the top right MB is deblocked */
   1671                 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
   1672                 {
   1673                     u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
   1674                 }
   1675 
   1676                 /* Top or Top right MBs not deblocked */
   1677                 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
   1678                 {
   1679                     return IH264E_SUCCESS;
   1680                 }
   1681 
   1682                 for (row = 0; row < i4_n_mb_process_count; row++)
   1683                 {
   1684                     ih264e_deblock_mb(ps_proc, ps_deblk);
   1685 
   1686                     pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
   1687 
   1688                     if (ps_deblk->i4_mb_y > 0)
   1689                     {
   1690                         if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
   1691                         {
   1692                             /* padding left luma */
   1693                             ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
   1694 
   1695                             /* padding left chroma */
   1696                             ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
   1697                         }
   1698 
   1699                         if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
   1700                         {
   1701                             /* padding right luma */
   1702                             ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
   1703 
   1704                             /* padding right chroma */
   1705                             ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
   1706                         }
   1707                     }
   1708                     ps_deblk->i4_mb_x++;
   1709 
   1710                     ps_deblk->pu1_cur_pic_luma += MB_SIZE;
   1711                     ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
   1712 
   1713                 }
   1714             }
   1715             else if(i4_mb_y > 0)
   1716             {
   1717                 ps_deblk->i4_mb_x += i4_n_mb_process_count;
   1718 
   1719                 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
   1720                 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
   1721             }
   1722 
   1723             if (i4_mb_y == 2)
   1724             {
   1725                 u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
   1726                 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
   1727 
   1728                 if (ps_n_mb_ctxt->i4_mb_x == 0)
   1729                 {
   1730                     u4_pad_wd += PAD_LEFT;
   1731                     u4_pad_top = -PAD_LEFT;
   1732                 }
   1733 
   1734                 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
   1735                 {
   1736                     u4_pad_wd += PAD_RIGHT;
   1737                 }
   1738 
   1739                 /* padding top luma */
   1740                 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
   1741 
   1742                 /* padding top chroma */
   1743                 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
   1744             }
   1745 
   1746             ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
   1747 
   1748             if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
   1749             {
   1750                 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
   1751                 {
   1752                     /* Bottom Padding is done in one stretch for the entire width */
   1753                     if (ps_proc->u4_disable_deblock_level != 1)
   1754                     {
   1755                         ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
   1756 
   1757                         ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
   1758 
   1759                         ps_n_mb_ctxt->i4_mb_x = 0;
   1760                         ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
   1761                         ps_deblk->i4_mb_x = 0;
   1762                         ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
   1763 
   1764                         /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
   1765                         ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
   1766 
   1767                         i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
   1768 
   1769                         j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
   1770 
   1771                         for (i = 0; i < j; i++)
   1772                         {
   1773                             for (col = 0; col < i4_n_mbs; col++)
   1774                             {
   1775                                 ih264e_deblock_mb(ps_proc, ps_deblk);
   1776 
   1777                                 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
   1778 
   1779                                 ps_deblk->i4_mb_x++;
   1780                                 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
   1781                                 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
   1782                                 ps_n_mb_ctxt->i4_mb_x++;
   1783                             }
   1784                         }
   1785 
   1786                         for (col = 0; col < i4_n_mb_process_count; col++)
   1787                         {
   1788                             ih264e_deblock_mb(ps_proc, ps_deblk);
   1789 
   1790                             pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
   1791 
   1792                             ps_deblk->i4_mb_x++;
   1793                             ps_deblk->pu1_cur_pic_luma += MB_SIZE;
   1794                             ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
   1795                             ps_n_mb_ctxt->i4_mb_x++;
   1796                         }
   1797 
   1798                         pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
   1799 
   1800                         pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
   1801 
   1802                         /* padding left luma */
   1803                         ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
   1804 
   1805                         /* padding left chroma */
   1806                         ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
   1807 
   1808                         pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
   1809                         pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
   1810 
   1811                         /* padding left luma */
   1812                         ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
   1813 
   1814                         /* padding left chroma */
   1815                         ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
   1816 
   1817                         pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
   1818 
   1819                         pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
   1820 
   1821                         /* padding right luma */
   1822                         ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
   1823 
   1824                         /* padding right chroma */
   1825                         ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
   1826 
   1827                         pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
   1828                         pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
   1829 
   1830                         /* padding right luma */
   1831                         ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
   1832 
   1833                         /* padding right chroma */
   1834                         ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
   1835 
   1836                     }
   1837 
   1838                     /* In case height is less than 2 MBs pad top */
   1839                     if (ps_proc->i4_ht_mbs <= 2)
   1840                     {
   1841                         UWORD8 *pu1_pad_top_src;
   1842                         /* padding top luma */
   1843                         pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
   1844                         ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
   1845 
   1846                         /* padding top chroma */
   1847                         pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
   1848                         ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
   1849                     }
   1850 
   1851                     /* padding bottom luma */
   1852                     pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
   1853                     ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
   1854 
   1855                     /* padding bottom chroma */
   1856                     pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
   1857                     ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
   1858                 }
   1859             }
   1860         }
   1861     }
   1862 
   1863     return IH264E_SUCCESS;
   1864 }
   1865 
   1866 
   1867 /**
   1868 *******************************************************************************
   1869 *
   1870 * @brief This function performs luma & chroma core coding for a set of mb's.
   1871 *
   1872 * @par Description:
   1873 *  The mb to be coded is taken and is evaluated over a predefined set of modes
   1874 *  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
   1875 *  is selected and using intra/inter prediction filters, prediction is carried out.
   1876 *  The deviation between src and pred signal constitutes error signal. This error
   1877 *  signal is transformed (hierarchical transform if necessary) and quantized. The
   1878 *  quantized residue is packed in to entropy buffer for entropy coding. This is
   1879 *  repeated for all the mb's enlisted under the job.
   1880 *
   1881 * @param[in] ps_proc
   1882 *  Process context corresponding to the job
   1883 *
   1884 * @returns  error status
   1885 *
   1886 * @remarks none
   1887 *
   1888 *******************************************************************************
   1889 */
   1890 WORD32 ih264e_process(process_ctxt_t *ps_proc)
   1891 {
   1892     /* error status */
   1893     WORD32 error_status = IH264_SUCCESS;
   1894 
   1895     /* codec context */
   1896     codec_t *ps_codec = ps_proc->ps_codec;
   1897 
   1898     /* cbp luma, chroma */
   1899     UWORD32 u4_cbp_l, u4_cbp_c;
   1900 
   1901     /* width in mbs */
   1902     WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
   1903 
   1904     /* loop var */
   1905     WORD32  i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
   1906 
   1907     /* valid modes */
   1908     UWORD32 u4_valid_modes = 0;
   1909 
   1910     /* gate threshold */
   1911     WORD32 i4_gate_threshold = 0;
   1912 
   1913     /* is intra */
   1914     WORD32 luma_idx, chroma_idx, is_intra;
   1915 
   1916     /* temp variables */
   1917     WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
   1918 
   1919     /*
   1920      * list of modes for evaluation
   1921      * -------------------------------------------------------------------------
   1922      * Note on enabling I4x4 and I16x16
   1923      * At very low QP's the hadamard transform in I16x16 will push up the maximum
   1924      * coeff value very high. CAVLC may not be able to represent the value and
   1925      * hence the stream may not be decodable in some clips.
   1926      * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset.
   1927      */
   1928     if (ps_proc->i4_slice_type == ISLICE)
   1929     {
   1930         if (ps_proc->u4_frame_qp > 10)
   1931         {
   1932             /* enable intra 16x16 */
   1933             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
   1934 
   1935             /* enable intra 8x8 */
   1936             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
   1937         }
   1938 
   1939         /* enable intra 4x4 */
   1940         u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
   1941         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
   1942 
   1943     }
   1944     else if (ps_proc->i4_slice_type == PSLICE)
   1945     {
   1946         if (ps_proc->u4_frame_qp > 10)
   1947         {
   1948             /* enable intra 16x16 */
   1949             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
   1950         }
   1951 
   1952         /* enable intra 4x4 */
   1953         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
   1954         {
   1955             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
   1956         }
   1957         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
   1958 
   1959         /* enable inter P16x16 */
   1960         u4_valid_modes |= (1 << P16x16);
   1961     }
   1962     else if (ps_proc->i4_slice_type == BSLICE)
   1963     {
   1964         if (ps_proc->u4_frame_qp > 10)
   1965         {
   1966             /* enable intra 16x16 */
   1967             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
   1968         }
   1969 
   1970         /* enable intra 4x4 */
   1971         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
   1972         {
   1973             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
   1974         }
   1975         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
   1976 
   1977         /* enable inter B16x16 */
   1978         u4_valid_modes |= (1 << B16x16);
   1979     }
   1980 
   1981 
   1982     /* init entropy */
   1983     ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
   1984     ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
   1985     ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
   1986 
   1987     /* compute recon when :
   1988      *   1. current frame is to be used as a reference
   1989      *   2. dump recon for bit stream sanity check
   1990      */
   1991     ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
   1992                                 ps_codec->s_cfg.u4_enable_recon;
   1993 
   1994     /* Encode 'n' macroblocks,
   1995      * 'n' being the number of mbs dictated by current proc ctxt */
   1996     for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
   1997     {
   1998         /* since we have not yet found sad, we have not yet got min sad */
   1999         /* we need to initialize these variables for each MB */
   2000         /* TODO how to get the min sad into the codec */
   2001         ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
   2002         ps_proc->u4_min_sad_reached = 0;
   2003 
   2004         /* mb analysis */
   2005         {
   2006             /* temp var */
   2007             WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
   2008 
   2009             /* force intra refresh ? */
   2010             WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
   2011                             (ps_proc->pu1_is_intra_coded[i4_mb_id] != 0) ||
   2012                             (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
   2013 
   2014             /* evaluate inter 16x16 modes */
   2015             if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
   2016             {
   2017                 /* compute nmb me */
   2018                 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
   2019                 {
   2020                     ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
   2021                                                        i4_wd_mbs - ps_proc->i4_mb_x));
   2022                 }
   2023 
   2024                 /* set pointers to ME data appropriately for other modules to use */
   2025                 {
   2026                     UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
   2027 
   2028                     /* get the min sad condition for current mb */
   2029                     ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
   2030                     ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
   2031 
   2032                     ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
   2033                     ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
   2034                     ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);
   2035 
   2036                     ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
   2037                     ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
   2038                     ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
   2039                     ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
   2040                     ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
   2041 
   2042                     /* get the best sub pel buffer */
   2043                     ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
   2044                     ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
   2045                 }
   2046                 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
   2047             }
   2048             else
   2049             {
   2050                 /* Derive neighbor availability for the current macroblock */
   2051                 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
   2052 
   2053                 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
   2054             }
   2055 
   2056             /*
   2057              * If air says intra, we need to force the following code path to evaluate intra
   2058              * The easy way is just to say that the inter cost is too much
   2059              */
   2060             if (!i4_air_enable_inter)
   2061             {
   2062                 ps_proc->u4_min_sad_reached = 0;
   2063                 ps_proc->i4_mb_cost = INT_MAX;
   2064                 ps_proc->i4_mb_distortion = INT_MAX;
   2065             }
   2066             else if (ps_proc->u4_mb_type == PSKIP)
   2067             {
   2068                 goto UPDATE_MB_INFO;
   2069             }
   2070 
   2071             /* wait until the proc of [top + 1] mb is computed.
   2072              * We wait till the proc dependencies are satisfied */
   2073              if(ps_proc->i4_mb_y > 0)
   2074              {
   2075                 /* proc map */
   2076                 UWORD8  *pu1_proc_map_top;
   2077 
   2078                 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
   2079 
   2080                 while (1)
   2081                 {
   2082                     volatile UWORD8 *pu1_buf;
   2083                     WORD32 idx = i4_mb_idx + 1;
   2084 
   2085                     idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
   2086                     pu1_buf =  pu1_proc_map_top + idx;
   2087                     if(*pu1_buf)
   2088                         break;
   2089                     ithread_yield();
   2090                 }
   2091             }
   2092 
   2093             /* If we already have the minimum sad, there is no point in searching for sad again */
   2094             if (ps_proc->u4_min_sad_reached == 0)
   2095             {
   2096                 /* intra gating in inter slices */
   2097                 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
   2098                 if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
   2099                 {
   2100                     /* distortion of neighboring blocks */
   2101                     WORD32 i4_distortion[4];
   2102 
   2103                     i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
   2104 
   2105                     i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
   2106 
   2107                     i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
   2108 
   2109                     i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
   2110 
   2111                     i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
   2112 
   2113                 }
   2114 
   2115 
   2116                 /* If we are going to force intra we need to evaluate intra irrespective of gating */
   2117                 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
   2118                 {
   2119                     /* evaluate intra 4x4 modes */
   2120                     if (u4_valid_modes & (1 << I4x4))
   2121                     {
   2122                         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
   2123                         {
   2124                             ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
   2125                         }
   2126                         else
   2127                         {
   2128                             ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
   2129                         }
   2130                     }
   2131 
   2132                     /* evaluate intra 16x16 modes */
   2133                     if (u4_valid_modes & (1 << I16x16))
   2134                     {
   2135                         ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
   2136                     }
   2137 
   2138                     /* evaluate intra 8x8 modes */
   2139                     if (u4_valid_modes & (1 << I8x8))
   2140                     {
   2141                         ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
   2142                     }
   2143 
   2144                 }
   2145         }
   2146      }
   2147 
   2148         /* is intra */
   2149         if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
   2150         {
   2151             luma_idx = ps_proc->u4_mb_type;
   2152             chroma_idx = 0;
   2153             is_intra = 1;
   2154 
   2155             /* evaluate chroma blocks for intra */
   2156             ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
   2157         }
   2158         else
   2159         {
   2160             luma_idx = 3;
   2161             chroma_idx = 1;
   2162             is_intra = 0;
   2163         }
   2164         ps_proc->u4_is_intra = is_intra;
   2165         ps_proc->ps_pu->b1_intra_flag = is_intra;
   2166 
   2167         /* redo MV pred of neighbors in the case intra mb */
   2168         /* TODO : currently called unconditionally, needs to be called only in the case of intra
   2169          * to modify neighbors */
   2170         if (ps_proc->i4_slice_type != ISLICE)
   2171         {
   2172             ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
   2173         }
   2174 
   2175         /* Perform luma mb core coding */
   2176         u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
   2177 
   2178         /* Perform chroma mb core coding */
   2179         u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
   2180 
   2181         /* coded block pattern */
   2182         ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
   2183 
   2184         if (!ps_proc->u4_is_intra)
   2185         {
   2186             if (ps_proc->i4_slice_type == BSLICE)
   2187             {
   2188                 if (ih264e_find_bskip_params(ps_proc, PRED_L0))
   2189                 {
   2190                     ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
   2191                 }
   2192             }
   2193             else if(!ps_proc->u4_cbp)
   2194             {
   2195                 if (ih264e_find_pskip_params(ps_proc, PRED_L0))
   2196                 {
   2197                     ps_proc->u4_mb_type = PSKIP;
   2198                 }
   2199             }
   2200         }
   2201 
   2202 UPDATE_MB_INFO:
   2203 
   2204         /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
   2205         ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
   2206 
   2207         /**********************************************************************/
   2208         /* if disable deblock level is '0' this implies enable deblocking for */
   2209         /* all edges of all macroblocks with out any restrictions             */
   2210         /*                                                                    */
   2211         /* if disable deblock level is '1' this implies disable deblocking for*/
   2212         /* all edges of all macroblocks with out any restrictions             */
   2213         /*                                                                    */
   2214         /* if disable deblock level is '2' this implies enable deblocking for */
   2215         /* all edges of all macroblocks except edges overlapping with slice   */
   2216         /* boundaries. This option is not currently supported by the encoder  */
   2217         /* hence the slice map should be of no significance to perform debloc */
   2218         /* king                                                               */
   2219         /**********************************************************************/
   2220 
   2221         if (ps_proc->u4_compute_recon)
   2222         {
   2223             /* deblk context */
   2224             /* src pointers */
   2225             UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
   2226             UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
   2227 
   2228             /* src indices */
   2229             UWORD32 i4_mb_x = ps_proc->i4_mb_x;
   2230             UWORD32 i4_mb_y = ps_proc->i4_mb_y;
   2231 
   2232             /* compute blocking strength */
   2233             if (ps_proc->u4_disable_deblock_level != 1)
   2234             {
   2235                 ih264e_compute_bs(ps_proc);
   2236             }
   2237 
   2238             /* nmb deblocking and hpel and padding */
   2239             ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
   2240                                                   pu1_cur_pic_chroma, i4_mb_x,
   2241                                                   i4_mb_y);
   2242         }
   2243 
   2244         /* update the context for coding the next mb */
   2245         error_status |= ih264e_update_proc_ctxt(ps_proc);
   2246 
   2247         /* Once the last row is processed, mark the buffer status appropriately */
   2248         if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
   2249         {
   2250             /* Pointer to current picture buffer structure */
   2251             pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
   2252 
   2253             /* Pointer to current picture's mv buffer structure */
   2254             mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
   2255 
   2256             /**********************************************************************/
   2257             /* if disable deblock level is '0' this implies enable deblocking for */
   2258             /* all edges of all macroblocks with out any restrictions             */
   2259             /*                                                                    */
   2260             /* if disable deblock level is '1' this implies disable deblocking for*/
   2261             /* all edges of all macroblocks with out any restrictions             */
   2262             /*                                                                    */
   2263             /* if disable deblock level is '2' this implies enable deblocking for */
   2264             /* all edges of all macroblocks except edges overlapping with slice   */
   2265             /* boundaries. This option is not currently supported by the encoder  */
   2266             /* hence the slice map should be of no significance to perform debloc */
   2267             /* king                                                               */
   2268             /**********************************************************************/
   2269             error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
   2270 
   2271             error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
   2272 
   2273             if (ps_codec->s_cfg.u4_enable_recon)
   2274             {
   2275                 /* pic cnt */
   2276                 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
   2277 
   2278                 /* rec buffers */
   2279                 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf  = *ps_proc->ps_cur_pic;
   2280 
   2281                 /* is last? */
   2282                 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
   2283 
   2284                 /* frame time stamp */
   2285                 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
   2286                 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
   2287             }
   2288 
   2289         }
   2290     }
   2291 
   2292     DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
   2293 
   2294     return error_status;
   2295 }
   2296 
   2297 /**
   2298 *******************************************************************************
   2299 *
   2300 * @brief
   2301 *  Function to update rc context after encoding
   2302 *
   2303 * @par   Description
   2304 *  This function updates the rate control context after the frame is encoded.
   2305 *  The per-context frame stats (mb counts, sad, qp sums, intra mb cost,
   2306 *  activity) and the entropy bit counts of u4_num_cores process contexts are
   2307 *  accumulated and handed to ih264e_rc_post_enc() to update the rc model.
   2308 *
   2309 * @param[in] ps_codec
   2310 *  Handle to codec context
   2311 *
   2312 * @param[in] ctxt_sel
   2313 *  frame context selector; non-zero selects the second half of as_process[]
   2314 *
   2315 * @param[in] i4_is_first_frm
   2316 *  first-frame indication, forwarded unchanged to ih264e_rc_post_enc()
   2317 *
   2318 * @returns i4_stuffing_byte
   2319 *  value returned by ih264e_rc_post_enc(): number of stuffing bytes (if necessary)
   2320 *
   2321 * @remarks  An unexpected ps_codec->pic_type trips assert(0); when asserts are
   2322 *  compiled out, the frame is reported to rate control as I_PIC.
   2323 *******************************************************************************
   2324 */
   2325 WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
   2326 {
   2327     /* proc set base idx */
   2328     WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
   2329 
   2330     /* proc ctxt: first context of the selected set, indexed as ps_proc[i] below */
   2331     process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
   2332 
   2333     /* frame qp */
   2334     UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
   2335 
   2336     /* cbr rc return status */
   2337     WORD32 i4_stuffing_byte = 0;
   2338 
   2339     /* current frame stats; rc_pic_type gets a defined value so it is never read uninitialised */
   2340     frame_info_t s_frame_info;
   2341     picture_type_e rc_pic_type = I_PIC;
   2342 
   2343     /* temp var */
   2344     WORD32 i, j;
   2345 
   2346     /********************************************************************/
   2347     /*                            BEGIN INIT                            */
   2348     /********************************************************************/
   2349 
   2350     /* init frame info */
   2351     irc_init_frame_info(&s_frame_info);
   2352 
   2353     /* get frame info */
   2354     for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
   2355     {
   2356         /*****************************************************************/
   2357         /* One frame can be encoded by max of u4_num_cores threads       */
   2358         /* Accumulating the num mbs, sad, qp and intra_mb_cost from      */
   2359         /* u4_num_cores threads                                          */
   2360         /*****************************************************************/
   2361         for (j = 0; j< MAX_MB_TYPE; j++)
   2362         {
   2363             s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
   2364 
   2365             s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
   2366 
   2367             s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
   2368         }
   2369 
   2370         s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
   2371 
   2372         s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
   2373 
   2374         /*****************************************************************/
   2375         /* gather number of residue and header bits consumed by the frame*/
   2376         /*****************************************************************/
   2377         ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
   2378     }
   2379 
   2380     /* map the codec picture type onto the rate control picture type */
   2381     switch (ps_codec->pic_type)
   2382     {
   2383         case PIC_I:
   2384         case PIC_IDR:
   2385             rc_pic_type = I_PIC;
   2386             break;
   2387         case PIC_P:
   2388             rc_pic_type = P_PIC;
   2389             break;
   2390         case PIC_B:
   2391             rc_pic_type = B_PIC;
   2392             break;
   2393         default: /* unexpected type: rc_pic_type keeps its I_PIC initialiser if the assert is compiled out */
   2394             assert(0);
   2395             break;
   2396     }
   2397 
   2398     /* update rc lib with current frame stats */
   2399     i4_stuffing_byte =  ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
   2400                                           &(s_frame_info),
   2401                                           ps_codec->s_rate_control.pps_pd_frm_rate,
   2402                                           ps_codec->s_rate_control.pps_time_stamp,
   2403                                           ps_codec->s_rate_control.pps_frame_time,
   2404                                           (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
   2405                                           &rc_pic_type,
   2406                                           i4_is_first_frm,
   2407                                           &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
   2408                                           u1_frame_qp,
   2409                                           &ps_codec->s_rate_control.num_intra_in_prev_frame,
   2410                                           &ps_codec->s_rate_control.i4_avg_activity);
   2411     return i4_stuffing_byte;
   2412 }
   2413 
   2414 /**
   2415 *******************************************************************************
   2416 *
   2417 * @brief
   2418 *  entry point of a spawned encoder thread
   2419 *
   2420 * @par Description:
   2421 *  Repeatedly dequeues a job - the entropy queue is tried first (only while no
   2422 *  entropy job is flagged active), then the proc queue - and runs it.
   2423 *
   2424 * @param[in] pv_proc
   2425 *  Process context corresponding to the thread
   2426 *
   2427 * @returns  status of the last (failed) queue dequeue, not the job error status
   2428 *
   2429 * @remarks  The OR of all job results is stored in i4_error_code of the process
   2430 *  context selected for the last job handled (pv_proc itself if none was).
   2431 *******************************************************************************
   2432 */
   2433 WORD32 ih264e_process_thread(void *pv_proc)
   2434 {
   2435     /* ret: status of the latest queue dequeue; error_status: OR of all job results */
   2436     IH264_ERROR_T ret = IH264_SUCCESS;
   2437     WORD32 error_status = IH264_SUCCESS;
   2438 
   2439     /* proc ctxt; re-pointed at WORKER to the context that serves each job */
   2440     process_ctxt_t *ps_proc = pv_proc;
   2441 
   2442     /* codec ctxt */
   2443     codec_t *ps_codec = ps_proc->ps_codec;
   2444 
   2445     /* structure to represent a processing job entry */
   2446     job_t s_job;
   2447 
   2448     /* blocking argument of the entropy dequeue : starts non-blocking and is set to 1
   2449      * by the context with i4_id == 0 once a proc queue dequeue has failed */
   2450     WORD32 is_blocking = 0;
   2451 
   2452     /* set affinity */
   2453     ithread_set_affinity(ps_proc->i4_id);
   2454 
   2455     while(1)
   2456     {
   2457         /* dequeue a job from the entropy queue */
   2458         {
   2459             int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
   2460 
   2461             /* codec context selector */
   2462             WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
   2463             /* per context set "entropy job active" flag; only dereferenced below while the lock is held */
   2464             volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
   2465 
   2466             /* have the lock (on lock failure the entropy queue is skipped for this iteration) */
   2467             if (error == 0)
   2468             {
   2469                 if (*pu4_buf == 0)
   2470                 {
   2471                     /* no entropy threads are active, try dequeuing a job from the entropy queue */
   2472                     ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
   2473                     if (IH264_SUCCESS == ret)
   2474                     {
   2475                         *pu4_buf = 1; /* mark an entropy job as active; the flag is not cleared in this function */
   2476                         ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
   2477                         goto WORKER;
   2478                     }
   2479                     else if(is_blocking) /* dequeue failed with is_blocking set: leave the thread loop */
   2480                     {
   2481                         ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
   2482                         break;
   2483                     }
   2484                 }
   2485                 ithread_mutex_unlock(ps_codec->pv_entropy_mutex); /* no entropy job taken: release and try the proc queue */
   2486             }
   2487         }
   2488 
   2489         /* dequeue a job from the process queue (third argument 1 - presumably a blocking dequeue) */
   2490         ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
   2491         if (IH264_SUCCESS != ret) /* non-zero i4_id exits; i4_id 0 retries with the entropy dequeue set to blocking */
   2492         {
   2493             if(ps_proc->i4_id)
   2494                 break;
   2495             else
   2496             {
   2497                 is_blocking = 1;
   2498                 continue;
   2499             }
   2500         }
   2501 
   2502 WORKER:
   2503         /* choose appropriate proc context based on proc_base_idx; ps_proc stays re-pointed for later iterations */
   2504         ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
   2505 
   2506         switch (s_job.i4_cmd)
   2507         {
   2508             case CMD_PROCESS:
   2509                 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
   2510                 ps_proc->i4_mb_x = s_job.i2_mb_x;
   2511                 ps_proc->i4_mb_y = s_job.i2_mb_y;
   2512 
   2513                 /* init process context */
   2514                 ih264e_init_proc_ctxt(ps_proc);
   2515 
   2516                 /* core code all mbs enlisted under the current job */
   2517                 error_status |= ih264e_process(ps_proc);
   2518                 break;
   2519 
   2520             case CMD_ENTROPY:
   2521                 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
   2522                 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
   2523                 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
   2524 
   2525                 /* init entropy */
   2526                 ih264e_init_entropy_ctxt(ps_proc);
   2527 
   2528                 /* entropy code all mbs enlisted under the current job */
   2529                 error_status |= ih264e_entropy(ps_proc);
   2530                 break;
   2531 
   2532             default: /* unknown job command: recorded as a failure, the loop keeps running */
   2533                 error_status |= IH264_FAIL;
   2534                 break;
   2535         }
   2536     }
   2537 
   2538     /* send error code: stored in the context selected for the last job, not necessarily pv_proc */
   2539     ps_proc->i4_error_code = error_status;
   2540     return ret; /* the loop is left only after a failed dequeue, so this is that dequeue's status */
   2541 }
   2542