Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2015 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 
     21 /**
     22 *******************************************************************************
     23 * @file
     24 *  ih264e_process.c
     25 *
     26 * @brief
     27 *  Contains functions for codec thread
     28 *
     29 * @author
     30 *  Harish
     31 *
     32 * @par List of Functions:
     33 * - ih264e_generate_sps_pps()
     34 * - ih264e_init_entropy_ctxt()
     35 * - ih264e_entropy()
     36 * - ih264e_pack_header_data()
     37 * - ih264e_update_proc_ctxt()
     38 * - ih264e_init_proc_ctxt()
     39 * - ih264e_pad_recon_buffer()
     40 * - ih264e_dblk_pad_hpel_processing_n_mbs()
     41 * - ih264e_process()
     42 * - ih264e_set_rc_pic_params()
     43 * - ih264e_update_rc_post_enc()
     44 * - ih264e_process_thread()
     45 *
     46 * @remarks
     47 *  None
     48 *
     49 *******************************************************************************
     50 */
     51 
     52 /*****************************************************************************/
     53 /* File Includes                                                             */
     54 /*****************************************************************************/
     55 
     56 /* System include files */
     57 #include <stdio.h>
     58 #include <stddef.h>
     59 #include <stdlib.h>
     60 #include <string.h>
     61 #include <limits.h>
     62 #include <assert.h>
     63 
     64 /* User include files */
     65 #include "ih264_typedefs.h"
     66 #include "iv2.h"
     67 #include "ive2.h"
     68 #include "ih264_defs.h"
     69 #include "ih264_debug.h"
     70 #include "ime_distortion_metrics.h"
     71 #include "ime_defs.h"
     72 #include "ime_structs.h"
     73 #include "ih264_error.h"
     74 #include "ih264_structs.h"
     75 #include "ih264_trans_quant_itrans_iquant.h"
     76 #include "ih264_inter_pred_filters.h"
     77 #include "ih264_mem_fns.h"
     78 #include "ih264_padding.h"
     79 #include "ih264_intra_pred_filters.h"
     80 #include "ih264_deblk_edge_filters.h"
     81 #include "ih264_cabac_tables.h"
     82 #include "ih264_platform_macros.h"
     83 #include "ih264_macros.h"
     84 #include "ih264_buf_mgr.h"
     85 #include "ih264e_error.h"
     86 #include "ih264e_bitstream.h"
     87 #include "ih264_common_tables.h"
     88 #include "ih264_list.h"
     89 #include "ih264e_defs.h"
     90 #include "irc_cntrl_param.h"
     91 #include "irc_frame_info_collector.h"
     92 #include "ih264e_rate_control.h"
     93 #include "ih264e_cabac_structs.h"
     94 #include "ih264e_structs.h"
     95 #include "ih264e_cabac.h"
     96 #include "ih264e_process.h"
     97 #include "ithread.h"
     98 #include "ih264e_intra_modes_eval.h"
     99 #include "ih264e_encode_header.h"
    100 #include "ih264e_globals.h"
    101 #include "ih264e_config.h"
    102 #include "ih264e_trace.h"
    103 #include "ih264e_statistics.h"
    104 #include "ih264_cavlc_tables.h"
    105 #include "ih264e_cavlc.h"
    106 #include "ih264e_deblk.h"
    107 #include "ih264e_me.h"
    108 #include "ih264e_debug.h"
    109 #include "ih264e_master.h"
    110 #include "ih264e_utils.h"
    111 #include "irc_mem_req_and_acq.h"
    112 #include "irc_rate_control_api.h"
    113 #include "ih264e_platform_macros.h"
    114 #include "ime_statistics.h"
    115 
    116 
    117 /*****************************************************************************/
    118 /* Function Definitions                                                      */
    119 /*****************************************************************************/
    120 
    121 /**
    122 ******************************************************************************
    123 *
    124 *  @brief This function generates sps, pps set on request
    125 *
    126 *  @par   Description
    127 *  When the encoder is set in header generation mode, the following function
    128 *  is called. This generates sps and pps headers and returns the control back
    129 *  to caller.
    130 *
    131 *  @param[in]    ps_codec
    132 *  pointer to codec context
    133 *
    134 *  @return      success or failure error code
    135 *
    136 ******************************************************************************
    137 */
    138 IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
    139 {
    140     /* choose between ping-pong process buffer set */
    141     WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
    142 
    143     /* entropy ctxt */
    144     entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
    145 
    146     /* Bitstream structure */
    147     bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
    148 
    149     /* sps */
    150     sps_t *ps_sps = NULL;
    151 
    152     /* pps */
    153     pps_t *ps_pps = NULL;
    154 
    155     /* output buff */
    156     out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
    157 
    158 
    159     /********************************************************************/
    160     /*      initialize the bit stream buffer                            */
    161     /********************************************************************/
    162     ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
    163 
    164     /********************************************************************/
    165     /*                    BEGIN HEADER GENERATION                       */
    166     /********************************************************************/
    167     /*ps_codec->i4_pps_id ++;*/
    168     ps_codec->i4_pps_id %= MAX_PPS_CNT;
    169 
    170     /*ps_codec->i4_sps_id ++;*/
    171     ps_codec->i4_sps_id %= MAX_SPS_CNT;
    172 
    173     /* populate sps header */
    174     ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
    175     ih264e_populate_sps(ps_codec, ps_sps);
    176 
    177     /* populate pps header */
    178     ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
    179     ih264e_populate_pps(ps_codec, ps_pps);
    180 
    181     ps_entropy->i4_error_code = IH264E_SUCCESS;
    182 
    183     /* generate sps */
    184     ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps,
    185                                                      &ps_codec->s_cfg.s_vui);
    186 
    187     /* generate pps */
    188     ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
    189 
    190     /* queue output buffer */
    191     ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
    192 
    193     return ps_entropy->i4_error_code;
    194 }
    195 
    196 /**
    197 *******************************************************************************
    198 *
    199 * @brief   initialize entropy context.
    200 *
    201 * @par Description:
    202 *  Before invoking the call to perform to entropy coding the entropy context
    203 *  associated with the job needs to be initialized. This involves the start
    204 *  mb address, end mb address, slice index and the pointer to location at
    205 *  which the mb residue info and mb header info are packed.
    206 *
    207 * @param[in] ps_proc
    208 *  Pointer to the current process context
    209 *
    210 * @returns error status
    211 *
    212 * @remarks none
    213 *
    214 *******************************************************************************
    215 */
    216 IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
    217 {
    218     /* codec context */
    219     codec_t *ps_codec = ps_proc->ps_codec;
    220 
    221     /* entropy ctxt */
    222     entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
    223 
    224     /* start address */
    225     ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
    226 
    227     /* end address */
    228     ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
    229 
    230     /* slice index */
    231     ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
    232 
    233     /* sof */
    234     /* @ start of frame or start of a new slice, set sof flag */
    235     if (ps_entropy->i4_mb_start_add == 0)
    236     {
    237         ps_entropy->i4_sof = 1;
    238     }
    239 
    240     if (ps_entropy->i4_mb_x == 0)
    241     {
    242         /* packed mb coeff data */
    243         ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
    244                         ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
    245 
    246         /* packed mb header data */
    247         ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
    248                         ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
    249     }
    250 
    251     return IH264E_SUCCESS;
    252 }
    253 
    254 /**
    255 *******************************************************************************
    256 *
    257 * @brief entry point for entropy coding
    258 *
    259 * @par Description
    260 *  Entropy codes the mbs of one job, i.e. the mb addresses from
    261 *  i4_mb_start_add up to (not including) i4_mb_end_add of the entropy context.
    262 *  For every mb the function waits until the mb is flagged in the proc map,
    263 *  writes its syntax layer through pf_write_mb_syntax_layer and then flags it
    264 *  in the entropy map. Per-row pointers are re-derived whenever i4_mb_x wraps.
        *
        *  If i4_sof is set on entry, the bitstream is initialised on the output
        *  buffer, sps/pps are emitted when i4_gen_header is 1, and the slice
        *  header is generated. Once the last mb of the picture has been coded,
        *  the slice is closed, ih264e_update_rc_post_enc() is called, filler data
        *  is added if it returns a non-zero stuffing count, i4_frame_num is
        *  advanced for non-skipped reference frames and the byte count is stored
        *  in as_out_buf[ctxt_sel].s_bits_buf.u4_bytes.
    265 *
    266 * @param[in] ps_proc
    267 *  process context
    268 *
    269 * @returns  error status
        *  (value of ps_entropy->i4_error_code, into which the status of the
        *  header generators and mb writers is OR-ed; it is not reset here)
    270 *
    271 * @remarks
        *  au4_entropy_thread_active[ctxt_sel] is cleared before returning,
        *  regardless of the error status.
    272 *
    273 *******************************************************************************
    274 */
    275 
    276 IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
    277 {
    278     /* codec context */
    279     codec_t *ps_codec = ps_proc->ps_codec;
    280 
    281     /* entropy context */
    282     entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
    283 
    284     /* cabac context */
    285     cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
    286 
    287     /* sps */
    288     sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
    289 
    290     /* pps */
    291     pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
    292 
    293     /* slice header */
    294     slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
    295 
    296     /* slice type */
    297     WORD32 i4_slice_type = ps_proc->i4_slice_type;
    298 
    299     /* Bitstream structure */
    300     bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
    301 
    302     /* output buff (local by-value copy of the descriptor, assigned only when i4_sof is set) */
    303     out_buf_t s_out_buf;
    304 
    305     /* proc map */
    306     UWORD8  *pu1_proc_map;
    307 
    308     /* entropy map */
    309     UWORD8  *pu1_entropy_map_curr;
    310 
    311     /* context set selector; indexes as_out_buf, post_encode_skip and au4_entropy_thread_active below */
    312     WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
    313 
    314     /* temp var */
    315     WORD32 i4_wd_mbs, i4_ht_mbs;
    316     UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
    317     WORD32 bitstream_start_offset, bitstream_end_offset;
    318     /********************************************************************/
    319     /*                            BEGIN INIT                            */
    320     /********************************************************************/
    321 
    322     /* entropy encode start address */
    323     u4_mb_idx = ps_entropy->i4_mb_start_add;
    324 
    325     /* entropy encode end address */
    326     u4_mb_end_idx = ps_entropy->i4_mb_end_add;
    327 
    328     /* width in mbs */
    329     i4_wd_mbs = ps_entropy->i4_wd_mbs;
    330 
    331     /* height in mbs */
    332     i4_ht_mbs = ps_entropy->i4_ht_mbs;
    333 
    334     /* total mb cnt */
    335     u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
    336 
    337     /* proc map (row of the current mb) */
    338     pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
    339 
    340     /* entropy map (row of the current mb) */
    341     pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
    342 
    343     /********************************************************************/
    344     /* @ start of frame / slice,                                        */
    345     /*      initialize the output buffer,                               */
    346     /*      initialize the bit stream buffer,                           */
    347     /*      check if sps and pps headers have to be generated,          */
    348     /*      populate and generate slice header                          */
    349     /********************************************************************/
    350     if (ps_entropy->i4_sof)
    351     {
    352         /********************************************************************/
    353         /*      initialize the output buffer                                */
    354         /********************************************************************/
    355         s_out_buf = ps_codec->as_out_buf[ctxt_sel];
    356 
                /* NOTE(review): the three stores below update only the local copy
                 * s_out_buf; nothing in this function reads them back or copies
                 * them to ps_codec->as_out_buf[] */
    357         /* is last frame to encode */
    358         s_out_buf.u4_is_last = ps_entropy->u4_is_last;
    359 
    360         /* frame idx */
    361         s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
    362         s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
    363 
    364         /********************************************************************/
    365         /*      initialize the bit stream buffer                            */
    366         /********************************************************************/
    367         ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
    368 
    369         /********************************************************************/
    370         /*                    BEGIN HEADER GENERATION                       */
    371         /********************************************************************/
    372         if (1 == ps_entropy->i4_gen_header)
    373         {
    374             /* generate sps */
    375             ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps,
    376                                                              &ps_codec->s_cfg.s_vui);
    377             /* generate pps */
    378             ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
    379 
    380             /* reset i4_gen_header */
    381             ps_entropy->i4_gen_header = 0;
    382         }
    383 
    384         /* populate slice header */
    385         ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
    386 
    387         /* generate slice header */
    388         ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
    389                                                                   ps_pps, ps_sps);
    390 
    391         /* once start of frame / slice is done, you can reset it */
    392         /* it is the responsibility of the caller to set this flag */
    393         ps_entropy->i4_sof = 0;
    394 
                /* cabac: byte align and flush what has been written so far, then
                 * initialise the cabac context before any mb is coded */
    395         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
    396         {
    397             BITSTREAM_BYTE_ALIGN(ps_bitstrm);
    398             BITSTREAM_FLUSH(ps_bitstrm);
    399             ih264e_init_cabac_ctxt(ps_entropy);
    400         }
    401     }
    402 
    403     /* begin entropy coding for the mb set */
    404     while (u4_mb_idx < u4_mb_end_idx)
    405     {
    406         /* previous row is complete: step to the next row and re-derive the per-row pointers */
    407         if (ps_entropy->i4_mb_x == i4_wd_mbs)
    408         {
    409             ps_entropy->i4_mb_y++;
    410             ps_entropy->i4_mb_x = 0;
    411 
    412             /* packed mb coeff data */
    413             ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
    414                             ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
    415 
    416             /* packed mb header data */
    417             ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
    418                             ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
    419 
    420             /* proc map */
    421             pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
    422 
    423             /* entropy map */
    424             pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
    425         }
    426 
    427         DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
    428         ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
    429         ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
    430 
    431         /* wait until the curr mb is core coded */
    432         /* The wait for curr mb to be core coded is essential when entropy is launched
    433          * as a separate job
    434          */
                /* the map entry is polled through a volatile pointer so that it is
                 * re-read on every pass; the thread yields while the entry is 0 */
    435         while (1)
    436         {
    437             volatile UWORD8 *pu1_buf1;
    438             WORD32 idx = ps_entropy->i4_mb_x;
    439 
    440             pu1_buf1 = pu1_proc_map + idx;
    441             if (*pu1_buf1)
    442                 break;
    443             ithread_yield();
    444         }
    445 
    446 
    447         /* write mb layer (writer selected by entropy coding mode and slice type) */
    448         ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
    449         /* Starting bitstream offset for header in bits */
                /* sampled after the mb layer has been written, so the difference
                 * accumulated at the end of this iteration does not include the
                 * bits of the mb itself */
    450         bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
    451 
    452         /* set entropy map */
    453         pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
    454 
    455         u4_mb_idx++;
    456         ps_entropy->i4_mb_x++;
    457         /* cabac: the mb just coded is not the last one of its row, encode terminate with 0 */
    458         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
    459         {
    460             if (ps_entropy->i4_mb_x < i4_wd_mbs)
    461             {
    462                 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
    463             }
    464         }
    465 
                /* the mb just coded was the last one of its row */
    466         if (ps_entropy->i4_mb_x == i4_wd_mbs)
    467         {
    468             /* if slices are enabled */
    469             if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
    470             {
    471                 /* current slice index */
    472                 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
    473 
    474                 /* slice map */
    475                 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
    476 
    477                 /* No need to open a slice at end of frame. The current slice can be closed at the time
    478                  * of signaling eof flag.
    479                  */
                        /* a new slice is opened only if the next mb exists and maps to a different slice index */
    480                 if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
    481                                                 != pu1_slice_idx[u4_mb_idx]))
    482                 {
    483                     if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
    484                     { /* mb skip run */
                                /* NOTE(review): the inner test of *pi4_mb_skip_run repeats the outer one */
    485                         if ((i4_slice_type != ISLICE)
    486                                         && *ps_entropy->pi4_mb_skip_run)
    487                         {
    488                             if (*ps_entropy->pi4_mb_skip_run)
    489                             {
    490                             PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
    491                                 *ps_entropy->pi4_mb_skip_run = 0;
    492                             }
    493                         }
    494                         /* put rbsp trailing bits for the previous slice */
    495                                  ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
    496                     }
    497                     else
    498                     {
                                /* cabac: close the previous slice by encoding terminate with 1 */
    499                         ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
    500                     }
    501 
    502                     /* update slice header pointer */
    503                     i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
    504                     ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
    505                     ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
    506 
    507                     /* populate slice header (the new slice starts at the next mb) */
    508                     ps_entropy->i4_mb_start_add = u4_mb_idx;
    509                     ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
    510                                                  ps_sps);
    511 
    512                     /* generate slice header */
    513                     ps_entropy->i4_error_code |= ih264e_generate_slice_header(
    514                                     ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
                            /* cabac: same align / flush / context init sequence as at start of frame */
    515                     if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
    516                     {
    517                         BITSTREAM_BYTE_ALIGN(ps_bitstrm);
    518                         BITSTREAM_FLUSH(ps_bitstrm);
    519                         ih264e_init_cabac_ctxt(ps_entropy);
    520                     }
    521                 }
    522                 else
    523                 {
                            /* no slice change: for cabac, encode terminate with 0 unless
                             * this was the last mb of the picture */
    524                     if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
    525                                     && u4_mb_idx != u4_mb_cnt)
    526                     {
    527                         ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
    528                     }
    529                 }
    530             }
    531             /* Dont execute any further instructions until store synchronization took place */
    532             DATA_SYNC();
    533         }
    534 
    535         /* Ending bitstream offset for header in bits */
                /* u4_header_bits[1] accumulates for P slices, u4_header_bits[0] for
                 * every other slice type */
    536         bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
    537         ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
    538                         bitstream_end_offset - bitstream_start_offset;
    539     }
    540 
    541     /* check for eof */
    542     if (u4_mb_idx == u4_mb_cnt)
    543     {
    544         /* set end of frame flag */
    545         ps_entropy->i4_eof = 1;
    546     }
    547     else
    548     {
                /* picture not finished: with cabac and a slice mode other than
                 * IVE_SLICE_MODE_BLOCKS the loop above does not encode a terminate
                 * for the last mb of a row, so it is encoded (with 0) here.
                 * NOTE(review): appears to assume that the job ends on a row
                 * boundary - confirm against the job scheduler */
    549         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
    550                         && ps_codec->s_cfg.e_slice_mode
    551                                         != IVE_SLICE_MODE_BLOCKS)
    552         {
    553             ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
    554         }
    555     }
    556 
    557     if (ps_entropy->i4_eof)
    558     {
    559         if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
    560         {
    561             /* mb skip run */
                    /* NOTE(review): the inner test of *pi4_mb_skip_run repeats the outer one */
    562             if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
    563             {
    564                 if (*ps_entropy->pi4_mb_skip_run)
    565                 {
    566                     PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
    567                                  ps_entropy->i4_error_code, "mb skip run");
    568                     *ps_entropy->pi4_mb_skip_run = 0;
    569                 }
    570             }
    571             /* put rbsp trailing bits */
    572              ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
    573         }
    574         else
    575         {
                    /* cabac: close the last slice by encoding terminate with 1 */
    576             ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
    577         }
    578 
    579         /* update current frame stats to rc library */
    580         {
    581             /* number of bytes to stuff */
    582             WORD32 i4_stuff_bytes;
    583 
    584             /* update (third argument is 1 when the codec's i4_poc is 0) */
    585             i4_stuff_bytes = ih264e_update_rc_post_enc(
    586                             ps_codec, ctxt_sel,
    587                             (ps_proc->ps_codec->i4_poc == 0));
    588 
    589             /* cbr rc - house keeping */
                    /* frame flagged as skipped after encode: drop its bits by
                     * rewinding the stream offset to 0 */
    590             if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
    591             {
    592                 ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
    593             }
    594             else if (i4_stuff_bytes)
    595             {
    596                 /* add filler nal units */
    597                 ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
    598             }
    599         }
    600 
    601         /*
    602          * Frame number is to be incremented only if the current frame is a
    603          * reference frame. After each successful frame encode, we increment
    604          * frame number by 1
    605          */
    606         if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
    607                         && ps_codec->u4_is_curr_frm_ref)
    608         {
    609             ps_codec->i4_frame_num++;
    610         }
    611         /********************************************************************/
    612         /*      signal the output                                           */
    613         /********************************************************************/
    614         ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
    615                         ps_entropy->ps_bitstrm->u4_strm_buf_offset;
    616 
    617         DEBUG("entropy status %x", ps_entropy->i4_error_code);
    618     }
    619 
    620     /* allow threads to dequeue entropy jobs */
    621     ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
    622 
    623     return ps_entropy->i4_error_code;
    624 }
    625 
    626 /**
    627 *******************************************************************************
    628 *
    629 * @brief Packs header information of a mb in to a buffer
    630 *
    631 * @par Description:
    632 *  After deciding the mode info of a macroblock, the syntax elements
    633 *  associated with the mb are packed and stored. The entropy thread unpacks
    634 *  this buffer and generates the end bit stream.
    635 *
    636 * @param[in] ps_proc
    637 *  Pointer to the current process context
    638 *
    639 * @returns error status
    640 *
    641 * @remarks none
    642 *
    643 *******************************************************************************
    644 */
    645 IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
    646 {
    647     /* curr mb type */
    648     UWORD32 u4_mb_type = ps_proc->u4_mb_type;
    649 
    650     /* pack mb syntax layer of curr mb (used for entropy coding) */
    651     if (u4_mb_type == I4x4)
    652     {
    653         /* pointer to mb header storage space */
    654         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
    655         mb_hdr_i4x4_t *ps_mb_hdr = (mb_hdr_i4x4_t *)ps_proc->pv_mb_header_data;
    656 
    657         /* temp var */
    658         WORD32 i4, byte;
    659 
    660         /* mb type plus mode */
    661         ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
    662 
    663         /* cbp */
    664         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
    665 
    666         /* mb qp delta */
    667         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
    668 
    669         /* sub mb modes */
    670         for (i4 = 0; i4 < 16; i4 ++)
    671         {
    672             byte = 0;
    673 
    674             if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
    675                             ps_proc->au1_intra_luma_mb_4x4_modes[i4])
    676             {
    677                 byte |= 1;
    678             }
    679             else
    680             {
    681 
    682                 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
    683                                 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
    684                 {
    685                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
    686                 }
    687                 else
    688                 {
    689                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
    690                 }
    691             }
    692 
    693             i4++;
    694 
    695             if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
    696                             ps_proc->au1_intra_luma_mb_4x4_modes[i4])
    697             {
    698                 byte |= 16;
    699             }
    700             else
    701             {
    702 
    703                 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
    704                                 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
    705                 {
    706                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
    707                 }
    708                 else
    709                 {
    710                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
    711                 }
    712             }
    713 
    714             ps_mb_hdr->au1_sub_blk_modes[i4 >> 1] =  byte;
    715         }
    716 
    717         /* end of mb layer */
    718         pu1_ptr += sizeof(mb_hdr_i4x4_t);
    719         ps_proc->pv_mb_header_data = pu1_ptr;
    720     }
    721     else if (u4_mb_type == I16x16)
    722     {
    723         /* pointer to mb header storage space */
    724         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
    725         mb_hdr_i16x16_t *ps_mb_hdr = (mb_hdr_i16x16_t *)ps_proc->pv_mb_header_data;
    726 
    727         /* mb type plus mode */
    728         ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
    729 
    730         /* cbp */
    731         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
    732 
    733         /* mb qp delta */
    734         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
    735 
    736         /* end of mb layer */
    737         pu1_ptr += sizeof(mb_hdr_i16x16_t);
    738         ps_proc->pv_mb_header_data = pu1_ptr;
    739     }
    740     else if (u4_mb_type == P16x16)
    741     {
    742         /* pointer to mb header storage space */
    743         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
    744         mb_hdr_p16x16_t *ps_mb_hdr = (mb_hdr_p16x16_t *)ps_proc->pv_mb_header_data;
    745 
    746         /* mb type */
    747         ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
    748 
    749         /* cbp */
    750         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
    751 
    752         /* mb qp delta */
    753         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
    754 
    755         ps_mb_hdr->ai2_mv[0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
    756 
    757         ps_mb_hdr->ai2_mv[1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
    758 
    759         /* end of mb layer */
    760         pu1_ptr += sizeof(mb_hdr_p16x16_t);
    761         ps_proc->pv_mb_header_data = pu1_ptr;
    762     }
    763     else if (u4_mb_type == PSKIP)
    764     {
    765         /* pointer to mb header storage space */
    766         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
    767         mb_hdr_pskip_t *ps_mb_hdr = (mb_hdr_pskip_t *)ps_proc->pv_mb_header_data;
    768 
    769         /* mb type */
    770         ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
    771 
    772         /* end of mb layer */
    773         pu1_ptr += sizeof(mb_hdr_pskip_t);
    774         ps_proc->pv_mb_header_data = pu1_ptr;
    775     }
    776     else if(u4_mb_type == B16x16)
    777     {
    778 
    779         /* pointer to mb header storage space */
    780         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
    781         mb_hdr_b16x16_t *ps_mb_hdr = (mb_hdr_b16x16_t *)ps_proc->pv_mb_header_data;
    782 
    783         UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
    784 
    785         /* mb type plus mode */
    786         ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
    787 
    788         /* cbp */
    789         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
    790 
    791         /* mb qp delta */
    792         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
    793 
    794         /* l0 & l1 me data */
    795         if (u4_pred_mode != PRED_L1)
    796         {
    797             ps_mb_hdr->ai2_mv[0][0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
    798                             - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
    799 
    800             ps_mb_hdr->ai2_mv[0][1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
    801                             - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
    802         }
    803         if (u4_pred_mode != PRED_L0)
    804         {
    805             ps_mb_hdr->ai2_mv[1][0] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
    806                             - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
    807 
    808             ps_mb_hdr->ai2_mv[1][1] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
    809                             - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
    810         }
    811 
    812         /* end of mb layer */
    813         pu1_ptr += sizeof(mb_hdr_b16x16_t);
    814         ps_proc->pv_mb_header_data = pu1_ptr;
    815 
    816     }
    817     else if(u4_mb_type == BDIRECT)
    818     {
    819         /* pointer to mb header storage space */
    820         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
    821         mb_hdr_bdirect_t *ps_mb_hdr = (mb_hdr_bdirect_t *)ps_proc->pv_mb_header_data;
    822 
    823         /* mb type plus mode */
    824         ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
    825 
    826         /* cbp */
    827         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
    828 
    829         /* mb qp delta */
    830         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
    831 
    832         /* end of mb layer */
    833         pu1_ptr += sizeof(mb_hdr_bdirect_t);
    834         ps_proc->pv_mb_header_data = pu1_ptr;
    835 
    836     }
    837     else if(u4_mb_type == BSKIP)
    838     {
    839         UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
    840 
    841         /* pointer to mb header storage space */
    842         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
    843         mb_hdr_bskip_t *ps_mb_hdr = (mb_hdr_bskip_t *)ps_proc->pv_mb_header_data;
    844 
    845         /* mb type plus mode */
    846         ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
    847 
    848         /* end of mb layer */
    849         pu1_ptr += sizeof(mb_hdr_bskip_t);
    850         ps_proc->pv_mb_header_data = pu1_ptr;
    851     }
    852 
    853     return IH264E_SUCCESS;
    854 }
    855 
    856 /**
    857 *******************************************************************************
    858 *
    859 * @brief   Updates the process context after encoding an mb. This involves
    860 * preserving the current mb information for later use and initializing the proc
    861 * ctxt elements to encode the next mb.
    862 *
    863 * @par Description:
    864 *  This function performs house keeping tasks after encoding an mb.
    865 *  After encoding an mb, various elements of the process context need to be
    866 *  updated to encode the next mb. For instance, the source, recon and reference
    867 *  pointers, mb indices have to be adjusted to the next mb. The slice index of
    868 *  the current mb needs to be updated. If mb qp modulation is enabled, then if
    869 *  the qp changes the quant param structure needs to be updated. Also, to encode
    870 *  the next mb, the current mb info is used as part of mode prediction or mv
    871 *  prediction. Hence the current mb info has to be preserved at top/top left/left
    872 *  locations.
    873 *
    874 * @param[in] ps_proc
    875 *  Pointer to the current process context
    876 *
    877 * @returns error status (IH264_SUCCESS or'ed with the entropy job queuing status)
    878 *
    879 * @remarks the call to ih264e_pack_header_data() does not contribute to the status
    880 *
    881 *******************************************************************************
    882 */
    883 WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
    884 {
    885     /* error status returned to the caller */
    886     WORD32 error_status = IH264_SUCCESS;
    887 
    888     /* codec context */
    889     codec_t *ps_codec = ps_proc->ps_codec;
    890 
    891     /* curr mb indices */
    892     WORD32 i4_mb_x = ps_proc->i4_mb_x;
    893     WORD32 i4_mb_y = ps_proc->i4_mb_y;
    894 
    895     /* mb syntax elements of neighbors */
    896     mb_info_t *ps_left_syn =  &ps_proc->s_left_mb_syntax_ele;
    897     mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
    898     mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
    899 
    900     /* curr mb type */
    901     UWORD32 u4_mb_type = ps_proc->u4_mb_type;
    902 
    903     /* curr mb class (intra / inter flag) */
    904     UWORD32 u4_is_intra = ps_proc->u4_is_intra;
    905 
    906     /* width in mbs */
    907     WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
    908 
    909     /* height in mbs */
    910     WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
    911 
    912     /* proc map entry of the first mb in the current row */
    913     UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
    914 
    915     /* deblk context */
    916     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
    917 
    918     /* deblk bs context */
    919     bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
    920 
    921     /* top row motion vector info at the current mb column */
    922     enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
    923 
    924     /* top left mb motion vector */
    925     enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
    926 
    927     /* left mb motion vector */
    928     enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
    929 
    930     /* top row intra modes of the current mb column (16 entries per mb) */
    931     UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
    932 
    933     /*************************************************************/
    934     /* During MV prediction, when top right mb is not available, */
    935     /* top left mb info. is used for prediction. Hence the curr  */
    936     /* top, which will be top left for the next mb needs to be   */
    937     /* preserved before updating it with curr mb info.           */
    938     /*************************************************************/
    939 
    940     /* mb type, mb class, csbp, distortion (whole structure is copied) */
    941     *ps_top_left_syn = *ps_top_syn;
    942 
    943     if (ps_proc->i4_slice_type != ISLICE)
    944     {
    945         /*****************************************/
    946         /* update top left with top info results */
    947         /*****************************************/
    948         /* mv */
    949         *ps_top_left_mb_pu = *ps_top_row_pu;
    950     }
    951 
    952     /*************************************************/
    953     /* update top and left with curr mb info results */
    954     /*************************************************/
    955 
    956     /* mb type */
    957     ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
    958 
    959     /* mb class */
    960     ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
    961 
    962     /* csbp */
    963     ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
    964 
    965     /* distortion */
    966     ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
    967 
    968     if (u4_is_intra)
    969     {
    970         /* mb / sub mb modes: stored in both the left and the top intra mode arrays */
    971         if (I16x16 == u4_mb_type)
    972         {
    973             pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
    974         }
    975         else if (I4x4 == u4_mb_type)
    976         {
    977             ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
    978             ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
    979         }
    980         else if (I8x8 == u4_mb_type)
    981         {
    982             memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
    983             memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
    984         }
    985 
    986         if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
    987         {
    988             /* mv */
    989             *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
    990         }
    991         /* an intra mb is accounted as a single pu */
    992         *ps_proc->pu4_mb_pu_cnt = 1;
    993     }
    994     else
    995     {
    996         /* mv */
    997         *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
    998     }
    999 
   1000     /*
   1001      * Record whether the MB has been coded intra
   1002      * So that future AIRs can skip it
   1003      */
   1004     ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
   1005 
   1006     /**************************************************/
   1007     /* pack mb header info. for entropy coding        */
   1008     /**************************************************/
   1009     ih264e_pack_header_data(ps_proc);
   1010 
   1011     /* update previous mb qp */
   1012     ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
   1013 
   1014     /* store qp of the current mb in the per picture qp map of the bs context */
   1015     ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
   1016 
   1017     /*
   1018      * We need to sync the cache to make sure that the nmv content of proc
   1019      * is updated to cache properly
   1020      */
   1021     DATA_SYNC();
   1022 
   1023     /* Just before finishing the row, enqueue the job into the entropy queue.
   1024      * The master thread, depending on its convenience, shall dequeue it and
   1025      * perform entropy.
   1026      *
   1027      * WARN !! Placing this block post proc map update can cause queuing of
   1028      * entropy jobs out of order.
   1029      */
   1030     if (i4_mb_x == i4_wd_mbs - 1)
   1031     {
   1032         /* job structures */
   1033         job_t s_job;
   1034 
   1035         /* job class */
   1036         s_job.i4_cmd = CMD_ENTROPY;
   1037 
   1038         /* number of mbs to be processed in the current job */
   1039         s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
   1040 
   1041         /* job start index x */
   1042         s_job.i2_mb_x = 0;
   1043 
   1044         /* job start index y */
   1045         s_job.i2_mb_y = ps_proc->i4_mb_y;
   1046 
   1047         /* proc base idx: second half of the proc ctxts when (api call cnt % MAX_CTXT_SETS) is non zero, else 0 */
   1048         s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
   1049 
   1050         /* queue the job; its status is accumulated into the returned error status */
   1051         error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
   1052         /* last mb row: terminate the entropy job queue */
   1053         if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
   1054             ih264_list_terminate(ps_codec->pv_entropy_jobq);
   1055     }
   1056 
   1057     /* update proc map: mark the current mb in the proc map */
   1058     pu1_proc_map[i4_mb_x] = 1;
   1059 
   1060     /**************************************************/
   1061     /* update proc ctxt elements for encoding next mb */
   1062     /**************************************************/
   1063     /* update indices (wrap to the start of the next row at the end of a row) */
   1064     i4_mb_x ++;
   1065     ps_proc->i4_mb_x = i4_mb_x;
   1066 
   1067     if (ps_proc->i4_mb_x == i4_wd_mbs)
   1068     {
   1069         ps_proc->i4_mb_y++;
   1070         ps_proc->i4_mb_x = 0;
   1071     }
   1072 
   1073     /* update slice index to that of the next mb. NOTE(review): after the last mb of the frame this indexes i4_ht_mbs * i4_wd_mbs - confirm pu1_slice_idx has room */
   1074     ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
   1075 
   1076     /* update buffers pointers */
   1077     ps_proc->pu1_src_buf_luma += MB_SIZE;
   1078     ps_proc->pu1_rec_buf_luma += MB_SIZE;
   1079     ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
   1080     ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
   1081 
   1082     /*
   1083      * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
   1084      * the stride per MB is MB_SIZE
   1085      */
   1086     ps_proc->pu1_src_buf_chroma += MB_SIZE;
   1087     ps_proc->pu1_rec_buf_chroma += MB_SIZE;
   1088     ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
   1089     ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
   1090 
   1091 
   1092 
   1093     /* Reset cost, distortion params */
   1094     ps_proc->i4_mb_cost = INT_MAX;
   1095     ps_proc->i4_mb_distortion = SHRT_MAX;
   1096     /* advance the pu pointer by the pu count of the current mb, then step to the next mb's pu count */
   1097     ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
   1098 
   1099     ps_proc->pu4_mb_pu_cnt += 1;
   1100 
   1101     /* Update colocated pu. NOTE(review): i4_mb_x was already incremented above, so the pu count read here is at the next mb index - confirm this is intended */
   1102     if (ps_proc->i4_slice_type == BSLICE)
   1103         ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt +  (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x);
   1104 
   1105     /* deblk ctxts (skipped when u4_disable_deblock_level is 1) */
   1106     if (ps_proc->u4_disable_deblock_level != 1)
   1107     {
   1108         /* indices */
   1109         ps_bs->i4_mb_x = ps_proc->i4_mb_x;
   1110         ps_bs->i4_mb_y = ps_proc->i4_mb_y;
   1111 
   1112 #ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
   1113         ps_deblk->i4_mb_x ++;
   1114 
   1115         ps_deblk->pu1_cur_pic_luma += MB_SIZE;
   1116         /*
   1117          * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
   1118          * the stride per MB is MB_SIZE
   1119          */
   1120         ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
   1121 #endif
   1122     }
   1123 
   1124     return error_status;
   1125 }
   1126 
   1127 /**
   1128 *******************************************************************************
   1129 *
   1130 * @brief   initialize process context.
   1131 *
   1132 * @par Description:
   1133 *  Before dispatching the current job to process thread, the process context
   1134 *  associated with the job is initialized. Usually every job aims to encode one
   1135 *  row of mb's. Based on the row indices provided by the job, the process
   1136 *  context's buffer ptrs, slice indices and other elements that are necessary
   1137 *  during core-coding are initialized.
   1138 *
   1139 * @param[in] ps_proc
   1140 *  Pointer to the current process context
   1141 *
   1142 * @returns error status (IH264E_SUCCESS is the only value returned here)
   1143 *
   1144 * @remarks assumes the mbs to process do not span multiple rows (see csc note)
   1145 *
   1146 *******************************************************************************
   1147 */
   1148 IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
   1149 {
   1150     /* codec context */
   1151     codec_t *ps_codec = ps_proc->ps_codec;
   1152 
   1153     /* nmb processing context*/
   1154     n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
   1155 
   1156     /* indices */
   1157     WORD32 i4_mb_x, i4_mb_y;
   1158 
   1159     /* strides */
   1160     WORD32 i4_src_strd = ps_proc->i4_src_strd;
   1161     WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
   1162     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
   1163 
   1164     /* quant params */
   1165     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
   1166 
   1167     /* deblk ctxt */
   1168     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
   1169 
   1170     /* deblk bs context */
   1171     bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
   1172 
   1173     /* Pointer to mv_buffer of current frame */
   1174     mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
   1175 
   1176     /* Pointers for color space conversion */
   1177     UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
   1178 
   1179     /* Right / bottom padding needed to support non standard sizes (cfg size minus display size) */
   1180     UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
   1181     UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
   1182     UWORD16 u2_num_rows = MB_SIZE;
   1183     WORD32 convert_uv_only;
   1184 
   1185     /********************************************************************/
   1186     /*                            BEGIN INIT                            */
   1187     /********************************************************************/
   1188 
   1189     i4_mb_x = ps_proc->i4_mb_x;
   1190     i4_mb_y = ps_proc->i4_mb_y;
   1191 
   1192     /* Number of mbs processed in one loop of process function */
   1193     ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
   1194     ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
   1195 
   1196     /* init buffer pointers; convert_uv_only stays 1 unless luma is sourced from the csc buffer below */
   1197     convert_uv_only = 1;
   1198     if (u4_pad_bottom_sz || u4_pad_right_sz ||
   1199         ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
   1200     {
   1201         if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
   1202             u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
   1203         ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
   1204         i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
   1205         ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
   1206         convert_uv_only = 0;
   1207     }
   1208     else
   1209     {
   1210         i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
   1211         ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
   1212     }
   1213 
   1214 
   1215     if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
   1216         ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
   1217         ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) ||
   1218         u4_pad_bottom_sz || u4_pad_right_sz)
   1219     {
   1220         if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
   1221             (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
   1222             ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
   1223 
   1224         ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
   1225         i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd;
   1226     }
   1227     else
   1228     {
   1229         i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
   1230         ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
   1231     }
   1232 
   1233     ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
   1234     ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
   1235 
   1236     /* Temporal back and forward reference buffers */
   1237     ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
   1238     ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
   1239     ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
   1240     ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
   1241 
   1242     /*
   1243      * Do color space conversion
   1244      * NOTE : We assume here that the number of MB's to process will not span multiple rows
   1245      */
   1246     switch (ps_codec->s_cfg.e_inp_color_fmt)
   1247     {
   1248         case IV_YUV_420SP_UV:
   1249         case IV_YUV_420SP_VU:
   1250             /* In case of 420 semi-planar input, copy last few rows to intermediate
   1251                buffer as chroma trans functions access one extra byte due to interleaved input.
   1252                This data will be padded if required */
   1253             if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz)
   1254             {
   1255                 WORD32 num_rows = MB_SIZE;
   1256                 UWORD8 *pu1_src;
   1257                 UWORD8 *pu1_dst;
   1258                 WORD32 i;
   1259                 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
   1260                           ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
   1261 
   1262                 pu1_dst = ps_proc->pu1_src_buf_luma;
   1263 
   1264                 /* If padding is required, we always copy luma, if padding isn't required we never copy luma. */
   1265                 if (u4_pad_bottom_sz || u4_pad_right_sz) {
   1266                     if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
   1267                         num_rows = MB_SIZE - u4_pad_bottom_sz;
   1268                     for (i = 0; i < num_rows; i++)
   1269                     {
   1270                         memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
   1271                         pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
   1272                         pu1_dst += ps_proc->i4_src_strd;
   1273                     }
   1274                 }
   1275                 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
   1276                           ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
   1277                 pu1_dst = ps_proc->pu1_src_buf_chroma;
   1278 
   1279                 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
   1280                  * due to interleaved input
   1281                  */
   1282                 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
   1283                     num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
   1284                 else
   1285                     num_rows = BLK8x8SIZE;
   1286                 for (i = 0; i < num_rows; i++)
   1287                 {
   1288                     memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
   1289                     pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
   1290                     pu1_dst += ps_proc->i4_src_chroma_strd;
   1291                 }
   1292 
   1293             }
   1294             break;
   1295 
   1296         case IV_YUV_420P :
   1297             pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
   1298                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
   1299 
   1300             pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
   1301                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
   1302 
   1303             pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
   1304                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
   1305 
   1306             ps_codec->pf_ih264e_conv_420p_to_420sp(
   1307                             pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
   1308                             ps_proc->pu1_src_buf_luma,
   1309                             ps_proc->pu1_src_buf_chroma, u2_num_rows,
   1310                             ps_codec->s_cfg.u4_disp_wd,
   1311                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
   1312                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
   1313                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
   1314                             ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
   1315                             convert_uv_only);
   1316             break;
   1317 
   1318         case IV_YUV_422ILE :
   1319             pu1_y_buf_base =  (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
   1320                               + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
   1321 
   1322             ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
   1323                             ps_proc->pu1_src_buf_luma,
   1324                             ps_proc->pu1_src_buf_chroma,
   1325                             ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
   1326                             ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
   1327                             ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
   1328                             ps_proc->i4_src_chroma_strd,
   1329                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
   1330             break;
   1331 
   1332         default:
   1333             break;
   1334     }
   1335     /* pad right edge (done once per row, when starting at mb column 0) */
   1336     if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0))
   1337     {
   1338         UWORD32 u4_pad_wd, u4_pad_ht;
   1339         u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
   1340         u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
   1341         u4_pad_ht = MB_SIZE;
   1342         if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
   1343             u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
   1344 
   1345         ih264_pad_right_luma(
   1346                         ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
   1347                         ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
   1348 
   1349         ih264_pad_right_chroma(
   1350                         ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
   1351                         ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd);
   1352     }
   1353 
   1354     /* pad bottom edge (last mb row only, when starting at mb column 0) */
   1355     if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
   1356     {
   1357         ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
   1358                          ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
   1359 
   1360         ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
   1361                          ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2));
   1362     }
   1363 
   1364 
   1365     /* packed mb coeff data of the current mb row */
   1366     ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
   1367 
   1368     /* packed mb header data of the current mb row */
   1369     ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
   1370 
   1371     /* slice index */
   1372     ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
   1373 
   1374     /*********************************************************************/
   1375     /* ih264e_init_quant_params() routine is called at the pic init level*/
   1376     /* this would have initialized the qp.                               */
   1377     /* TODO_LATER: currently it is assumed that quant params do not change */
   1378     /* across mb's. When they do, update ps_qp_params accordingly        */
   1379     /*********************************************************************/
   1380 
   1381     /* init mv buffer ptr to the start of the current mb row */
   1382     ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
   1383                      ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
   1384 
   1385     /* Init co-located mv buffer */
   1386     ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
   1387                         ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
   1388     /* top row pu for ME: the previous mb row, or the start of the pic pu buffer when i4_mb_y is 0 */
   1389     if (i4_mb_y == 0)
   1390     {
   1391         ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
   1392     }
   1393     else
   1394     {
   1395         ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
   1396                                     ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
   1397     }
   1398     /* per mb pu count, starting at the first mb of the current row */
   1399     ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
   1400 
   1401     /* mb type (initial value) */
   1402     ps_proc->u4_mb_type = I16x16;
   1403 
   1404     /* lambda */
   1405     ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
   1406 
   1407     /* mb distortion */
   1408     ps_proc->i4_mb_distortion = SHRT_MAX;
   1409     /* at mb column 0 reset the neighbor distortions; at mb (0, 0) also clear the top row syntax elements */
   1410     if (i4_mb_x == 0)
   1411     {
   1412         ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
   1413 
   1414         ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
   1415 
   1416         ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
   1417 
   1418         if (i4_mb_y == 0)
   1419         {
   1420             memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
   1421         }
   1422     }
   1423 
   1424     /* mb cost */
   1425     ps_proc->i4_mb_cost = INT_MAX;
   1426 
   1427     /**********************/
   1428     /* init deblk context */
   1429     /**********************/
   1430     ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
   1431     /* deblk lags the current mb proc by 1 row */
   1432     /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
   1433     /* Hence to deblk MB 0 of row 0, you have to wait till MB 0 of row 1 is encoded. */
   1434     /* For simplicity, we chose to lag deblking by 1 Row wrt proc */
   1435     ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
   1436 
   1437     /* buffer ptrs. NOTE(review): for mb row 0, ps_deblk->i4_mb_y is -1, so these point one mb row above the recon base - confirm the recon buffers are padded accordingly */
   1438     ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
   1439     ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
   1440 
   1441     /* init deblk bs context */
   1442     /* mb indices */
   1443     ps_bs->i4_mb_x = ps_proc->i4_mb_x;
   1444     ps_bs->i4_mb_y = ps_proc->i4_mb_y;
   1445 
   1446     /* init n_mb_process context (uses the deblk row, i.e. one row behind proc) */
   1447     ps_n_mb_ctxt->i4_mb_x = 0;
   1448     ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
   1449     ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
   1450 
   1451     return IH264E_SUCCESS;
   1452 }
   1453 
   1454 /**
   1455 *******************************************************************************
   1456 *
   1457 * @brief Replicates recon luma & chroma samples into the picture padding
   1458 *  around a single MB
   1459 *
   1460 * @par Description:
   1461 *  First column MBs get left padding, last column MBs get right padding and,
   1462 *  when they also lie in the last row, trigger bottom padding. First row MBs
   1463 *  get top padding, extended sideways at the picture corners.
   1464 *
   1465 * @param[in] ps_proc
   1466 *  Process context corresponding to the job
   1467 *
   1468 * @param[in] pu1_curr_pic_luma
   1469 *  Pointer to the recon luma samples of the MB
   1470 *
   1471 * @param[in] pu1_curr_pic_chroma
   1472 *  Pointer to the recon chroma samples of the MB
   1473 *
   1474 * @param[in] i4_mb_x
   1475 *  mb index x
   1476 *
   1477 * @param[in] i4_mb_y
   1478 *  mb index y
   1479 *
   1480 * @param[in] i4_pad_ht
   1481 *  number of luma rows to be padded left/right (chroma uses half as many)
   1482 *
   1483 * @returns  IH264E_SUCCESS always
   1484 *
   1485 * @remarks none
   1486 *
   1487 *******************************************************************************
   1488 */
   1489 IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
   1490                                        UWORD8 *pu1_curr_pic_luma,
   1491                                        UWORD8 *pu1_curr_pic_chroma,
   1492                                        WORD32 i4_mb_x,
   1493                                        WORD32 i4_mb_y,
   1494                                        WORD32 i4_pad_ht)
   1495 {
   1496     /* padding routines are reached through the codec context */
   1497     codec_t *ps_codec = ps_proc->ps_codec;
   1498
   1499     /* recon stride */
   1500     WORD32 i4_strd = ps_proc->i4_rec_strd;
   1501
   1502     /* chroma is padded over half as many rows as luma */
   1503     WORD32 i4_pad_ht_uv = i4_pad_ht >> 1;
   1504
   1505     /* is the mb in the first / last column of the picture ? */
   1506     WORD32 i4_first_col = (0 == i4_mb_x);
   1507     WORD32 i4_last_col = ((ps_proc->i4_wd_mbs - 1) == i4_mb_x);
   1508
   1509     /* left border */
   1510     if (i4_first_col)
   1511     {
   1512         ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_strd, i4_pad_ht, PAD_LEFT);
   1513         ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_strd, i4_pad_ht_uv, PAD_LEFT);
   1514     }
   1515
   1516     /* right border */
   1517     if (i4_last_col)
   1518     {
   1519         /* first byte to the right of the mb */
   1520         UWORD8 *pu1_luma_right = pu1_curr_pic_luma + MB_SIZE;
   1521         UWORD8 *pu1_chroma_right = pu1_curr_pic_chroma + MB_SIZE;
   1522
   1523         ps_codec->pf_pad_right_luma(pu1_luma_right, i4_strd, i4_pad_ht, PAD_RIGHT);
   1524         ps_codec->pf_pad_right_chroma(pu1_chroma_right, i4_strd, i4_pad_ht_uv, PAD_RIGHT);
   1525
   1526         /* bottom border, handled along with the last mb of the picture */
   1527         if ((ps_proc->i4_ht_mbs - 1) == i4_mb_y)
   1528         {
   1529             /* step over the right padding, then move down by
   1530              * (number of padded rows - 1) strides */
   1531             UWORD8 *pu1_luma_bot = pu1_luma_right + PAD_RIGHT + ((i4_pad_ht - 1) * i4_strd);
   1532             UWORD8 *pu1_chroma_bot = pu1_chroma_right + PAD_RIGHT + ((i4_pad_ht_uv - 1) * i4_strd);
   1533
   1534             ps_codec->pf_pad_bottom(pu1_luma_bot, i4_strd, i4_strd, PAD_BOT);
   1535             ps_codec->pf_pad_bottom(pu1_chroma_bot, i4_strd, i4_strd, (PAD_BOT >> 1));
   1536         }
   1537     }
   1538
   1539     /* top border */
   1540     if (0 == i4_mb_y)
   1541     {
   1542         /* at the picture corners the padded span is widened so that it
   1543          * also covers the left / right padding */
   1544         WORD32 i4_left_ext = i4_first_col ? PAD_LEFT : 0;
   1545         WORD32 i4_right_ext = i4_last_col ? PAD_RIGHT : 0;
   1546         WORD32 i4_span = MB_SIZE + i4_left_ext + i4_right_ext;
   1547
   1548         ps_codec->pf_pad_top(pu1_curr_pic_luma - i4_left_ext, i4_strd, i4_span, PAD_TOP);
   1549         ps_codec->pf_pad_top(pu1_curr_pic_chroma - i4_left_ext, i4_strd, i4_span, (PAD_TOP >> 1));
   1550     }
   1551
   1552     return IH264E_SUCCESS;
   1553 }
   1554 
   1555 
   1556 
   1557 
   1558 /**
   1559 *******************************************************************************
   1560 *
   1561 * @brief This function performs deblocking and padding of the recon picture
   1562 *  for 'n' MBs at a time
   1563 *
   1564 * @par Description:
   1565 *  Deblocking trails encoding by one MB row. Nothing is done until 'n' MBs are
   1566 *  pending or the end of the row is reached. The body below contains no
   1567 *  half-pel generation despite the function name. See inline comments.
   1568 *
   1569 * @param[in] ps_proc
   1570 *  Process context corresponding to the job
   1571 * @param[in] pu1_curr_pic_luma
   1572 *  Recon luma of the current MB (read only if u4_disable_deblock_level is 1)
   1573 * @param[in] pu1_curr_pic_chroma
   1574 *  Recon chroma of the current MB (read only if u4_disable_deblock_level is 1)
   1575 * @param[in] i4_mb_x
   1576 *  Column index of the current MB
   1577 * @param[in] i4_mb_y
   1578 *  Row index of the current MB
   1579 *
   1580 * @returns  IH264E_SUCCESS on every path
   1581 *
   1582 * @remarks ps_n_mb_ctxt->i4_n_mbs is used as a divisor for the last row and
   1583 *  must therefore be non-zero
   1584 *
   1585 *******************************************************************************
   1586 */
   1587 IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
   1588                                                      UWORD8 *pu1_curr_pic_luma,
   1589                                                      UWORD8 *pu1_curr_pic_chroma,
   1590                                                      WORD32 i4_mb_x,
   1591                                                      WORD32 i4_mb_y)
   1592 {
   1593     /* codec context (holds the padding function pointers) */
   1594     codec_t *ps_codec = ps_proc->ps_codec;
   1595
   1596     /* n_mb processing context: column of the first pending MB and group size */
   1597     n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
   1598
   1599     /* deblk context: position and recon pointers of the next MB to deblock */
   1600     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
   1601
   1602     /* recon buffer stride */
   1603     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
   1604
   1605     /* loop variables */
   1606     WORD32 row, i, j, col;
   1607
   1608     /* width handed to the top padding call (only set when i4_mb_y == 2) */
   1609     UWORD32 u4_pad_wd;
   1610
   1611     /* deblk_map of the row being deblocked (row index as found at entry) */
   1612     UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
   1613
   1614     /* deblk_map of the row above the one being deblocked */
   1615     UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
   1616     /* offset from the recon base where top padding starts (signed, may be -PAD_LEFT) */
   1617     WORD32 u4_pad_top = 0;
   1618     /* stays 1 only if every MB checked in the row above is marked deblocked */
   1619     WORD32 u4_deblk_prev_row = 0;
   1620
   1621     /* Number of mbs to be processed per group ('n') */
   1622     WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
   1623
   1624     /* Number of mbs  actually processed
   1625      * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
   1626     WORD32 i4_n_mb_process_count = 0;
   1627     /* start address handed to the bottom padding calls */
   1628     UWORD8 *pu1_pad_bottom_src = NULL;
   1629     /* addresses handed to the left/right padding calls of the last two MB rows */
   1630     UWORD8 *pu1_pad_src_luma = NULL;
   1631     UWORD8 *pu1_pad_src_chroma = NULL;
   1632     /* With deblock level 1 no MB is deblocked below; pad the current MB's rows left/right here */
   1633     if (ps_proc->u4_disable_deblock_level == 1)
   1634     {
   1635         /* If left most MB is processed, then pad left */
   1636         if (i4_mb_x == 0)
   1637         {
   1638             /* padding left luma */
   1639             ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
   1640
   1641             /* padding left chroma */
   1642             ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
   1643         }
   1644         /* If right most MB is processed, then pad right */
   1645         if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
   1646         {
   1647             /* padding right luma */
   1648             ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
   1649
   1650             /* padding right chroma */
   1651             ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
   1652         }
   1653     }
   1654     /* Row 0 is skipped here unless it is also the last row of the picture */
   1655     if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
   1656     {
   1657         /* if fewer than 'N' mb's are pending, return and wait for more.
   1658          * exception to the above clause is end of row, which is always processed */
   1659         if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
   1660         {
   1661             return IH264E_SUCCESS;
   1662         }
   1663         else
   1664         {
   1665             i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
   1666
   1667             /* performing deblocking for required number of MBs */
   1668             if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
   1669             {
   1670                 u4_deblk_prev_row = 1;
   1671
   1672                 /* checking whether the MBs above the ones to be deblocked are deblocked */
   1673                 for (col = 0; col < i4_n_mb_process_count; col++)
   1674                 {
   1675                     u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
   1676                 }
   1677
   1678                 /* checking whether the top right MB is deblocked (skipped at the end of the row) */
   1679                 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
   1680                 {
   1681                     u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
   1682                 }
   1683
   1684                 /* Top or Top right MBs not deblocked: return without advancing any context */
   1685                 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
   1686                 {
   1687                     return IH264E_SUCCESS;
   1688                 }
   1689                 /* 'row' merely counts the MBs deblocked in this call */
   1690                 for (row = 0; row < i4_n_mb_process_count; row++)
   1691                 {
   1692                     ih264e_deblock_mb(ps_proc, ps_deblk);
   1693
   1694                     pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
   1695                     /* pad the MB row above the one just deblocked (pointers are moved up one MB row) */
   1696                     if (ps_deblk->i4_mb_y > 0)
   1697                     {
   1698                         if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
   1699                         {
   1700                             /* padding left luma */
   1701                             ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
   1702
   1703                             /* padding left chroma */
   1704                             ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
   1705                         }
   1706
   1707                         if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
   1708                         {
   1709                             /* padding right luma */
   1710                             ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
   1711
   1712                             /* padding right chroma */
   1713                             ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
   1714                         }
   1715                     }
   1716                     ps_deblk->i4_mb_x++;
   1717
   1718                     ps_deblk->pu1_cur_pic_luma += MB_SIZE;
   1719                     ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
   1720
   1721                 }
   1722             }
   1723             else if(i4_mb_y > 0) /* deblock level 1: only move the deblk position forward */
   1724             {
   1725                 ps_deblk->i4_mb_x += i4_n_mb_process_count;
   1726
   1727                 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
   1728                 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
   1729             }
   1730             /* top padding of the picture is done group by group while row 2 is processed */
   1731             if (i4_mb_y == 2)
   1732             {
   1733                 u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
   1734                 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
   1735                 /* first group of the row: start PAD_LEFT earlier and widen accordingly */
   1736                 if (ps_n_mb_ctxt->i4_mb_x == 0)
   1737                 {
   1738                     u4_pad_wd += PAD_LEFT;
   1739                     u4_pad_top = -PAD_LEFT;
   1740                 }
   1741                 /* last group of the row: widen by the right padding */
   1742                 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
   1743                 {
   1744                     u4_pad_wd += PAD_RIGHT;
   1745                 }
   1746
   1747                 /* padding top luma */
   1748                 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
   1749
   1750                 /* padding top chroma */
   1751                 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
   1752             }
   1753             /* the MBs handled in this call are no longer pending */
   1754             ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
   1755             /* last MB of the last row: finish the last row and the remaining padding */
   1756             if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
   1757             {
   1758                 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
   1759                 {
   1760                     /* Deblock the whole last row now; bottom padding is then done in one stretch for the entire width */
   1761                     if (ps_proc->u4_disable_deblock_level != 1)
   1762                     {
   1763                         ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
   1764
   1765                         ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
   1766
   1767                         ps_n_mb_ctxt->i4_mb_x = 0;
   1768                         ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
   1769                         ps_deblk->i4_mb_x = 0;
   1770                         ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
   1771                         /* NOTE(review): pu1_deblk_map is not re-based to this row, so the map writes below land in the row selected at entry - confirm intended */
   1772                         /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
   1773                         ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
   1774                         /* split the row into j full groups of i4_n_mbs MBs plus a remainder */
   1775                         i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
   1776
   1777                         j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
   1778
   1779                         for (i = 0; i < j; i++)
   1780                         {
   1781                             for (col = 0; col < i4_n_mbs; col++)
   1782                             {
   1783                                 ih264e_deblock_mb(ps_proc, ps_deblk);
   1784
   1785                                 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
   1786
   1787                                 ps_deblk->i4_mb_x++;
   1788                                 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
   1789                                 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
   1790                                 ps_n_mb_ctxt->i4_mb_x++;
   1791                             }
   1792                         }
   1793                         /* remaining MBs that do not fill a whole group */
   1794                         for (col = 0; col < i4_n_mb_process_count; col++)
   1795                         {
   1796                             ih264e_deblock_mb(ps_proc, ps_deblk);
   1797
   1798                             pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
   1799
   1800                             ps_deblk->i4_mb_x++;
   1801                             ps_deblk->pu1_cur_pic_luma += MB_SIZE;
   1802                             ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
   1803                             ps_n_mb_ctxt->i4_mb_x++;
   1804                         }
   1805                         /* left/right pad the last two MB rows, second last row first */
   1806                         pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
   1807
   1808                         pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
   1809
   1810                         /* padding left luma (second last MB row) */
   1811                         ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
   1812
   1813                         /* padding left chroma (second last MB row) */
   1814                         ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
   1815                         /* move down to the last MB row */
   1816                         pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
   1817                         pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
   1818
   1819                         /* padding left luma (last MB row) */
   1820                         ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
   1821
   1822                         /* padding left chroma (last MB row) */
   1823                         ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
   1824                         /* back to the second last MB row, now i4_wd_mbs * MB_SIZE bytes into the row */
   1825                         pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
   1826
   1827                         pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
   1828
   1829                         /* padding right luma (second last MB row) */
   1830                         ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
   1831
   1832                         /* padding right chroma (second last MB row) */
   1833                         ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
   1834                         /* move down to the last MB row */
   1835                         pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
   1836                         pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
   1837
   1838                         /* padding right luma (last MB row) */
   1839                         ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
   1840
   1841                         /* padding right chroma (last MB row) */
   1842                         ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
   1843
   1844                     }
   1845
   1846                     /* In case height is 2 MBs or less, pad top here (presumably the i4_mb_y == 2 path above is never reached then) */
   1847                     if (ps_proc->i4_ht_mbs <= 2)
   1848                     {
   1849                         UWORD8 *pu1_pad_top_src;
   1850                         /* padding top luma, over a full stride starting PAD_LEFT before the recon base */
   1851                         pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
   1852                         ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
   1853
   1854                         /* padding top chroma */
   1855                         pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
   1856                         ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
   1857                     }
   1858
   1859                     /* padding bottom luma, address is i4_ht_mbs MB rows below the base, PAD_LEFT to the left */
   1860                     pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
   1861                     ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
   1862
   1863                     /* padding bottom chroma */
   1864                     pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
   1865                     ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
   1866                 }
   1867             }
   1868         }
   1869     }
   1870
   1871     return IH264E_SUCCESS;
   1872 }
   1873 
   1874 
   1875 /**
   1876 *******************************************************************************
   1877 *
   1878 * @brief This function performs luma & chroma core coding for a set of mb's.
   1879 *
   1880 * @par Description:
   1881 *  The mb to be coded is taken and is evaluated over a predefined set of modes
   1882 *  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
   1883 *  is selected and using intra/inter prediction filters, prediction is carried out.
   1884 *  The deviation between src and pred signal constitutes error signal. This error
   1885 *  signal is transformed (hierarchical transform if necessary) and quantized. The
   1886 *  quantized residue is packed in to entropy buffer for entropy coding. This is
   1887 *  repeated for all the mb's enlisted under the job.
   1888 *
   1889 * @param[in] ps_proc
   1890 *  Process context corresponding to the job
   1891 *
   1892 * @returns  error status
   1893 *
   1894 * @remarks none
   1895 *
   1896 *******************************************************************************
   1897 */
   1898 WORD32 ih264e_process(process_ctxt_t *ps_proc)
   1899 {
   1900     /* error status */
   1901     WORD32 error_status = IH264_SUCCESS;
   1902 
   1903     /* codec context */
   1904     codec_t *ps_codec = ps_proc->ps_codec;
   1905 
   1906     /* cbp luma, chroma */
   1907     UWORD32 u4_cbp_l, u4_cbp_c;
   1908 
   1909     /* width in mbs */
   1910     WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
   1911 
   1912     /* loop var */
   1913     WORD32  i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
   1914 
   1915     /* valid modes */
   1916     UWORD32 u4_valid_modes = 0;
   1917 
   1918     /* gate threshold */
   1919     WORD32 i4_gate_threshold = 0;
   1920 
   1921     /* is intra */
   1922     WORD32 luma_idx, chroma_idx, is_intra;
   1923 
   1924     /* temp variables */
   1925     WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
   1926 
   1927     /*
   1928      * list of modes for evaluation
   1929      * -------------------------------------------------------------------------
   1930      * Note on enabling I4x4 and I16x16
   1931      * At very low QP's the hadamard transform in I16x16 will push up the maximum
   1932      * coeff value very high. CAVLC may not be able to represent the value and
   1933      * hence the stream may not be decodable in some clips.
   1934      * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset.
   1935      */
   1936     if (ps_proc->i4_slice_type == ISLICE)
   1937     {
   1938         if (ps_proc->u4_frame_qp > 10)
   1939         {
   1940             /* enable intra 16x16 */
   1941             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
   1942 
   1943             /* enable intra 8x8 */
   1944             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
   1945         }
   1946 
   1947         /* enable intra 4x4 */
   1948         u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
   1949         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
   1950 
   1951     }
   1952     else if (ps_proc->i4_slice_type == PSLICE)
   1953     {
   1954         if (ps_proc->u4_frame_qp > 10)
   1955         {
   1956             /* enable intra 16x16 */
   1957             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
   1958         }
   1959 
   1960         /* enable intra 4x4 */
   1961         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
   1962         {
   1963             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
   1964         }
   1965         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
   1966 
   1967         /* enable inter P16x16 */
   1968         u4_valid_modes |= (1 << P16x16);
   1969     }
   1970     else if (ps_proc->i4_slice_type == BSLICE)
   1971     {
   1972         if (ps_proc->u4_frame_qp > 10)
   1973         {
   1974             /* enable intra 16x16 */
   1975             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
   1976         }
   1977 
   1978         /* enable intra 4x4 */
   1979         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
   1980         {
   1981             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
   1982         }
   1983         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
   1984 
   1985         /* enable inter B16x16 */
   1986         u4_valid_modes |= (1 << B16x16);
   1987     }
   1988 
   1989 
   1990     /* init entropy */
   1991     ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
   1992     ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
   1993     ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
   1994 
   1995     /* compute recon when :
   1996      *   1. current frame is to be used as a reference
   1997      *   2. dump recon for bit stream sanity check
   1998      */
   1999     ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
   2000                                 ps_codec->s_cfg.u4_enable_recon;
   2001 
   2002     /* Encode 'n' macroblocks,
   2003      * 'n' being the number of mbs dictated by current proc ctxt */
   2004     for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
   2005     {
   2006         /* since we have not yet found sad, we have not yet got min sad */
   2007         /* we need to initialize these variables for each MB */
   2008         /* TODO how to get the min sad into the codec */
   2009         ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
   2010         ps_proc->u4_min_sad_reached = 0;
   2011 
   2012         /* mb analysis */
   2013         {
   2014             /* temp var */
   2015             WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
   2016 
   2017             /* force intra refresh ? */
   2018             WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
   2019                             (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
   2020 
   2021             /* evaluate inter 16x16 modes */
   2022             if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
   2023             {
   2024                 /* compute nmb me */
   2025                 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
   2026                 {
   2027                     ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
   2028                                                        i4_wd_mbs - ps_proc->i4_mb_x));
   2029                 }
   2030 
   2031                 /* set pointers to ME data appropriately for other modules to use */
   2032                 {
   2033                     UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
   2034 
   2035                     /* get the min sad condition for current mb */
   2036                     ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
   2037                     ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
   2038 
   2039                     ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
   2040                     ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
   2041                     ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);
   2042 
   2043                     ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
   2044                     ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
   2045                     ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
   2046                     ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
   2047                     ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
   2048 
   2049                     /* get the best sub pel buffer */
   2050                     ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
   2051                     ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
   2052                 }
   2053                 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
   2054             }
   2055             else
   2056             {
   2057                 /* Derive neighbor availability for the current macroblock */
   2058                 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
   2059 
   2060                 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
   2061             }
   2062 
   2063             /*
   2064              * If air says intra, we need to force the following code path to evaluate intra
   2065              * The easy way is just to say that the inter cost is too much
   2066              */
   2067             if (!i4_air_enable_inter)
   2068             {
   2069                 ps_proc->u4_min_sad_reached = 0;
   2070                 ps_proc->i4_mb_cost = INT_MAX;
   2071                 ps_proc->i4_mb_distortion = INT_MAX;
   2072             }
   2073             else if (ps_proc->u4_mb_type == PSKIP)
   2074             {
   2075                 goto UPDATE_MB_INFO;
   2076             }
   2077 
   2078             /* wait until the proc of [top + 1] mb is computed.
   2079              * We wait till the proc dependencies are satisfied */
   2080              if(ps_proc->i4_mb_y > 0)
   2081              {
   2082                 /* proc map */
   2083                 UWORD8  *pu1_proc_map_top;
   2084 
   2085                 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
   2086 
   2087                 while (1)
   2088                 {
   2089                     volatile UWORD8 *pu1_buf;
   2090                     WORD32 idx = i4_mb_idx + 1;
   2091 
   2092                     idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
   2093                     pu1_buf =  pu1_proc_map_top + idx;
   2094                     if(*pu1_buf)
   2095                         break;
   2096                     ithread_yield();
   2097                 }
   2098             }
   2099 
   2100             /* If we already have the minimum sad, there is no point in searching for sad again */
   2101             if (ps_proc->u4_min_sad_reached == 0)
   2102             {
   2103                 /* intra gating in inter slices */
   2104                 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
   2105                 if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
   2106                 {
   2107                     /* distortion of neighboring blocks */
   2108                     WORD32 i4_distortion[4];
   2109 
   2110                     i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
   2111 
   2112                     i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
   2113 
   2114                     i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
   2115 
   2116                     i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
   2117 
   2118                     i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
   2119 
   2120                 }
   2121 
   2122 
   2123                 /* If we are going to force intra we need to evaluate intra irrespective of gating */
   2124                 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
   2125                 {
   2126                     /* evaluate intra 4x4 modes */
   2127                     if (u4_valid_modes & (1 << I4x4))
   2128                     {
   2129                         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
   2130                         {
   2131                             ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
   2132                         }
   2133                         else
   2134                         {
   2135                             ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
   2136                         }
   2137                     }
   2138 
   2139                     /* evaluate intra 16x16 modes */
   2140                     if (u4_valid_modes & (1 << I16x16))
   2141                     {
   2142                         ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
   2143                     }
   2144 
   2145                     /* evaluate intra 8x8 modes */
   2146                     if (u4_valid_modes & (1 << I8x8))
   2147                     {
   2148                         ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
   2149                     }
   2150 
   2151                 }
   2152         }
   2153      }
   2154 
   2155         /* is intra */
   2156         if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
   2157         {
   2158             luma_idx = ps_proc->u4_mb_type;
   2159             chroma_idx = 0;
   2160             is_intra = 1;
   2161 
   2162             /* evaluate chroma blocks for intra */
   2163             ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
   2164         }
   2165         else
   2166         {
   2167             luma_idx = 3;
   2168             chroma_idx = 1;
   2169             is_intra = 0;
   2170         }
   2171         ps_proc->u4_is_intra = is_intra;
   2172         ps_proc->ps_pu->b1_intra_flag = is_intra;
   2173 
   2174         /* redo MV pred of neighbors in the case intra mb */
   2175         /* TODO : currently called unconditionally, needs to be called only in the case of intra
   2176          * to modify neighbors */
   2177         if (ps_proc->i4_slice_type != ISLICE)
   2178         {
   2179             ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
   2180         }
   2181 
   2182         /* Perform luma mb core coding */
   2183         u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
   2184 
   2185         /* Perform chroma mb core coding */
   2186         u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
   2187 
   2188         /* coded block pattern */
   2189         ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
   2190 
   2191         if (!ps_proc->u4_is_intra)
   2192         {
   2193             if (ps_proc->i4_slice_type == BSLICE)
   2194             {
   2195                 if (ih264e_find_bskip_params(ps_proc, PRED_L0))
   2196                 {
   2197                     ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
   2198                 }
   2199             }
   2200             else if(!ps_proc->u4_cbp)
   2201             {
   2202                 if (ih264e_find_pskip_params(ps_proc, PRED_L0))
   2203                 {
   2204                     ps_proc->u4_mb_type = PSKIP;
   2205                 }
   2206             }
   2207         }
   2208 
   2209 UPDATE_MB_INFO:
   2210 
   2211         /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
   2212         ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
   2213 
   2214         /**********************************************************************/
   2215         /* if disable deblock level is '0' this implies enable deblocking for */
   2216         /* all edges of all macroblocks with out any restrictions             */
   2217         /*                                                                    */
   2218         /* if disable deblock level is '1' this implies disable deblocking for*/
   2219         /* all edges of all macroblocks with out any restrictions             */
   2220         /*                                                                    */
   2221         /* if disable deblock level is '2' this implies enable deblocking for */
   2222         /* all edges of all macroblocks except edges overlapping with slice   */
   2223         /* boundaries. This option is not currently supported by the encoder  */
   2224         /* hence the slice map should be of no significance to perform debloc */
   2225         /* king                                                               */
   2226         /**********************************************************************/
   2227 
   2228         if (ps_proc->u4_compute_recon)
   2229         {
   2230             /* deblk context */
   2231             /* src pointers */
   2232             UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
   2233             UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
   2234 
   2235             /* src indices */
   2236             UWORD32 i4_mb_x = ps_proc->i4_mb_x;
   2237             UWORD32 i4_mb_y = ps_proc->i4_mb_y;
   2238 
   2239             /* compute blocking strength */
   2240             if (ps_proc->u4_disable_deblock_level != 1)
   2241             {
   2242                 ih264e_compute_bs(ps_proc);
   2243             }
   2244 
   2245             /* nmb deblocking and hpel and padding */
   2246             ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
   2247                                                   pu1_cur_pic_chroma, i4_mb_x,
   2248                                                   i4_mb_y);
   2249         }
   2250 
   2251         /* update the context after for coding next mb */
   2252         error_status |= ih264e_update_proc_ctxt(ps_proc);
   2253 
   2254         /* Once the last row is processed, mark the buffer status appropriately */
   2255         if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
   2256         {
   2257             /* Pointer to current picture buffer structure */
   2258             pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
   2259 
   2260             /* Pointer to current picture's mv buffer structure */
   2261             mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
   2262 
   2263             /**********************************************************************/
   2264             /* if disable deblock level is '0' this implies enable deblocking for */
   2265             /* all edges of all macroblocks with out any restrictions             */
   2266             /*                                                                    */
   2267             /* if disable deblock level is '1' this implies disable deblocking for*/
   2268             /* all edges of all macroblocks with out any restrictions             */
   2269             /*                                                                    */
   2270             /* if disable deblock level is '2' this implies enable deblocking for */
   2271             /* all edges of all macroblocks except edges overlapping with slice   */
   2272             /* boundaries. This option is not currently supported by the encoder  */
   2273             /* hence the slice map should be of no significance to perform debloc */
   2274             /* king                                                               */
   2275             /**********************************************************************/
   2276             error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
   2277 
   2278             error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
   2279 
   2280             if (ps_codec->s_cfg.u4_enable_recon)
   2281             {
   2282                 /* pic cnt */
   2283                 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
   2284 
   2285                 /* rec buffers */
   2286                 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf  = *ps_proc->ps_cur_pic;
   2287 
   2288                 /* is last? */
   2289                 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
   2290 
   2291                 /* frame time stamp */
   2292                 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
   2293                 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
   2294             }
   2295 
   2296         }
   2297     }
   2298 
   2299     DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
   2300 
   2301     return error_status;
   2302 }
   2303 
   2304 /**
   2305 *******************************************************************************
   2306 *
   2307 * @brief
   2308 *  Function to update rc context after encoding
   2309 *
   2310 * @par   Description
   2311 *  This function updates the rate control context after the frame is encoded.
   2312 *  Number of bits consumed by the current frame, frame distortion, frame cost,
   2313 *  number of intra/inter mb's, ... are passed on to rate control context for
   2314 *  updating the rc model.
   2315 *
   2316 * @param[in] ps_codec
   2317 *  Handle to codec context
   2318 *
   2319 * @param[in] ctxt_sel
   2320 *  frame context selector
   2321 *
   2322 * @param[in] pic_cnt
   2323 *  pic count
   2324 *
   2325 * @returns i4_stuffing_byte
   2326 *  number of stuffing bytes (if necessary)
   2327 *
   2328 * @remarks
   2329 *
   2330 *******************************************************************************
   2331 */
   2332 WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
   2333 {
   2334     /* proc set base idx */
   2335     WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
   2336 
   2337     /* proc ctxt */
   2338     process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
   2339 
   2340     /* frame qp */
   2341     UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
   2342 
   2343     /* cbr rc return status */
   2344     WORD32 i4_stuffing_byte = 0;
   2345 
   2346     /* current frame stats */
   2347     frame_info_t s_frame_info;
   2348     picture_type_e rc_pic_type;
   2349 
   2350     /* temp var */
   2351     WORD32 i, j;
   2352 
   2353     /********************************************************************/
   2354     /*                            BEGIN INIT                            */
   2355     /********************************************************************/
   2356 
   2357     /* init frame info */
   2358     irc_init_frame_info(&s_frame_info);
   2359 
   2360     /* get frame info */
   2361     for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
   2362     {
   2363         /*****************************************************************/
   2364         /* One frame can be encoded by max of u4_num_cores threads       */
   2365         /* Accumulating the num mbs, sad, qp and intra_mb_cost from      */
   2366         /* u4_num_cores threads                                          */
   2367         /*****************************************************************/
   2368         for (j = 0; j< MAX_MB_TYPE; j++)
   2369         {
   2370             s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
   2371 
   2372             s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
   2373 
   2374             s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
   2375         }
   2376 
   2377         s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
   2378 
   2379         s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
   2380 
   2381         /*****************************************************************/
   2382         /* gather number of residue and header bits consumed by the frame*/
   2383         /*****************************************************************/
   2384         ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
   2385     }
   2386 
   2387     /* get pic type */
   2388     switch (ps_codec->pic_type)
   2389     {
   2390         case PIC_I:
   2391         case PIC_IDR:
   2392             rc_pic_type = I_PIC;
   2393             break;
   2394         case PIC_P:
   2395             rc_pic_type = P_PIC;
   2396             break;
   2397         case PIC_B:
   2398             rc_pic_type = B_PIC;
   2399             break;
   2400         default:
   2401             assert(0);
   2402             break;
   2403     }
   2404 
   2405     /* update rc lib with current frame stats */
   2406     i4_stuffing_byte =  ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
   2407                                           &(s_frame_info),
   2408                                           ps_codec->s_rate_control.pps_pd_frm_rate,
   2409                                           ps_codec->s_rate_control.pps_time_stamp,
   2410                                           ps_codec->s_rate_control.pps_frame_time,
   2411                                           (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
   2412                                           &rc_pic_type,
   2413                                           i4_is_first_frm,
   2414                                           &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
   2415                                           u1_frame_qp,
   2416                                           &ps_codec->s_rate_control.num_intra_in_prev_frame,
   2417                                           &ps_codec->s_rate_control.i4_avg_activity);
   2418     return i4_stuffing_byte;
   2419 }
   2420 
   2421 /**
   2422 *******************************************************************************
   2423 *
   2424 * @brief
   2425 *  entry point of a spawned encoder thread
   2426 *
   2427 * @par Description:
   2428 *  The encoder thread dequeues a proc/entropy job from the encoder queue and
   2429 *  calls necessary routines.
   2430 *
   2431 * @param[in] pv_proc
   2432 *  Process context corresponding to the thread
   2433 *
   2434 * @returns  error status
   2435 *
   2436 * @remarks
   2437 *
   2438 *******************************************************************************
   2439 */
   2440 WORD32 ih264e_process_thread(void *pv_proc)
   2441 {
   2442     /* error status */
   2443     IH264_ERROR_T ret = IH264_SUCCESS;
   2444     WORD32 error_status = IH264_SUCCESS;
   2445 
   2446     /* proc ctxt */
   2447     process_ctxt_t *ps_proc = pv_proc;
   2448 
   2449     /* codec ctxt */
   2450     codec_t *ps_codec = ps_proc->ps_codec;
   2451 
   2452     /* structure to represent a processing job entry */
   2453     job_t s_job;
   2454 
   2455     /* blocking call : entropy dequeue is non-blocking till all
   2456      * the proc jobs are processed */
   2457     WORD32 is_blocking = 0;
   2458 
   2459     /* set affinity */
   2460     ithread_set_affinity(ps_proc->i4_id);
   2461 
   2462     while(1)
   2463     {
   2464         /* dequeue a job from the entropy queue */
   2465         {
   2466             int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
   2467 
   2468             /* codec context selector */
   2469             WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
   2470 
   2471             volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
   2472 
   2473             /* have the lock */
   2474             if (error == 0)
   2475             {
   2476                 if (*pu4_buf == 0)
   2477                 {
   2478                     /* no entropy threads are active, try dequeuing a job from the entropy queue */
   2479                     ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
   2480                     if (IH264_SUCCESS == ret)
   2481                     {
   2482                         *pu4_buf = 1;
   2483                         ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
   2484                         goto WORKER;
   2485                     }
   2486                     else if(is_blocking)
   2487                     {
   2488                         ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
   2489                         break;
   2490                     }
   2491                 }
   2492                 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
   2493             }
   2494         }
   2495 
   2496         /* dequeue a job from the process queue */
   2497         ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
   2498         if (IH264_SUCCESS != ret)
   2499         {
   2500             if(ps_proc->i4_id)
   2501                 break;
   2502             else
   2503             {
   2504                 is_blocking = 1;
   2505                 continue;
   2506             }
   2507         }
   2508 
   2509 WORKER:
   2510         /* choose appropriate proc context based on proc_base_idx */
   2511         ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
   2512 
   2513         switch (s_job.i4_cmd)
   2514         {
   2515             case CMD_PROCESS:
   2516                 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
   2517                 ps_proc->i4_mb_x = s_job.i2_mb_x;
   2518                 ps_proc->i4_mb_y = s_job.i2_mb_y;
   2519 
   2520                 /* init process context */
   2521                 ih264e_init_proc_ctxt(ps_proc);
   2522 
   2523                 /* core code all mbs enlisted under the current job */
   2524                 error_status |= ih264e_process(ps_proc);
   2525                 break;
   2526 
   2527             case CMD_ENTROPY:
   2528                 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
   2529                 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
   2530                 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
   2531 
   2532                 /* init entropy */
   2533                 ih264e_init_entropy_ctxt(ps_proc);
   2534 
   2535                 /* entropy code all mbs enlisted under the current job */
   2536                 error_status |= ih264e_entropy(ps_proc);
   2537                 break;
   2538 
   2539             default:
   2540                 error_status |= IH264_FAIL;
   2541                 break;
   2542         }
   2543     }
   2544 
   2545     /* send error code */
   2546     ps_proc->i4_error_code = error_status;
   2547     return ret;
   2548 }
   2549