Home | History | Annotate | Download | only in encoder
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /**
     21 *******************************************************************************
     22 * @file
     23 *  ihevce_multi_thread_funcs.c
     24 *
     25 * @brief
     26 *  Contains functions related to Job Queues and others, required for multi-threading
     27 *
     28 * @author
     29 *  Ittiam
     30 *
     31 * @par List of Functions:
     32 *  <TODO: TO BE ADDED>
     33 *
     34 * @remarks
     35 *  None
     36 *
     37 *******************************************************************************
     38 */
     39 /*****************************************************************************/
     40 /* File Includes                                                             */
     41 /*****************************************************************************/
     42 /* System include files */
     43 #include <stdio.h>
     44 #include <string.h>
     45 #include <stdlib.h>
     46 #include <assert.h>
     47 #include <stdarg.h>
     48 #include <math.h>
     49 
     50 /* User include files */
     51 #include "ihevc_typedefs.h"
     52 #include "itt_video_api.h"
     53 #include "ihevce_api.h"
     54 
     55 #include "rc_cntrl_param.h"
     56 #include "rc_frame_info_collector.h"
     57 #include "rc_look_ahead_params.h"
     58 
     59 #include "ihevc_defs.h"
     60 #include "ihevc_structs.h"
     61 #include "ihevc_platform_macros.h"
     62 #include "ihevc_deblk.h"
     63 #include "ihevc_itrans_recon.h"
     64 #include "ihevc_chroma_itrans_recon.h"
     65 #include "ihevc_chroma_intra_pred.h"
     66 #include "ihevc_intra_pred.h"
     67 #include "ihevc_inter_pred.h"
     68 #include "ihevc_mem_fns.h"
     69 #include "ihevc_padding.h"
     70 #include "ihevc_weighted_pred.h"
     71 #include "ihevc_sao.h"
     72 #include "ihevc_resi_trans.h"
     73 #include "ihevc_quant_iquant_ssd.h"
     74 #include "ihevc_cabac_tables.h"
     75 
     76 #include "ihevce_defs.h"
     77 #include "ihevce_lap_enc_structs.h"
     78 #include "ihevce_multi_thrd_structs.h"
     79 #include "ihevce_multi_thrd_funcs.h"
     80 #include "ihevce_me_common_defs.h"
     81 #include "ihevce_had_satd.h"
     82 #include "ihevce_error_codes.h"
     83 #include "ihevce_bitstream.h"
     84 #include "ihevce_cabac.h"
     85 #include "ihevce_rdoq_macros.h"
     86 #include "ihevce_function_selector.h"
     87 #include "ihevce_enc_structs.h"
     88 #include "ihevce_entropy_structs.h"
     89 #include "ihevce_cmn_utils_instr_set_router.h"
     90 #include "ihevce_enc_loop_structs.h"
     91 #include "ihevce_bs_compute_ctb.h"
     92 #include "ihevce_global_tables.h"
     93 #include "ihevce_dep_mngr_interface.h"
     94 #include "hme_datatype.h"
     95 #include "hme_interface.h"
     96 #include "hme_common_defs.h"
     97 #include "hme_defs.h"
     98 #include "ihevce_me_instr_set_router.h"
     99 #include "ihevce_ipe_instr_set_router.h"
    100 #include "ihevce_ipe_structs.h"
    101 #include "ihevce_coarse_me_pass.h"
    102 
    103 #include "cast_types.h"
    104 #include "osal.h"
    105 #include "osal_defaults.h"
    106 
    107 /********************************************************************/
    108 /*Macros                                                            */
    109 /********************************************************************/
    110 #define MULT_FACT 100 /* NOTE(review): constant 100; not referenced in the visible part of this file - confirm its user before changing */
    111 
    112 /*****************************************************************************/
    113 /* Function Definitions                                                      */
    114 /*****************************************************************************/
    115 
    116 /**
    117 *******************************************************************************
    118 *
    119 * @brief Function Pops out the next Job in the appropriate Job Que
    120 *
    121 * @par Description: Does under mutex lock to ensure thread safe
    122 *
    123 * @param[inout] pv_multi_thrd_ctxt
    124 *  Pointer to Multi thread context
    125 *
    126 * @param[in] i4_job_type
    127 *   Job type from which a job needs to be popped out
    128 *
    129 * @param[in] i4_blocking_mode
    130 *   Mode of operation
    131 *
    132 * @returns
    133 *  None
    134 *
    135 * @remarks
    136 *
    137 *******************************************************************************
    138 */
    139 void *ihevce_pre_enc_grp_get_next_job(
    140     void *pv_multi_thrd_ctxt, WORD32 i4_job_type, WORD32 i4_blocking_mode, WORD32 i4_ping_pong)
    141 {
    142     /* Local variables */
    143     multi_thrd_ctxt_t *ps_multi_thrd;
    144     job_queue_handle_t *ps_job_queue_hdl;
    145     void *pv_next = NULL;
    146     UWORD8 au1_in_dep_cmp[MAX_IN_DEP] = { 0 };
    147     void *pv_job_q_mutex_hdl_pre_enc = NULL;
    148 
    149     /* Derive local variables */
    150     ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
    151     ps_job_queue_hdl =
    152         (job_queue_handle_t *)&ps_multi_thrd->as_job_que_preenc_hdls[i4_ping_pong][i4_job_type];
    153 
    154     /* lock the mutex for Q access */
    155     /* As design must facilitate for parallelism in each stage,
    156     It is recommended to have seperate mutex for each stage*/
    157     if(i4_job_type < ME_JOB_LYR4)
    158     {
    159         pv_job_q_mutex_hdl_pre_enc = ps_multi_thrd->pv_job_q_mutex_hdl_pre_enc_decomp;
    160     }
    161     else if(i4_job_type < IPE_JOB_LYR0)
    162     {
    163         pv_job_q_mutex_hdl_pre_enc = ps_multi_thrd->pv_job_q_mutex_hdl_pre_enc_hme;
    164     }
    165     else
    166     {
    167         pv_job_q_mutex_hdl_pre_enc = ps_multi_thrd->pv_job_q_mutex_hdl_pre_enc_l0ipe;
    168     }
    169 
    170     osal_mutex_lock(pv_job_q_mutex_hdl_pre_enc);
    171     /* Get the next */
    172     pv_next = ps_job_queue_hdl->pv_next;
    173 
    174     /* Update the next by checking input dependency */
    175     if(NULL != pv_next)
    176     {
    177         job_queue_t *ps_job_queue = (job_queue_t *)pv_next;
    178 
    179         /* check for input dependencies to be resolved            */
    180         /* this can be blocking or non blocking based on use case */
    181         /* if non blocking then the function returns NULL         */
    182 
    183         if(1 == i4_blocking_mode)
    184         {
    185             volatile WORD32 mem_diff;
    186             volatile UWORD8 *pu1_ref_buf = &au1_in_dep_cmp[0];
    187             volatile UWORD8 *pu1_curr_buf = &ps_job_queue->au1_in_dep[0];
    188 
    189             mem_diff = memcmp((void *)pu1_ref_buf, (void *)pu1_curr_buf, MAX_IN_DEP);
    190 
    191             /* wait until all dependency is resolved */
    192             while(0 != mem_diff)
    193             {
    194                 mem_diff = memcmp((void *)pu1_ref_buf, (void *)pu1_curr_buf, MAX_IN_DEP);
    195             }
    196 
    197             /* update the next job in the queue */
    198             ps_job_queue_hdl->pv_next = ps_job_queue->pv_next;
    199         }
    200         else
    201         {
    202             /* check for input dependency resolved */
    203             if((0 != memcmp(&au1_in_dep_cmp[0], &ps_job_queue->au1_in_dep[0], MAX_IN_DEP)))
    204             {
    205                 /* return null */
    206                 pv_next = NULL;
    207             }
    208             else
    209             {
    210                 /* update the next job in the queue */
    211                 ps_job_queue_hdl->pv_next = ps_job_queue->pv_next;
    212             }
    213         }
    214     }
    215 
    216     /* unlock the mutex */
    217     osal_mutex_unlock(pv_job_q_mutex_hdl_pre_enc);
    218 
    219     /* Return */
    220     return (pv_next);
    221 
    222 } /* End of get_next_job */
    223 
    224 /**
    225 *******************************************************************************
    226 *
    227 * @brief Function Pops out the next Job in the appropriate Job Que
    228 *
    229 * @par Description: Does under mutex lock to ensure thread safe
    230 *
    231 * @param[inout] pv_multi_thrd_ctxt
    232 *  Pointer to Multi thread context
    233 *
    234 * @param[in] i4_job_type
    235 *   Job type from which a job needs to be popped out
    236 *
    237 * @param[in] i4_blocking_mode
    238 *   Mode of operation
    239 *
    240 * @returns
    241 *  None
    242 *
    243 * @remarks
    244 *
    245 *******************************************************************************
    246 */
    247 void *ihevce_enc_grp_get_next_job(
    248     void *pv_multi_thrd_ctxt, WORD32 i4_job_type, WORD32 i4_blocking_mode, WORD32 i4_curr_frm_id)
    249 {
    250     /* Local variables */
    251     multi_thrd_ctxt_t *ps_multi_thrd;
    252     job_queue_handle_t *ps_job_queue_hdl;
    253     void *pv_next = NULL;
    254     void *pv_job_q_mutex_hdl_enc_grp;
    255     UWORD8 au1_in_dep_cmp[MAX_IN_DEP] = { 0 };
    256 
    257     /* Derive local variables */
    258     ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
    259 
    260     if(ME_JOB_ENC_LYR == i4_job_type)
    261     {
    262         pv_job_q_mutex_hdl_enc_grp = ps_multi_thrd->pv_job_q_mutex_hdl_enc_grp_me;
    263 
    264         ps_job_queue_hdl = (job_queue_handle_t *)&ps_multi_thrd->aps_cur_out_me_prms[i4_curr_frm_id]
    265                                ->as_job_que_enc_hdls[i4_job_type];
    266     }
    267     else
    268     {
    269         pv_job_q_mutex_hdl_enc_grp = ps_multi_thrd->pv_job_q_mutex_hdl_enc_grp_enc_loop;
    270         ps_job_queue_hdl =
    271             (job_queue_handle_t *)&ps_multi_thrd->aps_cur_inp_enc_prms[i4_curr_frm_id]
    272                 ->as_job_que_enc_hdls[i4_job_type];
    273     }
    274 
    275     /* lock the mutex for Q access */
    276     osal_mutex_lock(pv_job_q_mutex_hdl_enc_grp);
    277 
    278     /* Get the next */
    279     pv_next = ps_job_queue_hdl->pv_next;
    280 
    281     /* Update the next by checking input dependency */
    282     if(NULL != pv_next)
    283     {
    284         job_queue_t *ps_job_queue = (job_queue_t *)pv_next;
    285 
    286         /* check for input dependencies to be resolved            */
    287         /* this can be blocking or non blocking based on use case */
    288         /* if non blocking then the function returns NULL         */
    289 
    290         if(1 == i4_blocking_mode)
    291         {
    292             volatile WORD32 mem_diff;
    293             volatile UWORD8 *pu1_ref_buf = &au1_in_dep_cmp[0];
    294             volatile UWORD8 *pu1_curr_buf = &ps_job_queue->au1_in_dep[0];
    295 
    296             mem_diff = memcmp((void *)pu1_ref_buf, (void *)pu1_curr_buf, MAX_IN_DEP);
    297 
    298             /* wait until all dependency is resolved */
    299             while(0 != mem_diff)
    300             {
    301                 mem_diff = memcmp((void *)pu1_ref_buf, (void *)pu1_curr_buf, MAX_IN_DEP);
    302             }
    303 
    304             /* update the next job in the queue */
    305             ps_job_queue_hdl->pv_next = ps_job_queue->pv_next;
    306         }
    307         else
    308         {
    309             /* check for input dependency resolved */
    310             if((0 != memcmp(&au1_in_dep_cmp[0], &ps_job_queue->au1_in_dep[0], MAX_IN_DEP)))
    311             {
    312                 /* return null */
    313                 pv_next = NULL;
    314             }
    315             else
    316             {
    317                 /* update the next job in the queue */
    318                 ps_job_queue_hdl->pv_next = ps_job_queue->pv_next;
    319             }
    320         }
    321     }
    322 
    323     /* unlock the mutex */
    324     osal_mutex_unlock(pv_job_q_mutex_hdl_enc_grp);
    325 
    326     /* Return */
    327     return (pv_next);
    328 
    329 } /* End of get_next_job */
    330 
    331 /**
    332 *******************************************************************************
    333 *
    334 * @brief Set the output dependency to done state
    335 *
    336 * @par Description: same as brief
    337 *
    338 * @param[inout] pv_multi_thrd_ctxt
    339 *  Pointer to Multi thread context
    340 *
    341 * @param[in] ps_curr_job
    342 *  Current finished Job pointer
    343 *
    344 * @returns
    345 *  None
    346 *
    347 * @remarks
    348 *
    349 *******************************************************************************
    350 */
    351 void ihevce_pre_enc_grp_job_set_out_dep(
    352     void *pv_multi_thrd_ctxt, job_queue_t *ps_curr_job, WORD32 i4_ping_pong)
    353 {
    354     /* local vareiables */
    355     WORD32 ctr;
    356     multi_thrd_ctxt_t *ps_multi_thrd;
    357 
    358     ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
    359 
    360     /* loop over number output dependencies */
    361     for(ctr = 0; ctr < ps_curr_job->i4_num_output_dep; ctr++)
    362     {
    363         UWORD8 *pu1_ptr;
    364 
    365         pu1_ptr = (UWORD8 *)ps_multi_thrd->aps_job_q_pre_enc[i4_ping_pong];
    366         pu1_ptr += ps_curr_job->au4_out_ofsts[ctr];
    367         *pu1_ptr = 0;
    368     }
    369 
    370     return;
    371 }
    372 
    373 /**
    374 *******************************************************************************
    375 *
    376 * @brief Set the output dependency to done state
    377 *
    378 * @par Description: same as brief
    379 *
    380 * @param[inout] pv_multi_thrd_ctxt
    381 *  Pointer to Multi thread context
    382 *
    383 * @param[in] ps_curr_job
    384 *   Current finished Job pointer
    385 *
    386 * @returns
    387 *  None
    388 *
    389 * @remarks
    390 *
    391 *******************************************************************************
    392 */
    393 void ihevce_enc_grp_job_set_out_dep(
    394     void *pv_multi_thrd_ctxt, job_queue_t *ps_curr_job, WORD32 i4_curr_frm_id)
    395 {
    396     /* local vareiables */
    397     WORD32 ctr;
    398     UWORD8 *pu1_ptr;
    399     multi_thrd_ctxt_t *ps_multi_thrd;
    400 
    401     ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
    402 
    403     if(ME_JOB_ENC_LYR == ps_curr_job->i4_task_type)
    404     {
    405         pu1_ptr = (UWORD8 *)ps_multi_thrd->aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc;
    406     }
    407     else
    408     {
    409         pu1_ptr = (UWORD8 *)ps_multi_thrd->aps_cur_inp_enc_prms[i4_curr_frm_id]->ps_job_q_enc;
    410     }
    411 
    412     /* loop over number output dependencies */
    413     for(ctr = 0; ctr < ps_curr_job->i4_num_output_dep; ctr++)
    414     {
    415         WORD32 i4_off;
    416         i4_off = ps_curr_job->au4_out_ofsts[ctr];
    417         pu1_ptr[i4_off] = 0;
    418     }
    419 
    420     return;
    421 }
    422 
    423 /**
    424 *******************************************************************************
    425 *
    426 * @brief Function prepares the Job Queues for all the passes of encoder
    427 *
    428 * @par Description: Based on picture type sets the input and output dependency
    429 *
    430 * @param[inout] pv_enc_ctxt
    431 *  Pointer to encoder context
    432 *
    433 * @param[in] ps_curr_inp
    434 *  Current Input buffer pointer
    435 *
    436 * @returns
    437 *  None
    438 *
    439 * @remarks
    440 *
    441 *******************************************************************************
    442 */
    443 void ihevce_prepare_job_queue(
    444     void *pv_enc_ctxt, ihevce_lap_enc_buf_t *ps_curr_inp, WORD32 i4_curr_frm_id)
    445 {
    446     /* local variables */
    447     enc_ctxt_t *ps_ctxt;
    448     job_queue_t *ps_me_job_queue_lyr0;
    449     job_queue_t *ps_enc_loop_job_queue;
    450     WORD32 pass;
    451     WORD32 num_jobs, col_tile_ctr;
    452     WORD32 num_ctb_vert_rows;
    453     WORD32 i4_pic_type;
    454     WORD32 i;  //counter for bitrate
    455     WORD32 i4_num_bitrate_instances;
    456     WORD32 i4_num_tile_col;
    457 
    458     /* derive local varaibles */
    459     ps_ctxt = (enc_ctxt_t *)pv_enc_ctxt;
    460     num_ctb_vert_rows = ps_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert;
    461     i4_num_bitrate_instances = ps_ctxt->i4_num_bitrates;
    462 
    463     i4_num_tile_col = 1;
    464     if(1 == ps_ctxt->ps_tile_params_base->i4_tiles_enabled_flag)
    465     {
    466         i4_num_tile_col = ps_ctxt->ps_tile_params_base->i4_num_tile_cols;
    467     }
    468     /* memset the entire job que buffer to zero */
    469     memset(
    470         ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc,
    471         0,
    472         MAX_NUM_VERT_UNITS_FRM * NUM_ENC_JOBS_QUES * i4_num_tile_col * sizeof(job_queue_t));
    473 
    474     /* get the start address of  Job queues */
    475     ps_me_job_queue_lyr0 = ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc;
    476     ps_enc_loop_job_queue = ps_me_job_queue_lyr0 + (i4_num_tile_col * MAX_NUM_VERT_UNITS_FRM);
    477 
    478     /* store the JOB queue in the Job handle */
    479     ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
    480         ->as_job_que_enc_hdls[ME_JOB_ENC_LYR]
    481         .pv_next = (void *)ps_me_job_queue_lyr0;
    482     /* store the JOB queue in the Job handle for reenc */
    483     ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
    484         ->as_job_que_enc_hdls_reenc[ME_JOB_ENC_LYR]
    485         .pv_next = (void *)ps_me_job_queue_lyr0;
    486 
    487     for(i = 0; i < i4_num_bitrate_instances; i++)
    488     {
    489         ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
    490             ->as_job_que_enc_hdls[ENC_LOOP_JOB + i]
    491             .pv_next = (void *)ps_enc_loop_job_queue;
    492         ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
    493             ->as_job_que_enc_hdls_reenc[ENC_LOOP_JOB + i]
    494             .pv_next = (void *)ps_enc_loop_job_queue;
    495         ps_enc_loop_job_queue += (i4_num_tile_col * MAX_NUM_VERT_UNITS_FRM);
    496     }
    497 
    498     i4_pic_type = ps_curr_inp->s_lap_out.i4_pic_type;
    499 
    500     //prepare ME JOB queue first
    501     //for(pass = 0; pass < NUM_ENC_JOBS_QUES; pass++)
    502     {
    503         job_queue_t *ps_job_queue_curr;
    504         job_queue_t *ps_job_queue_next;
    505         WORD32 ctr;
    506         WORD32 inp_dep;
    507         WORD32 out_dep;
    508         WORD32 num_vert_units;
    509         HEVCE_ENC_JOB_TYPES_T task_type;
    510 
    511         pass = 0;  //= ENC_LOOP_JOB
    512 
    513         {
    514             /* num_ver_units of finest layer is stored at (num_hme_lyrs - 1)th index */
    515             num_vert_units = num_ctb_vert_rows;
    516             task_type = ME_JOB_ENC_LYR;
    517             ps_job_queue_curr = ps_me_job_queue_lyr0;
    518             ps_job_queue_next =
    519                 (job_queue_t *)ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
    520                     ->as_job_que_enc_hdls[ENC_LOOP_JOB]
    521                     .pv_next;
    522             inp_dep = 0;
    523             out_dep = 1;  //set reference bit-rate's input dependency
    524         }
    525 
    526         if((ME_JOB_ENC_LYR == pass) &&
    527            ((IV_I_FRAME == i4_pic_type) || (IV_IDR_FRAME == i4_pic_type)) && !L0ME_IN_OPENLOOP_MODE)
    528         {
    529             //continue;
    530         }
    531         else
    532         {
    533             /* loop over all the vertical rows */
    534             for(num_jobs = 0; num_jobs < num_vert_units; num_jobs++)
    535             {
    536                 /* loop over all the column tiles */
    537                 for(col_tile_ctr = 0; col_tile_ctr < i4_num_tile_col; col_tile_ctr++)
    538                 {
    539                     ULWORD64 u8_temp;
    540 
    541                     {
    542                         ps_job_queue_curr->s_job_info.s_me_job_info.i4_vert_unit_row_no = num_jobs;
    543                         ps_job_queue_curr->s_job_info.s_me_job_info.i4_tile_col_idx = col_tile_ctr;
    544                     }
    545 
    546                     ps_job_queue_curr->pv_next = (void *)(ps_job_queue_curr + 1);
    547 
    548                     ps_job_queue_curr->i4_task_type = task_type;
    549 
    550                     ps_job_queue_curr->i4_num_input_dep = inp_dep;
    551 
    552                     /* set the entire input dep buffer to default value 0 */
    553                     memset(&ps_job_queue_curr->au1_in_dep[0], 0, sizeof(UWORD8) * MAX_IN_DEP);
    554 
    555                     /* set the input dep buffer to 1 for num inp dep */
    556                     if(0 != inp_dep)
    557                     {
    558                         memset(&ps_job_queue_curr->au1_in_dep[0], 1, sizeof(UWORD8) * inp_dep);
    559                     }
    560 
    561                     ps_job_queue_curr->i4_num_output_dep = out_dep;
    562 
    563                     /* set the entire offset buffer to default value */
    564                     memset(
    565                         &ps_job_queue_curr->au4_out_ofsts[0], 0xFF, sizeof(UWORD32) * MAX_OUT_DEP);
    566 
    567                     for(ctr = 0; ctr < out_dep; ctr++)
    568                     {
    569                         /* col tile level dependency b/w ME & EncLoop */
    570                         u8_temp = (ULWORD64)(
    571                             &ps_job_queue_next[num_jobs * i4_num_tile_col + col_tile_ctr] -
    572                             ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc);
    573 
    574                         u8_temp *= sizeof(job_queue_t);
    575 
    576                         /* store the offset to the array */
    577                         ps_job_queue_curr->au4_out_ofsts[ctr] = (UWORD32)u8_temp;
    578                     }
    579 
    580                     ps_job_queue_curr++;
    581                 }
    582             }  //for ends
    583 
    584             /* set the last pointer to NULL */
    585             ps_job_queue_curr--;
    586             ps_job_queue_curr->pv_next = (void *)NULL;
    587         }  //else ends
    588     }
    589 
    590     //prepare Enc_loop JOB queue for all bitrate instances
    591     //for(pass = 0; pass < NUM_ENC_JOBS_QUES; pass++)
    592     for(i = 0; i < i4_num_bitrate_instances; i++)
    593     {
    594         job_queue_t *ps_job_queue_curr;
    595         job_queue_t *ps_job_queue_next;
    596         WORD32 ctr;
    597         WORD32 inp_dep;
    598         WORD32 out_dep;
    599         WORD32 num_vert_units;
    600         HEVCE_ENC_JOB_TYPES_T task_type;
    601 
    602         /* In case of I or IDR pictures ME will not perform any processing */
    603         //if(ENC_LOOP_JOB == pass)
    604         {
    605             if(((IV_I_FRAME == i4_pic_type) || (IV_IDR_FRAME == i4_pic_type)) &&
    606                !L0ME_IN_OPENLOOP_MODE)
    607             {
    608                 inp_dep = 0;
    609             }
    610             else
    611             {
    612                 inp_dep = 1;
    613             }
    614 
    615             task_type = (HEVCE_ENC_JOB_TYPES_T)(ENC_LOOP_JOB + i);
    616             ps_job_queue_curr =
    617                 (job_queue_t *)ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
    618                     ->as_job_que_enc_hdls[ENC_LOOP_JOB + i]
    619                     .pv_next;
    620             ps_job_queue_next =
    621                 (job_queue_t *)ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
    622                     ->as_job_que_enc_hdls[ENC_LOOP_JOB + i + 1]
    623                     .pv_next;
    624             out_dep = 1;  //output dependecny is the next bit-rate instance's input dependency
    625             num_vert_units = num_ctb_vert_rows;
    626 
    627             if(i == i4_num_bitrate_instances - 1)  //for last bit-rate instance
    628             {
    629                 //clear output dependency
    630                 ps_job_queue_next = NULL;
    631                 out_dep = 0;
    632             }
    633         }
    634 
    635         /* loop over all the vertical rows */
    636         for(num_jobs = 0; num_jobs < num_vert_units; num_jobs++)
    637         {
    638             /* loop over all the column tiles */
    639             for(col_tile_ctr = 0; col_tile_ctr < i4_num_tile_col; col_tile_ctr++)
    640             {
    641                 ULWORD64 u8_temp;
    642 
    643                 {
    644                     ps_job_queue_curr->s_job_info.s_enc_loop_job_info.i4_ctb_row_no = num_jobs;
    645                     ps_job_queue_curr->s_job_info.s_enc_loop_job_info.i4_tile_col_idx =
    646                         col_tile_ctr;
    647                     ps_job_queue_curr->s_job_info.s_enc_loop_job_info.i4_bitrate_instance_no = i;
    648                 }
    649 
    650                 ps_job_queue_curr->pv_next = (void *)(ps_job_queue_curr + 1);
    651 
    652                 ps_job_queue_curr->i4_task_type = task_type;
    653 
    654                 ps_job_queue_curr->i4_num_input_dep = inp_dep;
    655 
    656                 /* set the entire input dep buffer to default value 0 */
    657                 memset(&ps_job_queue_curr->au1_in_dep[0], 0, sizeof(UWORD8) * MAX_IN_DEP);
    658 
    659                 /* set the input dep buffer to 1 for num inp dep */
    660                 if(0 != inp_dep)
    661                 {
    662                     memset(&ps_job_queue_curr->au1_in_dep[0], 1, sizeof(UWORD8) * inp_dep);
    663                 }
    664 
    665                 ps_job_queue_curr->i4_num_output_dep = out_dep;
    666 
    667                 /* set the entire offset buffer to default value */
    668                 memset(&ps_job_queue_curr->au4_out_ofsts[0], 0xFF, sizeof(UWORD32) * MAX_OUT_DEP);
    669 
    670                 for(ctr = 0; ctr < out_dep; ctr++)
    671                 {
    672                     /* col tile level dependency b/w EncLoops of MBR */
    673                     u8_temp = (ULWORD64)(
    674                         &ps_job_queue_next[num_jobs * i4_num_tile_col + col_tile_ctr] -
    675                         ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc);
    676 
    677                     u8_temp *= sizeof(job_queue_t);
    678 
    679                     /* store the offset to the array */
    680                     ps_job_queue_curr->au4_out_ofsts[ctr] = (UWORD32)u8_temp;
    681                 }
    682 
    683                 ps_job_queue_curr++;
    684             }
    685         }
    686 
    687         /* set the last pointer to NULL */
    688         ps_job_queue_curr--;
    689         ps_job_queue_curr->pv_next = (void *)NULL;
    690     }
    691 
    692     return;
    693 
    694 } /* End of ihevce_prepare_job_queue */
    695 
    696 /**
    697 *******************************************************************************
    698 *
    699 * @brief Function prepares the Job Queues for all the passes of pre enc
    700 *
    701 * @par Description: Based on picture type sets the input and output dependency
    702 *
    703 * @param[inout] pv_enc_ctxt
    704 *  Pointer to encoder context
    705 *
    706 * @param[in] ps_curr_inp
    707 *   Current Input buffer pointer
    708 *
    709 * @returns
    710 *  None
    711 *
    712 * @remarks
    713 *
    714 *******************************************************************************
    715 */
    716 void ihevce_prepare_pre_enc_job_queue(
    717     void *pv_enc_ctxt, ihevce_lap_enc_buf_t *ps_curr_inp, WORD32 i4_ping_pong)
    718 {
    719     /* local variables */
    720     enc_ctxt_t *ps_ctxt;
    721     job_queue_t *ps_decomp_job_queue_lyr0;
    722     job_queue_t *ps_decomp_job_queue_lyr1;
    723     job_queue_t *ps_decomp_job_queue_lyr2;
    724     job_queue_t *ps_decomp_job_queue_lyr3;
    725     job_queue_t *ps_me_job_queue_lyr1;
    726     job_queue_t *ps_me_job_queue_lyr2;
    727     job_queue_t *ps_me_job_queue_lyr3;
    728     job_queue_t *ps_me_job_queue_lyr4;
    729     job_queue_t *ps_ipe_job_queue;
    730     job_queue_t *aps_me_job_queues[MAX_NUM_HME_LAYERS];
    731     multi_thrd_me_job_q_prms_t *ps_me_job_q_prms;
    732     WORD32 ai4_decomp_num_vert_units_lyr[MAX_NUM_HME_LAYERS];
    733     WORD32 a14_decomp_lyr_unit_size[MAX_NUM_HME_LAYERS];
    734     WORD32 layer_no;
    735     WORD32 decomp_lyr_cnt;
    736     WORD32 num_jobs;
    737     WORD32 n_tot_layers;
    738     WORD32 a_wd[MAX_NUM_HME_LAYERS];
    739     WORD32 a_ht[MAX_NUM_HME_LAYERS];
    740     WORD32 a_disp_wd[MAX_NUM_HME_LAYERS];
    741     WORD32 a_disp_ht[MAX_NUM_HME_LAYERS];
    742     WORD32 u4_log_ctb_size;
    743     WORD32 num_ctb_vert_rows;
    744     WORD32 pass;
    745     WORD32 me_lyr_cnt;
    746     WORD32 num_hme_lyrs;
    747     WORD32 ai4_me_num_vert_units_lyr[MAX_NUM_HME_LAYERS];
    748     WORD32 me_start_lyr_pass;
    749     WORD32 ctb_size;
    750     WORD32 me_coarsest_lyr_inp_dep = -1;
    751 
    752     (void)ps_curr_inp;
    753     /* derive local varaibles */
    754     ps_ctxt = (enc_ctxt_t *)pv_enc_ctxt;
    755     num_ctb_vert_rows = ps_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert;
    756 
    757     /* CHANGE REQUIRED: change the pointer to the job queue buffer */
    758     /* memset the entire job que buffer to zero */
    759     memset(
    760         ps_ctxt->s_multi_thrd.aps_job_q_pre_enc[i4_ping_pong],
    761         0,
    762         MAX_NUM_VERT_UNITS_FRM * NUM_PRE_ENC_JOBS_QUES * sizeof(job_queue_t));
    763 
    764     /* Get the number of vertical units in a layer from the resolution of the layer */
    765     a_wd[0] = ps_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_wd;
    766     a_ht[0] = ps_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_ht;
    767     n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht);
    768     GETRANGE(u4_log_ctb_size, ps_ctxt->s_frm_ctb_prms.i4_ctb_size);
    769 
    770     ASSERT(n_tot_layers >= 3);
    771 
    772     /*
    773     * Always force minimum layers as 4 so that we would have both l1 and l2
    774     * pre intra analysis
    775     */
    776     if(n_tot_layers == 3)
    777     {
    778         n_tot_layers = 4;
    779         a_wd[3] = CEIL16(a_wd[2] >> 1);
    780         a_ht[3] = CEIL16(a_ht[2] >> 1);
    781     }
    782 
    783     for(layer_no = 0; layer_no < n_tot_layers; layer_no++)
    784     {
    785         ctb_size = 1 << (u4_log_ctb_size - 1 - layer_no);
    786         ai4_decomp_num_vert_units_lyr[layer_no] = ((a_ht[layer_no] + ctb_size) & ~(ctb_size - 1)) >>
    787                                                   (u4_log_ctb_size - 1 - layer_no);
    788         a14_decomp_lyr_unit_size[layer_no] = 1 << (u4_log_ctb_size - 1 - layer_no);
    789     }
    790 
    791     /* get the start address of  Job queues */
    792     ps_decomp_job_queue_lyr0 = ps_ctxt->s_multi_thrd.aps_job_q_pre_enc[i4_ping_pong];
    793     ps_decomp_job_queue_lyr1 = ps_decomp_job_queue_lyr0 + MAX_NUM_VERT_UNITS_FRM;
    794     ps_decomp_job_queue_lyr2 = ps_decomp_job_queue_lyr1 + MAX_NUM_VERT_UNITS_FRM;
    795     ps_decomp_job_queue_lyr3 = ps_decomp_job_queue_lyr2 + MAX_NUM_VERT_UNITS_FRM;
    796     ps_me_job_queue_lyr4 = ps_decomp_job_queue_lyr3 + MAX_NUM_VERT_UNITS_FRM;
    797     ps_me_job_queue_lyr3 = ps_me_job_queue_lyr4 + MAX_NUM_VERT_UNITS_FRM;
    798     ps_me_job_queue_lyr2 = ps_me_job_queue_lyr3 + MAX_NUM_VERT_UNITS_FRM;
    799     ps_me_job_queue_lyr1 = ps_me_job_queue_lyr2 + MAX_NUM_VERT_UNITS_FRM;
    800 
    801     ps_ipe_job_queue = ps_me_job_queue_lyr1 + MAX_NUM_VERT_UNITS_FRM;
    802 
    803     /* store the JOB queue in the Job handle */
    804     ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][DECOMP_JOB_LYR0].pv_next =
    805         (void *)ps_decomp_job_queue_lyr0;
    806     ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][DECOMP_JOB_LYR1].pv_next =
    807         (void *)ps_decomp_job_queue_lyr1;
    808     ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][DECOMP_JOB_LYR2].pv_next =
    809         (void *)ps_decomp_job_queue_lyr2;
    810     ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][DECOMP_JOB_LYR3].pv_next =
    811         (void *)ps_decomp_job_queue_lyr3;
    812     ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][ME_JOB_LYR4].pv_next =
    813         (void *)ps_me_job_queue_lyr4;
    814     ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][ME_JOB_LYR3].pv_next =
    815         (void *)ps_me_job_queue_lyr3;
    816     ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][ME_JOB_LYR2].pv_next =
    817         (void *)ps_me_job_queue_lyr2;
    818     ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][ME_JOB_LYR1].pv_next =
    819         (void *)ps_me_job_queue_lyr1;
    820     ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][IPE_JOB_LYR0].pv_next =
    821         (void *)ps_ipe_job_queue;
    822 
    823     /* store the ME Jobs que into array */
    824     aps_me_job_queues[0] = NULL;
    825     aps_me_job_queues[1] = ps_me_job_queue_lyr1;
    826     aps_me_job_queues[2] = ps_me_job_queue_lyr2;
    827     aps_me_job_queues[3] = ps_me_job_queue_lyr3;
    828     aps_me_job_queues[4] = ps_me_job_queue_lyr4;
    829     decomp_lyr_cnt = 0;
    830     /* Set the me_lyr_cnt to 0  */
    831     me_lyr_cnt = 0;
    832 
    833     /* call the ME function which returns the layer properties */
    834     ihevce_coarse_me_get_lyr_prms_job_que(
    835         ps_ctxt->s_module_ctxt.pv_coarse_me_ctxt,
    836         ps_curr_inp,
    837         &num_hme_lyrs,
    838         &ai4_me_num_vert_units_lyr[0],
    839         &ps_ctxt->s_multi_thrd.as_me_job_q_prms[0][0]);
    840 
    841     ps_me_job_q_prms = &ps_ctxt->s_multi_thrd.as_me_job_q_prms[0][0];
    842 
    843     /* derive ME coarsest layer task type */
    844     me_start_lyr_pass = ME_JOB_LYR4 + (MAX_NUM_HME_LAYERS - num_hme_lyrs);
    845 
    846     ps_ctxt->s_multi_thrd.i4_me_coarsest_lyr_type = me_start_lyr_pass;
    847 
    848     /* coarsest HME layer number of units should be less than or equal to max in dep in Job queue */
    849     /* this constraint is to take care of Coarsest layer requiring entire layer to do FULL search */
    850     ASSERT(ai4_me_num_vert_units_lyr[0] <= MAX_IN_DEP);
    851     /* loop over all the passes in the encoder */
    852     for(pass = 0; pass < NUM_PRE_ENC_JOBS_QUES; pass++)
    853     {
    854         job_queue_t *ps_pre_enc_job_queue_curr;
    855         job_queue_t *ps_pre_enc_job_queue_next;
    856         WORD32 inp_dep_pass;
    857         WORD32 out_dep_pass;
    858         WORD32 num_vert_units;
    859         HEVCE_PRE_ENC_JOB_TYPES_T pre_enc_task_type;
    860         HEVCE_ENC_JOB_TYPES_T enc_task_type;
    861         WORD32 proc_valid_flag = 0;
    862 
    863         // num_vert_units = ai4_decomp_num_vert_units_lyr[decomp_lyr_cnt];
    864         /* Initializing the job queues for max no of rows among all the layers. And max would be for last layer*/
    865         num_vert_units = ai4_decomp_num_vert_units_lyr[n_tot_layers - 1];
    866 
    867         if(DECOMP_JOB_LYR0 == pass)
    868         {
    869             proc_valid_flag = 1;
    870             pre_enc_task_type = DECOMP_JOB_LYR0;
    871             enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1;
    872             ps_pre_enc_job_queue_curr = ps_decomp_job_queue_lyr0;
    873 
    874             inp_dep_pass = 0;
    875             decomp_lyr_cnt++;
    876 
    877             /* If all the decomp layers are done next job queue will be ME job queue */
    878             if(decomp_lyr_cnt == (n_tot_layers - 1))
    879             {
    880                 /* Assumption : num_hme_lyrs > 1*/
    881                 ps_pre_enc_job_queue_next = aps_me_job_queues[num_hme_lyrs - 1];
    882 
    883                 /* ME coarsest layer is currently made dependent on entire decomp layer */
    884                 out_dep_pass = ai4_me_num_vert_units_lyr[0];
    885                 me_coarsest_lyr_inp_dep = num_vert_units;
    886             }
    887             else
    888             {
    889                 ps_pre_enc_job_queue_next = ps_decomp_job_queue_lyr1;
    890                 out_dep_pass = 3;
    891             }
    892         }
    893         else if((DECOMP_JOB_LYR1 == pass) && (decomp_lyr_cnt != (n_tot_layers - 1)))
    894         {
    895             proc_valid_flag = 1;
    896             pre_enc_task_type = DECOMP_JOB_LYR1;
    897             enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1;
    898             ps_pre_enc_job_queue_curr = ps_decomp_job_queue_lyr1;
    899 
    900             inp_dep_pass = 3;
    901             decomp_lyr_cnt++;
    902 
    903             /* If all the decomp layers are done next job queue will be ME job queue */
    904             if(decomp_lyr_cnt == (n_tot_layers - 1))
    905             {
    906                 /* Assumption : num_hme_lyrs > 1*/
    907                 ps_pre_enc_job_queue_next = aps_me_job_queues[num_hme_lyrs - 1];
    908 
    909                 /* ME coarsest layer is currently made dependent on entire decomp layer */
    910                 out_dep_pass = ai4_me_num_vert_units_lyr[0];
    911                 me_coarsest_lyr_inp_dep = num_vert_units;
    912             }
    913             else
    914             {
    915                 ps_pre_enc_job_queue_next = ps_decomp_job_queue_lyr2;
    916                 out_dep_pass = 3;
    917             }
    918         }
    919         else if((DECOMP_JOB_LYR2 == pass) && (decomp_lyr_cnt != (n_tot_layers - 1)))
    920         {
    921             proc_valid_flag = 1;
    922             pre_enc_task_type = DECOMP_JOB_LYR2;
    923             enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1;
    924             ps_pre_enc_job_queue_curr = ps_decomp_job_queue_lyr2;
    925 
    926             inp_dep_pass = 3;
    927             decomp_lyr_cnt++;
    928 
    929             /* If all the decomp layers are done next job queue will be ME job queue */
    930             if(decomp_lyr_cnt == (n_tot_layers - 1))
    931             {
    932                 /* Assumption : num_hme_lyrs > 1*/
    933                 ps_pre_enc_job_queue_next = aps_me_job_queues[num_hme_lyrs - 1];
    934 
    935                 /* ME coarsest layer is currently made dependent on entire decomp layer */
    936                 out_dep_pass = ai4_me_num_vert_units_lyr[0];
    937                 me_coarsest_lyr_inp_dep = num_vert_units;
    938             }
    939             else
    940             {
    941                 /* right now MAX 4 layers worth of JOB queues are prepared */
    942                 ASSERT(0);
    943             }
    944         }
    945 
    946         else if(IPE_JOB_LYR0 == pass)
    947         {
    948             proc_valid_flag = 1;
    949             pre_enc_task_type = IPE_JOB_LYR0;
    950             enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1;
    951             ps_pre_enc_job_queue_curr = ps_ipe_job_queue;
    952             ps_pre_enc_job_queue_next = NULL;
    953             num_vert_units = num_ctb_vert_rows;
    954         }
    955         else if(((pass >= ME_JOB_LYR4) && (pass <= ME_JOB_LYR1)) && (pass >= me_start_lyr_pass))
    956         {
    957             /* num_ver_units of coarsest layer is stored at 0th index */
    958             num_vert_units = ai4_me_num_vert_units_lyr[me_lyr_cnt];
    959             proc_valid_flag = 1;
    960 
    961             pre_enc_task_type =
    962                 (HEVCE_PRE_ENC_JOB_TYPES_T)((WORD32)ME_JOB_LYR1 - (num_hme_lyrs - me_lyr_cnt - 2));
    963 
    964             enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1;
    965 
    966             /* Assumption : num_hme_lyrs > 1*/
    967             ps_pre_enc_job_queue_curr = aps_me_job_queues[num_hme_lyrs - me_lyr_cnt - 1];
    968 
    969             if(me_lyr_cnt == (num_hme_lyrs - 2))
    970             {
    971                 ps_pre_enc_job_queue_next = ps_ipe_job_queue;
    972             }
    973             else
    974             {
    975                 ps_pre_enc_job_queue_next = aps_me_job_queues[num_hme_lyrs - me_lyr_cnt - 2];
    976             }
    977             me_lyr_cnt++;
    978         }
    979 
    980         /* check for valid processing flag */
    981         if(0 == proc_valid_flag)
    982         {
    983             continue;
    984         }
    985 
    986         /* in the loop ps_me_job_q_prms get incremented for every row */
    987         /* so at the end of one layer the pointer will be correctly   */
    988         /* pointing to the start of next layer                        */
    989 
    990         /* loop over all the vertical rows */
    991         for(num_jobs = 0; num_jobs < num_vert_units; num_jobs++)
    992         {
    993             ULWORD64 u8_temp;
    994             WORD32 inp_dep = 0;
    995             WORD32 out_dep = 0;
    996             WORD32 ctr;
    997             WORD32 job_off_ipe;
    998 
    999             if(IPE_JOB_LYR0 == pass)
   1000             {
   1001                 ps_pre_enc_job_queue_curr->s_job_info.s_ipe_job_info.i4_ctb_row_no = num_jobs;
   1002                 inp_dep = ps_me_job_q_prms->i4_num_inp_dep;
   1003                 out_dep = 0;
   1004             }
   1005             else if((pass >= DECOMP_JOB_LYR0) && (pass <= DECOMP_JOB_LYR3))
   1006             {
   1007                 ps_pre_enc_job_queue_curr->s_job_info.s_decomp_job_info.i4_vert_unit_row_no =
   1008                     num_jobs;
   1009 
   1010                 /* Input and output dependencies of 1st row and last row are 1 less than other rows */
   1011                 inp_dep = inp_dep_pass;
   1012                 out_dep = out_dep_pass;
   1013 
   1014                 if(pass != DECOMP_JOB_LYR0)
   1015                 {
   1016                     if(((num_jobs == 0) || (num_jobs == num_vert_units - 1)))
   1017                     {
   1018                         inp_dep = inp_dep_pass - 1;
   1019                     }
   1020                 }
   1021 
   1022                 if(pass != (DECOMP_JOB_LYR0 + n_tot_layers - 2))
   1023                 {
   1024                     if(((num_jobs == 0) || (num_jobs == num_vert_units - 1)))
   1025                     {
   1026                         out_dep = out_dep_pass - 1;
   1027                     }
   1028                 }
   1029             }
   1030             else /* remaining all are ME JOBS */
   1031             {
   1032                 ps_pre_enc_job_queue_curr->s_job_info.s_me_job_info.i4_vert_unit_row_no = num_jobs;
   1033 
   1034                 if(pass == me_start_lyr_pass)
   1035                 {
   1036                     ASSERT(me_coarsest_lyr_inp_dep != -1);
   1037                     inp_dep = me_coarsest_lyr_inp_dep;
   1038                 }
   1039                 else
   1040                 {
   1041                     inp_dep = ps_me_job_q_prms->i4_num_inp_dep;
   1042                 }
   1043                 out_dep = ps_me_job_q_prms->i4_num_output_dep;
   1044             }
   1045             ps_pre_enc_job_queue_curr->pv_next = (void *)(ps_pre_enc_job_queue_curr + 1);
   1046 
   1047             ps_pre_enc_job_queue_curr->i4_pre_enc_task_type = pre_enc_task_type;
   1048             ps_pre_enc_job_queue_curr->i4_task_type = enc_task_type;
   1049 
   1050             /* Set the input dependencies */
   1051             ps_pre_enc_job_queue_curr->i4_num_input_dep = inp_dep;
   1052 
   1053             /* set the entire input dep buffer to default value 0 */
   1054             memset(&ps_pre_enc_job_queue_curr->au1_in_dep[0], 0, sizeof(UWORD8) * MAX_IN_DEP);
   1055 
   1056             /* set the input dep buffer to 1 for num inp dep */
   1057             if(0 != inp_dep)
   1058             {
   1059                 memset(&ps_pre_enc_job_queue_curr->au1_in_dep[0], 1, sizeof(UWORD8) * inp_dep);
   1060             }
   1061 
   1062             /* If decomposition layer ends at this pass the no of out dependencies
   1063             * will be based on number of vertical units in the coarsest layer of HME.
   1064             * This is because the search range in coarsest layer will be almost
   1065             * entire frame (search range of +-128 in vert direction is max supported)
   1066             */
   1067             if(pass == (DECOMP_JOB_LYR0 + n_tot_layers - 2))
   1068             {
   1069                 job_off_ipe = 0;
   1070             }
   1071             else
   1072             {
   1073                 if(num_jobs == 0)
   1074                     job_off_ipe = num_jobs;
   1075 
   1076                 else
   1077                     job_off_ipe = num_jobs - 1;
   1078             }
   1079 
   1080             /* Set the offsets of output dependencies */
   1081             ps_pre_enc_job_queue_curr->i4_num_output_dep = out_dep;
   1082 
   1083             /* set the entire offset buffer to default value */
   1084             memset(
   1085                 &ps_pre_enc_job_queue_curr->au4_out_ofsts[0], 0xFF, sizeof(UWORD32) * MAX_OUT_DEP);
   1086 
   1087             for(ctr = 0; ctr < out_dep; ctr++)
   1088             {
   1089                 /* if IPE or DECOMP loop the dep is 1 to 1*/
   1090                 if(((pass >= DECOMP_JOB_LYR0) && (pass <= DECOMP_JOB_LYR3)) ||
   1091                    (IPE_JOB_LYR0 == pass))
   1092                 {
   1093                     u8_temp = (ULWORD64)(
   1094                         &ps_pre_enc_job_queue_next[job_off_ipe] -
   1095                         ps_ctxt->s_multi_thrd.aps_job_q_pre_enc[i4_ping_pong]);
   1096 
   1097                     u8_temp *= sizeof(job_queue_t);
   1098 
   1099                     /* add the exact inp dep byte for the next layer JOB */
   1100                     u8_temp += ps_pre_enc_job_queue_next[job_off_ipe].i4_num_input_dep;
   1101 
   1102                     /* increment the inp dep number for a given job */
   1103                     ps_pre_enc_job_queue_next[job_off_ipe].i4_num_input_dep++;
   1104 
   1105                     job_off_ipe++;
   1106                 }
   1107                 else if((pass >= ME_JOB_LYR4) && (pass <= ME_JOB_LYR1))
   1108                 {
   1109                     /* ME layer Jobs */
   1110                     WORD32 job_off;
   1111 
   1112                     job_off = ps_me_job_q_prms->ai4_out_dep_unit_off[ctr];
   1113 
   1114                     u8_temp = (ULWORD64)(
   1115                         &ps_pre_enc_job_queue_next[job_off] -
   1116                         ps_ctxt->s_multi_thrd.aps_job_q_pre_enc[i4_ping_pong]);
   1117 
   1118                     u8_temp *= sizeof(job_queue_t);
   1119 
   1120                     /* add the exact inp dep byte for the next layer JOB */
   1121                     u8_temp += ps_pre_enc_job_queue_next[job_off].i4_num_input_dep;
   1122 
   1123                     /* increment the inp dep number for a given job */
   1124                     ps_pre_enc_job_queue_next[job_off].i4_num_input_dep++;
   1125                 }
   1126                 /* store the offset to the array */
   1127                 ps_pre_enc_job_queue_curr->au4_out_ofsts[ctr] = (UWORD32)u8_temp;
   1128             }
   1129             /* ME job q params is incremented for ME jobs and for IPE jobs */
   1130             if(((pass >= ME_JOB_LYR4) && (pass <= ME_JOB_LYR1)) || (IPE_JOB_LYR0 == pass))
   1131             {
   1132                 ps_me_job_q_prms++;
   1133             }
   1134             ps_pre_enc_job_queue_curr++;
   1135         }
   1136 
   1137         /* set the last pointer to NULL */
   1138         ps_pre_enc_job_queue_curr--;
   1139         ps_pre_enc_job_queue_curr->pv_next = (void *)NULL;
   1140     }
   1141 
   1142     /* reset the num ctb processed in every row  for IPE sync */
   1143     memset(
   1144         &ps_ctxt->s_multi_thrd.ai4_ctbs_in_row_proc_ipe_pass[0],
   1145         0,
   1146         (MAX_NUM_CTB_ROWS_FRM * sizeof(WORD32)));
   1147 
   1148 } /* End of ihevce_prepare_pre_enc_job_queue */
   1149