1 /****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 /** 22 ******************************************************************************* 23 * @file 24 * ih264e_process.c 25 * 26 * @brief 27 * Contains functions for codec thread 28 * 29 * @author 30 * Harish 31 * 32 * @par List of Functions: 33 * - ih264e_generate_sps_pps() 34 * - ih264e_init_entropy_ctxt() 35 * - ih264e_entropy() 36 * - ih264e_pack_header_data() 37 * - ih264e_update_proc_ctxt() 38 * - ih264e_init_proc_ctxt() 39 * - ih264e_pad_recon_buffer() 40 * - ih264e_dblk_pad_hpel_processing_n_mbs() 41 * - ih264e_process() 42 * - ih264e_set_rc_pic_params() 43 * - ih264e_update_rc_post_enc() 44 * - ih264e_process_thread() 45 * 46 * @remarks 47 * None 48 * 49 ******************************************************************************* 50 */ 51 52 /*****************************************************************************/ 53 /* File Includes */ 54 /*****************************************************************************/ 55 56 /* System include files */ 57 #include <stdio.h> 58 #include <stddef.h> 59 #include <stdlib.h> 60 #include <string.h> 61 #include <limits.h> 62 #include 
<assert.h> 63 64 /* User include files */ 65 #include "ih264_typedefs.h" 66 #include "iv2.h" 67 #include "ive2.h" 68 #include "ih264_defs.h" 69 #include "ih264_debug.h" 70 #include "ime_distortion_metrics.h" 71 #include "ime_defs.h" 72 #include "ime_structs.h" 73 #include "ih264_error.h" 74 #include "ih264_structs.h" 75 #include "ih264_trans_quant_itrans_iquant.h" 76 #include "ih264_inter_pred_filters.h" 77 #include "ih264_mem_fns.h" 78 #include "ih264_padding.h" 79 #include "ih264_intra_pred_filters.h" 80 #include "ih264_deblk_edge_filters.h" 81 #include "ih264_cabac_tables.h" 82 #include "ih264_platform_macros.h" 83 #include "ih264_macros.h" 84 #include "ih264_buf_mgr.h" 85 #include "ih264e_error.h" 86 #include "ih264e_bitstream.h" 87 #include "ih264_common_tables.h" 88 #include "ih264_list.h" 89 #include "ih264e_defs.h" 90 #include "irc_cntrl_param.h" 91 #include "irc_frame_info_collector.h" 92 #include "ih264e_rate_control.h" 93 #include "ih264e_cabac_structs.h" 94 #include "ih264e_structs.h" 95 #include "ih264e_cabac.h" 96 #include "ih264e_process.h" 97 #include "ithread.h" 98 #include "ih264e_intra_modes_eval.h" 99 #include "ih264e_encode_header.h" 100 #include "ih264e_globals.h" 101 #include "ih264e_config.h" 102 #include "ih264e_trace.h" 103 #include "ih264e_statistics.h" 104 #include "ih264_cavlc_tables.h" 105 #include "ih264e_cavlc.h" 106 #include "ih264e_deblk.h" 107 #include "ih264e_me.h" 108 #include "ih264e_debug.h" 109 #include "ih264e_master.h" 110 #include "ih264e_utils.h" 111 #include "irc_mem_req_and_acq.h" 112 #include "irc_rate_control_api.h" 113 #include "ih264e_platform_macros.h" 114 #include "ime_statistics.h" 115 116 117 /*****************************************************************************/ 118 /* Function Definitions */ 119 /*****************************************************************************/ 120 121 /** 122 ****************************************************************************** 123 * 124 * @brief This function 
generates sps, pps set on request 125 * 126 * @par Description 127 * When the encoder is set in header generation mode, the following function 128 * is called. This generates sps and pps headers and returns the control back 129 * to caller. 130 * 131 * @param[in] ps_codec 132 * pointer to codec context 133 * 134 * @return success or failure error code 135 * 136 ****************************************************************************** 137 */ 138 IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec) 139 { 140 /* choose between ping-pong process buffer set */ 141 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS; 142 143 /* entropy ctxt */ 144 entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy; 145 146 /* Bitstream structure */ 147 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm; 148 149 /* sps */ 150 sps_t *ps_sps = NULL; 151 152 /* pps */ 153 pps_t *ps_pps = NULL; 154 155 /* output buff */ 156 out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel]; 157 158 159 /********************************************************************/ 160 /* initialize the bit stream buffer */ 161 /********************************************************************/ 162 ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize); 163 164 /********************************************************************/ 165 /* BEGIN HEADER GENERATION */ 166 /********************************************************************/ 167 /*ps_codec->i4_pps_id ++;*/ 168 ps_codec->i4_pps_id %= MAX_PPS_CNT; 169 170 /*ps_codec->i4_sps_id ++;*/ 171 ps_codec->i4_sps_id %= MAX_SPS_CNT; 172 173 /* populate sps header */ 174 ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id; 175 ih264e_populate_sps(ps_codec, ps_sps); 176 177 /* populate pps header */ 178 ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id; 179 ih264e_populate_pps(ps_codec, ps_pps); 180 181 ps_entropy->i4_error_code = IH264E_SUCCESS; 182 183 
/* generate sps */ 184 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps, 185 &ps_codec->s_cfg.s_vui); 186 187 /* generate pps */ 188 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps); 189 190 /* queue output buffer */ 191 ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset; 192 193 return ps_entropy->i4_error_code; 194 } 195 196 /** 197 ******************************************************************************* 198 * 199 * @brief initialize entropy context. 200 * 201 * @par Description: 202 * Before invoking the call to perform to entropy coding the entropy context 203 * associated with the job needs to be initialized. This involves the start 204 * mb address, end mb address, slice index and the pointer to location at 205 * which the mb residue info and mb header info are packed. 206 * 207 * @param[in] ps_proc 208 * Pointer to the current process context 209 * 210 * @returns error status 211 * 212 * @remarks none 213 * 214 ******************************************************************************* 215 */ 216 IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc) 217 { 218 /* codec context */ 219 codec_t *ps_codec = ps_proc->ps_codec; 220 221 /* entropy ctxt */ 222 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; 223 224 /* start address */ 225 ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x; 226 227 /* end address */ 228 ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt; 229 230 /* slice index */ 231 ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add]; 232 233 /* sof */ 234 /* @ start of frame or start of a new slice, set sof flag */ 235 if (ps_entropy->i4_mb_start_add == 0) 236 { 237 ps_entropy->i4_sof = 1; 238 } 239 240 if (ps_entropy->i4_mb_x == 0) 241 { 242 /* packed mb coeff data */ 243 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) + 244 
/**
*******************************************************************************
*
* @brief entry point for entropy coding
*
* @par Description
*  Entropy codes the mbs whose raster-scan addresses lie in
*  [i4_mb_start_add, i4_mb_end_add) of the entropy context.
*
*  - If i4_sof is set, the bitstream is initialized on the output buffer of the
*    current ping-pong set, sps/pps are written when i4_gen_header is 1, the
*    first slice header is populated and written, and i4_sof is cleared.
*  - For every mb, the function spins (yielding the thread) until the matching
*    proc map entry is non-zero, writes the mb layer through
*    pf_write_mb_syntax_layer[entropy mode][slice type] and marks the mb in the
*    entropy map.
*  - At the end of a row, when the slice mode is IVE_SLICE_MODE_BLOCKS and the
*    slice index of the next mb differs, the current slice is closed and a new
*    slice header is written.
*  - When the mb index reaches wd_mbs * ht_mbs, i4_eof is set, the last slice
*    is closed, rate control is updated, filler data is added or the frame is
*    dropped as requested, the frame number is advanced for reference frames
*    and the produced byte count is published in the output buffer.
*
*  Before returning, au4_entropy_thread_active of the current set is cleared.
*
* @param[in] ps_proc
*  process context
*
* @returns error status accumulated (OR-ed) in ps_entropy->i4_error_code
*
* @remarks
*  i4_sof is cleared here once consumed; the in-code note states that setting
*  it is the caller's responsibility. i4_eof is only ever set in this function.
*
*******************************************************************************
*/

IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
{
    /* codec context */
    codec_t *ps_codec = ps_proc->ps_codec;

    /* entropy context */
    entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;

    /* cabac context */
    cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;

    /* sps (id wrapped into the sps table) */
    sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);

    /* pps (id wrapped into the pps table) */
    pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);

    /* slice header of the current slice (index wrapped into the header table) */
    slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);

    /* slice type */
    WORD32 i4_slice_type = ps_proc->i4_slice_type;

    /* Bitstream structure */
    bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;

    /* output buff (local copy, only filled in on start of frame) */
    out_buf_t s_out_buf;

    /* proc map row of the current mb row */
    UWORD8 *pu1_proc_map;

    /* entropy map row of the current mb row */
    UWORD8 *pu1_entropy_map_curr;

    /* ping-pong context set of this api call */
    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;

    /* temp var */
    WORD32 i4_wd_mbs, i4_ht_mbs;
    UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
    WORD32 bitstream_start_offset, bitstream_end_offset;
    /********************************************************************/
    /*                            BEGIN INIT                            */
    /********************************************************************/

    /* entropy encode start address */
    u4_mb_idx = ps_entropy->i4_mb_start_add;

    /* entropy encode end address (exclusive) */
    u4_mb_end_idx = ps_entropy->i4_mb_end_add;

    /* width in mbs */
    i4_wd_mbs = ps_entropy->i4_wd_mbs;

    /* height in mbs */
    i4_ht_mbs = ps_entropy->i4_ht_mbs;

    /* total mb cnt */
    u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;

    /* proc map */
    pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;

    /* entropy map */
    pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;

    /********************************************************************/
    /* @ start of frame / slice,                                        */
    /*      initialize the output buffer,                               */
    /*      initialize the bit stream buffer,                           */
    /*      check if sps and pps headers have to be generated,          */
    /*      populate and generate slice header                          */
    /********************************************************************/
    if (ps_entropy->i4_sof)
    {
        /********************************************************************/
        /*      initialize the output buffer                                */
        /********************************************************************/
        s_out_buf = ps_codec->as_out_buf[ctxt_sel];

        /*
         * NOTE(review): s_out_buf is a local copy. Within this function only
         * s_bits_buf.pv_buf and s_bits_buf.u4_bufsize are read back from it;
         * the three stores below are not copied back to
         * ps_codec->as_out_buf[ctxt_sel] here.
         */

        /* is last frame to encode */
        s_out_buf.u4_is_last = ps_entropy->u4_is_last;

        /* frame idx */
        s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
        s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;

        /********************************************************************/
        /*      initialize the bit stream buffer                            */
        /********************************************************************/
        ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);

        /********************************************************************/
        /*                    BEGIN HEADER GENERATION                       */
        /********************************************************************/
        if (1 == ps_entropy->i4_gen_header)
        {
            /* generate sps */
            ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps,
                                                             &ps_codec->s_cfg.s_vui);
            /* generate pps */
            ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);

            /* reset i4_gen_header */
            ps_entropy->i4_gen_header = 0;
        }

        /* populate slice header */
        ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);

        /* generate slice header */
        ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
                                                                  ps_pps, ps_sps);

        /* once start of frame / slice is done, you can reset it */
        /* it is the responsibility of the caller to set this flag */
        ps_entropy->i4_sof = 0;

        /* CABAC: byte align and flush the stream, then initialize the cabac context */
        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
        {
            BITSTREAM_BYTE_ALIGN(ps_bitstrm);
            BITSTREAM_FLUSH(ps_bitstrm);
            ih264e_init_cabac_ctxt(ps_entropy);
        }
    }

    /* begin entropy coding for the mb set */
    while (u4_mb_idx < u4_mb_end_idx)
    {
        /* init ptrs/indices: the previous mb was the last of its row, move to the next row */
        if (ps_entropy->i4_mb_x == i4_wd_mbs)
        {
            ps_entropy->i4_mb_y++;
            ps_entropy->i4_mb_x = 0;

            /* packed mb coeff data */
            ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
                            ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;

            /* packed mb header data */
            ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
                            ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;

            /* proc map */
            pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;

            /* entropy map */
            pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
        }

        DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
        ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
        ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);

        /* wait until the curr mb is core coded */
        /* The wait for curr mb to be core coded is essential when entropy is launched
         * as a separate job
         */
        while (1)
        {
            /* read through a volatile pointer so the map entry is re-read on every pass */
            volatile UWORD8 *pu1_buf1;
            WORD32 idx = ps_entropy->i4_mb_x;

            pu1_buf1 = pu1_proc_map + idx;
            if (*pu1_buf1)
                break;
            ithread_yield();
        }


        /* write mb layer (writer selected by entropy coding mode and slice type) */
        ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
        /* Starting bitstream offset for header in bits */
        /* sampled after the mb layer is written, so the difference taken at the
         * bottom of this iteration only covers what is emitted from here on */
        bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);

        /* set entropy map */
        pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;

        u4_mb_idx++;
        ps_entropy->i4_mb_x++;
        /* CABAC, more mbs left in this row: encode terminate with argument 0 */
        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
        {
            if (ps_entropy->i4_mb_x < i4_wd_mbs)
            {
                ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
            }
        }

        /* end of the mb row reached */
        if (ps_entropy->i4_mb_x == i4_wd_mbs)
        {
            /* if slices are enabled */
            if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
            {
                /* current slice index */
                WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;

                /* slice map */
                UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;

                /* No need to open a slice at end of frame. The current slice can be closed at the time
                 * of signaling eof flag.
                 */
                if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
                                != pu1_slice_idx[u4_mb_idx]))
                {
                    /* the next mb belongs to a different slice: close the current one */
                    if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
                    { /* mb skip run */
                        /* NOTE(review): the inner check repeats the second half of the outer condition */
                        if ((i4_slice_type != ISLICE)
                                        && *ps_entropy->pi4_mb_skip_run)
                        {
                            if (*ps_entropy->pi4_mb_skip_run)
                            {
                                PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
                                *ps_entropy->pi4_mb_skip_run = 0;
                            }
                        }
                        /* put rbsp trailing bits for the previous slice */
                        ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
                    }
                    else
                    {
                        /* CABAC: encode terminate with argument 1 */
                        ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
                    }

                    /* update slice header pointer */
                    i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
                    ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
                    ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);

                    /* populate slice header; the new slice starts at the next mb */
                    ps_entropy->i4_mb_start_add = u4_mb_idx;
                    ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
                                                 ps_sps);

                    /* generate slice header */
                    ps_entropy->i4_error_code |= ih264e_generate_slice_header(
                                    ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
                    /* CABAC: same align / flush / init sequence as at start of frame */
                    if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
                    {
                        BITSTREAM_BYTE_ALIGN(ps_bitstrm);
                        BITSTREAM_FLUSH(ps_bitstrm);
                        ih264e_init_cabac_ctxt(ps_entropy);
                    }
                }
                else
                {
                    /* same slice continues on the next row and this is not the last mb of the frame */
                    if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
                                    && u4_mb_idx != u4_mb_cnt)
                    {
                        ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
                    }
                }
            }
            /* Dont execute any further instructions until store synchronization took place */
            DATA_SYNC();
        }

        /* Ending bitstream offset for header in bits */
        bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
        /* accumulated in slot 1 for P slices and in slot 0 for every other slice type */
        ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
                        bitstream_end_offset - bitstream_start_offset;
    }

    /* check for eof */
    if (u4_mb_idx == u4_mb_cnt)
    {
        /* set end of frame flag */
        ps_entropy->i4_eof = 1;
    }
    else
    {
        /* not the end of frame, CABAC, and slice mode is not IVE_SLICE_MODE_BLOCKS:
         * encode terminate with argument 0 */
        if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
                        && ps_codec->s_cfg.e_slice_mode
                                        != IVE_SLICE_MODE_BLOCKS)
        {
            ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
        }
    }

    if (ps_entropy->i4_eof)
    {
        /* close the last slice of the frame */
        if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
        {
            /* mb skip run */
            /* NOTE(review): the inner check repeats the second half of the outer condition */
            if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
            {
                if (*ps_entropy->pi4_mb_skip_run)
                {
                    PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
                                 ps_entropy->i4_error_code, "mb skip run");
                    *ps_entropy->pi4_mb_skip_run = 0;
                }
            }
            /* put rbsp trailing bits */
            ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
        }
        else
        {
            /* CABAC: encode terminate with argument 1 */
            ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
        }

        /* update current frame stats to rc library */
        {
            /* number of bytes to stuff */
            WORD32 i4_stuff_bytes;

            /* update; the last argument is 1 when the codec's i4_poc is 0 */
            i4_stuff_bytes = ih264e_update_rc_post_enc(
                            ps_codec, ctxt_sel,
                            (ps_proc->ps_codec->i4_poc == 0));

            /* cbr rc - house keeping */
            if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
            {
                /* frame skipped after encode: discard everything written so far */
                ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
            }
            else if (i4_stuff_bytes)
            {
                /* add filler nal units */
                ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
            }
        }

        /*
         * Frame number is to be incremented only if the current frame is a
         * reference frame. After each successful frame encode, we increment
         * frame number by 1
         */
        if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
                        && ps_codec->u4_is_curr_frm_ref)
        {
            ps_codec->i4_frame_num++;
        }
        /********************************************************************/
        /*      signal the output                                           */
        /********************************************************************/
        ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
                        ps_entropy->ps_bitstrm->u4_strm_buf_offset;

        DEBUG("entropy status %x", ps_entropy->i4_error_code);
    }

    /* allow threads to dequeue entropy jobs */
    ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;

    return ps_entropy->i4_error_code;
}
635 * 636 * @param[in] ps_proc 637 * Pointer to the current process context 638 * 639 * @returns error status 640 * 641 * @remarks none 642 * 643 ******************************************************************************* 644 */ 645 IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc) 646 { 647 /* curr mb type */ 648 UWORD32 u4_mb_type = ps_proc->u4_mb_type; 649 650 /* pack mb syntax layer of curr mb (used for entropy coding) */ 651 if (u4_mb_type == I4x4) 652 { 653 /* pointer to mb header storage space */ 654 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 655 mb_hdr_i4x4_t *ps_mb_hdr = (mb_hdr_i4x4_t *)ps_proc->pv_mb_header_data; 656 657 /* temp var */ 658 WORD32 i4, byte; 659 660 /* mb type plus mode */ 661 ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type; 662 663 /* cbp */ 664 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp; 665 666 /* mb qp delta */ 667 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 668 669 /* sub mb modes */ 670 for (i4 = 0; i4 < 16; i4 ++) 671 { 672 byte = 0; 673 674 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] == 675 ps_proc->au1_intra_luma_mb_4x4_modes[i4]) 676 { 677 byte |= 1; 678 } 679 else 680 { 681 682 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] < 683 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4]) 684 { 685 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1); 686 } 687 else 688 { 689 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1; 690 } 691 } 692 693 i4++; 694 695 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] == 696 ps_proc->au1_intra_luma_mb_4x4_modes[i4]) 697 { 698 byte |= 16; 699 } 700 else 701 { 702 703 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] < 704 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4]) 705 { 706 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5); 707 } 708 else 709 { 710 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5; 711 } 712 } 713 714 ps_mb_hdr->au1_sub_blk_modes[i4 >> 1] = byte; 715 
} 716 717 /* end of mb layer */ 718 pu1_ptr += sizeof(mb_hdr_i4x4_t); 719 ps_proc->pv_mb_header_data = pu1_ptr; 720 } 721 else if (u4_mb_type == I16x16) 722 { 723 /* pointer to mb header storage space */ 724 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 725 mb_hdr_i16x16_t *ps_mb_hdr = (mb_hdr_i16x16_t *)ps_proc->pv_mb_header_data; 726 727 /* mb type plus mode */ 728 ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type; 729 730 /* cbp */ 731 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp; 732 733 /* mb qp delta */ 734 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 735 736 /* end of mb layer */ 737 pu1_ptr += sizeof(mb_hdr_i16x16_t); 738 ps_proc->pv_mb_header_data = pu1_ptr; 739 } 740 else if (u4_mb_type == P16x16) 741 { 742 /* pointer to mb header storage space */ 743 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 744 mb_hdr_p16x16_t *ps_mb_hdr = (mb_hdr_p16x16_t *)ps_proc->pv_mb_header_data; 745 746 /* mb type */ 747 ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type; 748 749 /* cbp */ 750 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp; 751 752 /* mb qp delta */ 753 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 754 755 ps_mb_hdr->ai2_mv[0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx; 756 757 ps_mb_hdr->ai2_mv[1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy; 758 759 /* end of mb layer */ 760 pu1_ptr += sizeof(mb_hdr_p16x16_t); 761 ps_proc->pv_mb_header_data = pu1_ptr; 762 } 763 else if (u4_mb_type == PSKIP) 764 { 765 /* pointer to mb header storage space */ 766 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 767 mb_hdr_pskip_t *ps_mb_hdr = (mb_hdr_pskip_t *)ps_proc->pv_mb_header_data; 768 769 /* mb type */ 770 ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type; 771 772 /* end of mb layer */ 773 pu1_ptr += sizeof(mb_hdr_pskip_t); 774 ps_proc->pv_mb_header_data = pu1_ptr; 775 } 776 else 
if(u4_mb_type == B16x16) 777 { 778 779 /* pointer to mb header storage space */ 780 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 781 mb_hdr_b16x16_t *ps_mb_hdr = (mb_hdr_b16x16_t *)ps_proc->pv_mb_header_data; 782 783 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode; 784 785 /* mb type plus mode */ 786 ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type; 787 788 /* cbp */ 789 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp; 790 791 /* mb qp delta */ 792 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 793 794 /* l0 & l1 me data */ 795 if (u4_pred_mode != PRED_L1) 796 { 797 ps_mb_hdr->ai2_mv[0][0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx 798 - ps_proc->ps_pred_mv[0].s_mv.i2_mvx; 799 800 ps_mb_hdr->ai2_mv[0][1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy 801 - ps_proc->ps_pred_mv[0].s_mv.i2_mvy; 802 } 803 if (u4_pred_mode != PRED_L0) 804 { 805 ps_mb_hdr->ai2_mv[1][0] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx 806 - ps_proc->ps_pred_mv[1].s_mv.i2_mvx; 807 808 ps_mb_hdr->ai2_mv[1][1] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy 809 - ps_proc->ps_pred_mv[1].s_mv.i2_mvy; 810 } 811 812 /* end of mb layer */ 813 pu1_ptr += sizeof(mb_hdr_b16x16_t); 814 ps_proc->pv_mb_header_data = pu1_ptr; 815 816 } 817 else if(u4_mb_type == BDIRECT) 818 { 819 /* pointer to mb header storage space */ 820 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 821 mb_hdr_bdirect_t *ps_mb_hdr = (mb_hdr_bdirect_t *)ps_proc->pv_mb_header_data; 822 823 /* mb type plus mode */ 824 ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type; 825 826 /* cbp */ 827 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp; 828 829 /* mb qp delta */ 830 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 831 832 /* end of mb layer */ 833 pu1_ptr += sizeof(mb_hdr_bdirect_t); 834 ps_proc->pv_mb_header_data = pu1_ptr; 835 836 } 837 else if(u4_mb_type == BSKIP) 838 { 839 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode; 840 841 /* pointer to mb header storage space */ 
842 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 843 mb_hdr_bskip_t *ps_mb_hdr = (mb_hdr_bskip_t *)ps_proc->pv_mb_header_data; 844 845 /* mb type plus mode */ 846 ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type; 847 848 /* end of mb layer */ 849 pu1_ptr += sizeof(mb_hdr_bskip_t); 850 ps_proc->pv_mb_header_data = pu1_ptr; 851 } 852 853 return IH264E_SUCCESS; 854 } 855 856 /** 857 ******************************************************************************* 858 * 859 * @brief update process context after encoding an mb. This involves preserving 860 * the current mb information for later use, initialize the proc ctxt elements to 861 * encode next mb. 862 * 863 * @par Description: 864 * This function performs house keeping tasks after encoding an mb. 865 * After encoding an mb, various elements of the process context needs to be 866 * updated to encode the next mb. For instance, the source, recon and reference 867 * pointers, mb indices have to be adjusted to the next mb. The slice index of 868 * the current mb needs to be updated. If mb qp modulation is enabled, then if 869 * the qp changes the quant param structure needs to be updated. Also to encoding 870 * the next mb, the current mb info is used as part of mode prediction or mv 871 * prediction. Hence the current mb info has to preserved at top/top left/left 872 * locations. 
873 * 874 * @param[in] ps_proc 875 * Pointer to the current process context 876 * 877 * @returns none 878 * 879 * @remarks none 880 * 881 ******************************************************************************* 882 */ 883 WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc) 884 { 885 /* error status */ 886 WORD32 error_status = IH264_SUCCESS; 887 888 /* codec context */ 889 codec_t *ps_codec = ps_proc->ps_codec; 890 891 /* curr mb indices */ 892 WORD32 i4_mb_x = ps_proc->i4_mb_x; 893 WORD32 i4_mb_y = ps_proc->i4_mb_y; 894 895 /* mb syntax elements of neighbors */ 896 mb_info_t *ps_left_syn = &ps_proc->s_left_mb_syntax_ele; 897 mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x; 898 mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele; 899 900 /* curr mb type */ 901 UWORD32 u4_mb_type = ps_proc->u4_mb_type; 902 903 /* curr mb type */ 904 UWORD32 u4_is_intra = ps_proc->u4_is_intra; 905 906 /* width in mbs */ 907 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs; 908 909 /*height in mbs*/ 910 WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs; 911 912 /* proc map */ 913 UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs); 914 915 /* deblk context */ 916 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; 917 918 /* deblk bs context */ 919 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt); 920 921 /* top row motion vector info */ 922 enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x; 923 924 /* top left mb motion vector */ 925 enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu; 926 927 /* left mb motion vector */ 928 enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu; 929 930 /* sub mb modes */ 931 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4); 932 933 /*************************************************************/ 934 /* During MV prediction, when top right mb is not available, */ 935 /* top left mb info. is used for prediction. 
Hence the curr */ 936 /* top, which will be top left for the next mb needs to be */ 937 /* preserved before updating it with curr mb info. */ 938 /*************************************************************/ 939 940 /* mb type, mb class, csbp */ 941 *ps_top_left_syn = *ps_top_syn; 942 943 if (ps_proc->i4_slice_type != ISLICE) 944 { 945 /*****************************************/ 946 /* update top left with top info results */ 947 /*****************************************/ 948 /* mv */ 949 *ps_top_left_mb_pu = *ps_top_row_pu; 950 } 951 952 /*************************************************/ 953 /* update top and left with curr mb info results */ 954 /*************************************************/ 955 956 /* mb type */ 957 ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type; 958 959 /* mb class */ 960 ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra; 961 962 /* csbp */ 963 ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp; 964 965 /* distortion */ 966 ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion; 967 968 if (u4_is_intra) 969 { 970 /* mb / sub mb modes */ 971 if (I16x16 == u4_mb_type) 972 { 973 pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode; 974 } 975 else if (I4x4 == u4_mb_type) 976 { 977 ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16); 978 ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16); 979 } 980 else if (I8x8 == u4_mb_type) 981 { 982 memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4); 983 memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4); 984 } 985 986 if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE)) 987 { 988 /* mv */ 989 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu); 990 } 991 992 *ps_proc->pu4_mb_pu_cnt = 1; 993 } 994 else 995 { 996 /* mv */ 997 
*ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu); 998 } 999 1000 /* 1001 * Mark that the MB has been coded intra 1002 * So that future AIRs can skip it 1003 */ 1004 ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra; 1005 1006 /**************************************************/ 1007 /* pack mb header info. for entropy coding */ 1008 /**************************************************/ 1009 ih264e_pack_header_data(ps_proc); 1010 1011 /* update previous mb qp */ 1012 ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp; 1013 1014 /* store qp */ 1015 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp; 1016 1017 /* 1018 * We need to sync the cache to make sure that the nmv content of proc 1019 * is updated to cache properly 1020 */ 1021 DATA_SYNC(); 1022 1023 /* Just before finishing the row, enqueue the job in to entropy queue. 1024 * The master thread depending on its convenience shall dequeue it and 1025 * performs entropy. 1026 * 1027 * WARN !! Placing this block post proc map update can cause queuing of 1028 * entropy jobs in out of order. 1029 */ 1030 if (i4_mb_x == i4_wd_mbs - 1) 1031 { 1032 /* job structures */ 1033 job_t s_job; 1034 1035 /* job class */ 1036 s_job.i4_cmd = CMD_ENTROPY; 1037 1038 /* number of mbs to be processed in the current job */ 1039 s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs; 1040 1041 /* job start index x */ 1042 s_job.i2_mb_x = 0; 1043 1044 /* job start index y */ 1045 s_job.i2_mb_y = ps_proc->i4_mb_y; 1046 1047 /* proc base idx */ 1048 s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? 
(MAX_PROCESS_CTXT / 2) : 0; 1049 1050 /* queue the job */ 1051 error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1); 1052 1053 if(ps_proc->i4_mb_y == (i4_ht_mbs - 1)) 1054 ih264_list_terminate(ps_codec->pv_entropy_jobq); 1055 } 1056 1057 /* update proc map */ 1058 pu1_proc_map[i4_mb_x] = 1; 1059 1060 /**************************************************/ 1061 /* update proc ctxt elements for encoding next mb */ 1062 /**************************************************/ 1063 /* update indices */ 1064 i4_mb_x ++; 1065 ps_proc->i4_mb_x = i4_mb_x; 1066 1067 if (ps_proc->i4_mb_x == i4_wd_mbs) 1068 { 1069 ps_proc->i4_mb_y++; 1070 ps_proc->i4_mb_x = 0; 1071 } 1072 1073 /* update slice index */ 1074 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x]; 1075 1076 /* update buffers pointers */ 1077 ps_proc->pu1_src_buf_luma += MB_SIZE; 1078 ps_proc->pu1_rec_buf_luma += MB_SIZE; 1079 ps_proc->apu1_ref_buf_luma[0] += MB_SIZE; 1080 ps_proc->apu1_ref_buf_luma[1] += MB_SIZE; 1081 1082 /* 1083 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, 1084 * the stride per MB is MB_SIZE 1085 */ 1086 ps_proc->pu1_src_buf_chroma += MB_SIZE; 1087 ps_proc->pu1_rec_buf_chroma += MB_SIZE; 1088 ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE; 1089 ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE; 1090 1091 1092 1093 /* Reset cost, distortion params */ 1094 ps_proc->i4_mb_cost = INT_MAX; 1095 ps_proc->i4_mb_distortion = SHRT_MAX; 1096 1097 ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt; 1098 1099 ps_proc->pu4_mb_pu_cnt += 1; 1100 1101 /* Update colocated pu */ 1102 if (ps_proc->i4_slice_type == BSLICE) 1103 ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x); 1104 1105 /* deblk ctxts */ 1106 if (ps_proc->u4_disable_deblock_level != 1) 1107 { 1108 /* indices */ 1109 ps_bs->i4_mb_x = ps_proc->i4_mb_x; 1110 ps_bs->i4_mb_y = ps_proc->i4_mb_y; 1111 1112 #ifndef N_MB_ENABLE /* For 
N MB processing update take place inside deblocking function */ 1113 ps_deblk->i4_mb_x ++; 1114 1115 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1116 /* 1117 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, 1118 * the stride per MB is MB_SIZE 1119 */ 1120 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1121 #endif 1122 } 1123 1124 return error_status; 1125 } 1126 1127 /** 1128 ******************************************************************************* 1129 * 1130 * @brief initialize process context. 1131 * 1132 * @par Description: 1133 * Before dispatching the current job to process thread, the process context 1134 * associated with the job is initialized. Usually every job aims to encode one 1135 * row of mb's. Basing on the row indices provided by the job, the process 1136 * context's buffer ptrs, slice indices and other elements that are necessary 1137 * during core-coding are initialized. 1138 * 1139 * @param[in] ps_proc 1140 * Pointer to the current process context 1141 * 1142 * @returns error status 1143 * 1144 * @remarks none 1145 * 1146 ******************************************************************************* 1147 */ 1148 IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc) 1149 { 1150 /* codec context */ 1151 codec_t *ps_codec = ps_proc->ps_codec; 1152 1153 /* nmb processing context*/ 1154 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt; 1155 1156 /* indices */ 1157 WORD32 i4_mb_x, i4_mb_y; 1158 1159 /* strides */ 1160 WORD32 i4_src_strd = ps_proc->i4_src_strd; 1161 WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd; 1162 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1163 1164 /* quant params */ 1165 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; 1166 1167 /* deblk ctxt */ 1168 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; 1169 1170 /* deblk bs context */ 1171 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt); 1172 1173 /* Pointer to mv_buffer of current frame */ 1174 mv_buf_t *ps_cur_mv_buf = 
ps_proc->ps_cur_mv_buf; 1175 1176 /* Pointers for color space conversion */ 1177 UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base; 1178 1179 /* Pad the MB to support non standard sizes */ 1180 UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd; 1181 UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht; 1182 UWORD16 u2_num_rows = MB_SIZE; 1183 WORD32 convert_uv_only; 1184 1185 /********************************************************************/ 1186 /* BEGIN INIT */ 1187 /********************************************************************/ 1188 1189 i4_mb_x = ps_proc->i4_mb_x; 1190 i4_mb_y = ps_proc->i4_mb_y; 1191 1192 /* Number of mbs processed in one loop of process function */ 1193 ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs; 1194 ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs; 1195 1196 /* init buffer pointers */ 1197 convert_uv_only = 1; 1198 if (u4_pad_bottom_sz || u4_pad_right_sz || 1199 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE) 1200 { 1201 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) 1202 u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz; 1203 ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base; 1204 i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd; 1205 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE); 1206 convert_uv_only = 0; 1207 } 1208 else 1209 { 1210 i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0]; 1211 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE); 1212 } 1213 1214 1215 if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE || 1216 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P || 1217 ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || 1218 u4_pad_bottom_sz || u4_pad_right_sz) 1219 { 1220 if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) || 1221 (ps_codec->s_cfg.e_inp_color_fmt == 
IV_YUV_420SP_VU)) 1222 ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base; 1223 1224 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE); 1225 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd; 1226 } 1227 else 1228 { 1229 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1]; 1230 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE); 1231 } 1232 1233 ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); 1234 ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); 1235 1236 /* Tempral back and forward reference buffer */ 1237 ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); 1238 ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); 1239 ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); 1240 ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); 1241 1242 /* 1243 * Do color space conversion 1244 * NOTE : We assume there that the number of MB's to process will not span multiple rows 1245 */ 1246 switch (ps_codec->s_cfg.e_inp_color_fmt) 1247 { 1248 case IV_YUV_420SP_UV: 1249 case IV_YUV_420SP_VU: 1250 /* In case of 420 semi-planar input, copy last few rows to intermediate 1251 buffer as chroma trans functions access one extra byte due to interleaved input. 
1252 This data will be padded if required */ 1253 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz) 1254 { 1255 WORD32 num_rows = MB_SIZE; 1256 UWORD8 *pu1_src; 1257 UWORD8 *pu1_dst; 1258 WORD32 i; 1259 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) + 1260 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); 1261 1262 pu1_dst = ps_proc->pu1_src_buf_luma; 1263 1264 /* If padding is required, we always copy luma, if padding isn't required we never copy luma. */ 1265 if (u4_pad_bottom_sz || u4_pad_right_sz) { 1266 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1)) 1267 num_rows = MB_SIZE - u4_pad_bottom_sz; 1268 for (i = 0; i < num_rows; i++) 1269 { 1270 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd); 1271 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0]; 1272 pu1_dst += ps_proc->i4_src_strd; 1273 } 1274 } 1275 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) + 1276 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE); 1277 pu1_dst = ps_proc->pu1_src_buf_chroma; 1278 1279 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte 1280 * due to interleaved input 1281 */ 1282 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1)) 1283 num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE); 1284 else 1285 num_rows = BLK8x8SIZE; 1286 for (i = 0; i < num_rows; i++) 1287 { 1288 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd); 1289 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1]; 1290 pu1_dst += ps_proc->i4_src_chroma_strd; 1291 } 1292 1293 } 1294 break; 1295 1296 case IV_YUV_420P : 1297 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) + 1298 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); 1299 1300 pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) + 1301 
ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE); 1302 1303 pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) + 1304 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE); 1305 1306 ps_codec->pf_ih264e_conv_420p_to_420sp( 1307 pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base, 1308 ps_proc->pu1_src_buf_luma, 1309 ps_proc->pu1_src_buf_chroma, u2_num_rows, 1310 ps_codec->s_cfg.u4_disp_wd, 1311 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0], 1312 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1], 1313 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2], 1314 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd, 1315 convert_uv_only); 1316 break; 1317 1318 case IV_YUV_422ILE : 1319 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2) 1320 + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); 1321 1322 ps_codec->pf_ih264e_fmt_conv_422i_to_420sp( 1323 ps_proc->pu1_src_buf_luma, 1324 ps_proc->pu1_src_buf_chroma, 1325 ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base, 1326 ps_codec->s_cfg.u4_disp_wd, u2_num_rows, 1327 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd, 1328 ps_proc->i4_src_chroma_strd, 1329 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1); 1330 break; 1331 1332 default: 1333 break; 1334 } 1335 1336 if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0)) 1337 { 1338 UWORD32 u4_pad_wd, u4_pad_ht; 1339 u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd); 1340 u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd); 1341 u4_pad_ht = MB_SIZE; 1342 if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) 1343 u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz)); 1344 1345 ih264_pad_right_luma( 1346 ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd, 1347 ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd); 1348 1349 ih264_pad_right_chroma( 1350 ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd, 1351 ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd); 1352 } 1353 1354 /* pad 
bottom edge */ 1355 if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0) 1356 { 1357 ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd, 1358 ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz); 1359 1360 ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2, 1361 ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2)); 1362 } 1363 1364 1365 /* packed mb coeff data */ 1366 ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data; 1367 1368 /* packed mb header data */ 1369 ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data; 1370 1371 /* slice index */ 1372 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x]; 1373 1374 /*********************************************************************/ 1375 /* ih264e_init_quant_params() routine is called at the pic init level*/ 1376 /* this would have initialized the qp. */ 1377 /* TODO_LATER: currently it is assumed that quant params donot change*/ 1378 /* across mb's. 
When they do calculate update ps_qp_params accordingly*/ 1379 /*********************************************************************/ 1380 1381 /* init mv buffer ptr */ 1382 ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * 1383 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE))); 1384 1385 /* Init co-located mv buffer */ 1386 ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * 1387 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE))); 1388 1389 if (i4_mb_y == 0) 1390 { 1391 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu; 1392 } 1393 else 1394 { 1395 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs * 1396 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE))); 1397 } 1398 1399 ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs); 1400 1401 /* mb type */ 1402 ps_proc->u4_mb_type = I16x16; 1403 1404 /* lambda */ 1405 ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp]; 1406 1407 /* mb distortion */ 1408 ps_proc->i4_mb_distortion = SHRT_MAX; 1409 1410 if (i4_mb_x == 0) 1411 { 1412 ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0; 1413 1414 ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0; 1415 1416 ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0; 1417 1418 if (i4_mb_y == 0) 1419 { 1420 memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t)); 1421 } 1422 } 1423 1424 /* mb cost */ 1425 ps_proc->i4_mb_cost = INT_MAX; 1426 1427 /**********************/ 1428 /* init deblk context */ 1429 /**********************/ 1430 ps_deblk->i4_mb_x = ps_proc->i4_mb_x; 1431 /* deblk lags the current mb proc by 1 row */ 1432 /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */ 1433 /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. 
*/ 1434 /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */ 1435 ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1; 1436 1437 /* buffer ptrs */ 1438 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE); 1439 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE); 1440 1441 /* init deblk bs context */ 1442 /* mb indices */ 1443 ps_bs->i4_mb_x = ps_proc->i4_mb_x; 1444 ps_bs->i4_mb_y = ps_proc->i4_mb_y; 1445 1446 /* init n_mb_process context */ 1447 ps_n_mb_ctxt->i4_mb_x = 0; 1448 ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y; 1449 ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy; 1450 1451 return IH264E_SUCCESS; 1452 } 1453 1454 /** 1455 ******************************************************************************* 1456 * 1457 * @brief This function performs luma & chroma padding 1458 * 1459 * @par Description: 1460 * 1461 * @param[in] ps_proc 1462 * Process context corresponding to the job 1463 * 1464 * @param[in] pu1_curr_pic_luma 1465 * Pointer to luma buffer 1466 * 1467 * @param[in] pu1_curr_pic_chroma 1468 * Pointer to chroma buffer 1469 * 1470 * @param[in] i4_mb_x 1471 * mb index x 1472 * 1473 * @param[in] i4_mb_y 1474 * mb index y 1475 * 1476 * @param[in] i4_pad_ht 1477 * number of rows to be padded 1478 * 1479 * @returns error status 1480 * 1481 * @remarks none 1482 * 1483 ******************************************************************************* 1484 */ 1485 IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc, 1486 UWORD8 *pu1_curr_pic_luma, 1487 UWORD8 *pu1_curr_pic_chroma, 1488 WORD32 i4_mb_x, 1489 WORD32 i4_mb_y, 1490 WORD32 i4_pad_ht) 1491 { 1492 /* codec context */ 1493 codec_t *ps_codec = ps_proc->ps_codec; 1494 1495 /* strides */ 1496 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1497 1498 if (i4_mb_x == 0) 1499 { 1500 /* padding left luma */ 1501 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT); 
1502 1503 /* padding left chroma */ 1504 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT); 1505 } 1506 if (i4_mb_x == ps_proc->i4_wd_mbs - 1) 1507 { 1508 /* padding right luma */ 1509 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT); 1510 1511 /* padding right chroma */ 1512 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT); 1513 1514 if (i4_mb_y == ps_proc->i4_ht_mbs - 1) 1515 { 1516 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd); 1517 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd); 1518 1519 /* padding bottom luma */ 1520 ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT); 1521 1522 /* padding bottom chroma */ 1523 ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1)); 1524 } 1525 } 1526 1527 if (i4_mb_y == 0) 1528 { 1529 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma; 1530 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma; 1531 WORD32 wd = MB_SIZE; 1532 1533 if (i4_mb_x == 0) 1534 { 1535 pu1_rec_luma -= PAD_LEFT; 1536 pu1_rec_chroma -= PAD_LEFT; 1537 1538 wd += PAD_LEFT; 1539 } 1540 if (i4_mb_x == ps_proc->i4_wd_mbs - 1) 1541 { 1542 wd += PAD_RIGHT; 1543 } 1544 1545 /* padding top luma */ 1546 ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP); 1547 1548 /* padding top chroma */ 1549 ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1)); 1550 } 1551 1552 return IH264E_SUCCESS; 1553 } 1554 1555 1556 1557 1558 /** 1559 ******************************************************************************* 1560 * 1561 * @brief This function performs deblocking, padding and halfpel generation for 1562 * 'n' MBs 1563 * 1564 * @par Description: 1565 * 1566 * @param[in] ps_proc 1567 * Process context corresponding to the job 1568 * 1569 * @param[in] pu1_curr_pic_luma 1570 
* Current MB being processed(Luma) 1571 * 1572 * @param[in] pu1_curr_pic_chroma 1573 * Current MB being processed(Chroma) 1574 * 1575 * @param[in] i4_mb_x 1576 * Column value of current MB processed 1577 * 1578 * @param[in] i4_mb_y 1579 * Curent row processed 1580 * 1581 * @returns error status 1582 * 1583 * @remarks none 1584 * 1585 ******************************************************************************* 1586 */ 1587 IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc, 1588 UWORD8 *pu1_curr_pic_luma, 1589 UWORD8 *pu1_curr_pic_chroma, 1590 WORD32 i4_mb_x, 1591 WORD32 i4_mb_y) 1592 { 1593 /* codec context */ 1594 codec_t *ps_codec = ps_proc->ps_codec; 1595 1596 /* n_mb processing context */ 1597 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt; 1598 1599 /* deblk context */ 1600 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; 1601 1602 /* strides */ 1603 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1604 1605 /* loop variables */ 1606 WORD32 row, i, j, col; 1607 1608 /* Padding Width */ 1609 UWORD32 u4_pad_wd; 1610 1611 /* deblk_map of the row being deblocked */ 1612 UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs; 1613 1614 /* deblk_map_previous row */ 1615 UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs; 1616 1617 WORD32 u4_pad_top = 0; 1618 1619 WORD32 u4_deblk_prev_row = 0; 1620 1621 /* Number of mbs to be processed */ 1622 WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs; 1623 1624 /* Number of mbs actually processed 1625 * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */ 1626 WORD32 i4_n_mb_process_count = 0; 1627 1628 UWORD8 *pu1_pad_bottom_src = NULL; 1629 1630 UWORD8 *pu1_pad_src_luma = NULL; 1631 UWORD8 *pu1_pad_src_chroma = NULL; 1632 1633 if (ps_proc->u4_disable_deblock_level == 1) 1634 { 1635 /* If left most MB is processed, then pad left */ 1636 if (i4_mb_x == 0) 1637 { 1638 /* padding left luma */ 1639 
ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT); 1640 1641 /* padding left chroma */ 1642 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT); 1643 } 1644 /*last col*/ 1645 if (i4_mb_x == (ps_proc->i4_wd_mbs - 1)) 1646 { 1647 /* padding right luma */ 1648 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT); 1649 1650 /* padding right chroma */ 1651 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT); 1652 } 1653 } 1654 1655 if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1))) 1656 { 1657 /* if number of mb's to be processed are less than 'N', go back. 1658 * exception to the above clause is end of row */ 1659 if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) ) 1660 { 1661 return IH264E_SUCCESS; 1662 } 1663 else 1664 { 1665 i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs); 1666 1667 /* performing deblocking for required number of MBs */ 1668 if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1)) 1669 { 1670 u4_deblk_prev_row = 1; 1671 1672 /* checking whether the top rows are deblocked */ 1673 for (col = 0; col < i4_n_mb_process_count; col++) 1674 { 1675 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col]; 1676 } 1677 1678 /* checking whether the top right MB is deblocked */ 1679 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs) 1680 { 1681 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count]; 1682 } 1683 1684 /* Top or Top right MBs not deblocked */ 1685 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0)) 1686 { 1687 return IH264E_SUCCESS; 1688 } 1689 1690 for (row = 0; row < i4_n_mb_process_count; row++) 1691 { 1692 ih264e_deblock_mb(ps_proc, ps_deblk); 1693 1694 pu1_deblk_map[ps_deblk->i4_mb_x] = 1; 1695 1696 if (ps_deblk->i4_mb_y > 0) 1697 { 1698 if 
(ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/ 1699 { 1700 /* padding left luma */ 1701 ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT); 1702 1703 /* padding left chroma */ 1704 ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT); 1705 } 1706 1707 if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/ 1708 { 1709 /* padding right luma */ 1710 ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT); 1711 1712 /* padding right chroma */ 1713 ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT); 1714 } 1715 } 1716 ps_deblk->i4_mb_x++; 1717 1718 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1719 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1720 1721 } 1722 } 1723 else if(i4_mb_y > 0) 1724 { 1725 ps_deblk->i4_mb_x += i4_n_mb_process_count; 1726 1727 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE; 1728 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE; 1729 } 1730 1731 if (i4_mb_y == 2) 1732 { 1733 u4_pad_wd = i4_n_mb_process_count * MB_SIZE; 1734 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE; 1735 1736 if (ps_n_mb_ctxt->i4_mb_x == 0) 1737 { 1738 u4_pad_wd += PAD_LEFT; 1739 u4_pad_top = -PAD_LEFT; 1740 } 1741 1742 if (i4_mb_x == ps_proc->i4_wd_mbs - 1) 1743 { 1744 u4_pad_wd += PAD_RIGHT; 1745 } 1746 1747 /* padding top luma */ 1748 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP); 1749 1750 /* padding top chroma */ 1751 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1)); 1752 } 1753 1754 ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count; 1755 1756 if (i4_mb_x == ps_proc->i4_wd_mbs - 1) 1757 { 1758 if (ps_proc->i4_mb_y == 
ps_proc->i4_ht_mbs - 1) 1759 { 1760 /* Bottom Padding is done in one stretch for the entire width */ 1761 if (ps_proc->u4_disable_deblock_level != 1) 1762 { 1763 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE; 1764 1765 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE; 1766 1767 ps_n_mb_ctxt->i4_mb_x = 0; 1768 ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y; 1769 ps_deblk->i4_mb_x = 0; 1770 ps_deblk->i4_mb_y = ps_proc->i4_mb_y; 1771 1772 /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */ 1773 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp; 1774 1775 i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs; 1776 1777 j = (ps_proc->i4_wd_mbs) / i4_n_mbs; 1778 1779 for (i = 0; i < j; i++) 1780 { 1781 for (col = 0; col < i4_n_mbs; col++) 1782 { 1783 ih264e_deblock_mb(ps_proc, ps_deblk); 1784 1785 pu1_deblk_map[ps_deblk->i4_mb_x] = 1; 1786 1787 ps_deblk->i4_mb_x++; 1788 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1789 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1790 ps_n_mb_ctxt->i4_mb_x++; 1791 } 1792 } 1793 1794 for (col = 0; col < i4_n_mb_process_count; col++) 1795 { 1796 ih264e_deblock_mb(ps_proc, ps_deblk); 1797 1798 pu1_deblk_map[ps_deblk->i4_mb_x] = 1; 1799 1800 ps_deblk->i4_mb_x++; 1801 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1802 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1803 ps_n_mb_ctxt->i4_mb_x++; 1804 } 1805 1806 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd; 1807 1808 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd; 1809 1810 /* padding left luma */ 1811 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT); 1812 1813 /* padding left chroma */ 1814 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, 
PAD_LEFT); 1815 1816 pu1_pad_src_luma += i4_rec_strd * MB_SIZE; 1817 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE; 1818 1819 /* padding left luma */ 1820 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT); 1821 1822 /* padding left chroma */ 1823 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT); 1824 1825 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE; 1826 1827 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE; 1828 1829 /* padding right luma */ 1830 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT); 1831 1832 /* padding right chroma */ 1833 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT); 1834 1835 pu1_pad_src_luma += i4_rec_strd * MB_SIZE; 1836 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE; 1837 1838 /* padding right luma */ 1839 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT); 1840 1841 /* padding right chroma */ 1842 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT); 1843 1844 } 1845 1846 /* In case height is less than 2 MBs pad top */ 1847 if (ps_proc->i4_ht_mbs <= 2) 1848 { 1849 UWORD8 *pu1_pad_top_src; 1850 /* padding top luma */ 1851 pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT; 1852 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP); 1853 1854 /* padding top chroma */ 1855 pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT; 1856 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1)); 1857 } 1858 1859 /* padding bottom luma */ 1860 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT; 1861 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, 
PAD_BOT); 1862 1863 /* padding bottom chroma */ 1864 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT; 1865 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1)); 1866 } 1867 } 1868 } 1869 } 1870 1871 return IH264E_SUCCESS; 1872 } 1873 1874 1875 /** 1876 ******************************************************************************* 1877 * 1878 * @brief This function performs luma & chroma core coding for a set of mb's. 1879 * 1880 * @par Description: 1881 * The mb to be coded is taken and is evaluated over a predefined set of modes 1882 * (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost 1883 * is selected and using intra/inter prediction filters, prediction is carried out. 1884 * The deviation between src and pred signal constitutes error signal. This error 1885 * signal is transformed (hierarchical transform if necessary) and quantized. The 1886 * quantized residue is packed in to entropy buffer for entropy coding. This is 1887 * repeated for all the mb's enlisted under the job. 
1888 * 1889 * @param[in] ps_proc 1890 * Process context corresponding to the job 1891 * 1892 * @returns error status 1893 * 1894 * @remarks none 1895 * 1896 ******************************************************************************* 1897 */ 1898 WORD32 ih264e_process(process_ctxt_t *ps_proc) 1899 { 1900 /* error status */ 1901 WORD32 error_status = IH264_SUCCESS; 1902 1903 /* codec context */ 1904 codec_t *ps_codec = ps_proc->ps_codec; 1905 1906 /* cbp luma, chroma */ 1907 UWORD32 u4_cbp_l, u4_cbp_c; 1908 1909 /* width in mbs */ 1910 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs; 1911 1912 /* loop var */ 1913 WORD32 i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt; 1914 1915 /* valid modes */ 1916 UWORD32 u4_valid_modes = 0; 1917 1918 /* gate threshold */ 1919 WORD32 i4_gate_threshold = 0; 1920 1921 /* is intra */ 1922 WORD32 luma_idx, chroma_idx, is_intra; 1923 1924 /* temp variables */ 1925 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS; 1926 1927 /* 1928 * list of modes for evaluation 1929 * ------------------------------------------------------------------------- 1930 * Note on enabling I4x4 and I16x16 1931 * At very low QP's the hadamard transform in I16x16 will push up the maximum 1932 * coeff value very high. CAVLC may not be able to represent the value and 1933 * hence the stream may not be decodable in some clips. 1934 * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset. 1935 */ 1936 if (ps_proc->i4_slice_type == ISLICE) 1937 { 1938 if (ps_proc->u4_frame_qp > 10) 1939 { 1940 /* enable intra 16x16 */ 1941 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; 1942 1943 /* enable intra 8x8 */ 1944 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0; 1945 } 1946 1947 /* enable intra 4x4 */ 1948 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? 
(1 << I4x4) : 0; 1949 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4; 1950 1951 } 1952 else if (ps_proc->i4_slice_type == PSLICE) 1953 { 1954 if (ps_proc->u4_frame_qp > 10) 1955 { 1956 /* enable intra 16x16 */ 1957 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; 1958 } 1959 1960 /* enable intra 4x4 */ 1961 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) 1962 { 1963 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; 1964 } 1965 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4; 1966 1967 /* enable inter P16x16 */ 1968 u4_valid_modes |= (1 << P16x16); 1969 } 1970 else if (ps_proc->i4_slice_type == BSLICE) 1971 { 1972 if (ps_proc->u4_frame_qp > 10) 1973 { 1974 /* enable intra 16x16 */ 1975 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; 1976 } 1977 1978 /* enable intra 4x4 */ 1979 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) 1980 { 1981 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; 1982 } 1983 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4; 1984 1985 /* enable inter B16x16 */ 1986 u4_valid_modes |= (1 << B16x16); 1987 } 1988 1989 1990 /* init entropy */ 1991 ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x; 1992 ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y; 1993 ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x); 1994 1995 /* compute recon when : 1996 * 1. current frame is to be used as a reference 1997 * 2. 
dump recon for bit stream sanity check 1998 */ 1999 ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref || 2000 ps_codec->s_cfg.u4_enable_recon; 2001 2002 /* Encode 'n' macroblocks, 2003 * 'n' being the number of mbs dictated by current proc ctxt */ 2004 for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++) 2005 { 2006 /* since we have not yet found sad, we have not yet got min sad */ 2007 /* we need to initialize these variables for each MB */ 2008 /* TODO how to get the min sad into the codec */ 2009 ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad; 2010 ps_proc->u4_min_sad_reached = 0; 2011 2012 /* mb analysis */ 2013 { 2014 /* temp var */ 2015 WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs; 2016 2017 /* force intra refresh ? */ 2018 WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) || 2019 (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt); 2020 2021 /* evaluate inter 16x16 modes */ 2022 if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16))) 2023 { 2024 /* compute nmb me */ 2025 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0) 2026 { 2027 ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me, 2028 i4_wd_mbs - ps_proc->i4_mb_x)); 2029 } 2030 2031 /* set pointers to ME data appropriately for other modules to use */ 2032 { 2033 UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ; 2034 2035 /* get the min sad condition for current mb */ 2036 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached; 2037 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad; 2038 2039 ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]); 2040 ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl); 2041 ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]); 2042 2043 ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion; 2044 ps_proc->i4_mb_cost = 
ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost; 2045 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad; 2046 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached; 2047 ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type; 2048 2049 /* get the best sub pel buffer */ 2050 ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf; 2051 ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd; 2052 } 2053 ih264e_derive_nghbr_avbl_of_mbs(ps_proc); 2054 } 2055 else 2056 { 2057 /* Derive neighbor availability for the current macroblock */ 2058 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl; 2059 2060 ih264e_derive_nghbr_avbl_of_mbs(ps_proc); 2061 } 2062 2063 /* 2064 * If air says intra, we need to force the following code path to evaluate intra 2065 * The easy way is just to say that the inter cost is too much 2066 */ 2067 if (!i4_air_enable_inter) 2068 { 2069 ps_proc->u4_min_sad_reached = 0; 2070 ps_proc->i4_mb_cost = INT_MAX; 2071 ps_proc->i4_mb_distortion = INT_MAX; 2072 } 2073 else if (ps_proc->u4_mb_type == PSKIP) 2074 { 2075 goto UPDATE_MB_INFO; 2076 } 2077 2078 /* wait until the proc of [top + 1] mb is computed. 
2079 * We wait till the proc dependencies are satisfied */ 2080 if(ps_proc->i4_mb_y > 0) 2081 { 2082 /* proc map */ 2083 UWORD8 *pu1_proc_map_top; 2084 2085 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs); 2086 2087 while (1) 2088 { 2089 volatile UWORD8 *pu1_buf; 2090 WORD32 idx = i4_mb_idx + 1; 2091 2092 idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1)); 2093 pu1_buf = pu1_proc_map_top + idx; 2094 if(*pu1_buf) 2095 break; 2096 ithread_yield(); 2097 } 2098 } 2099 2100 /* If we already have the minimum sad, there is no point in searching for sad again */ 2101 if (ps_proc->u4_min_sad_reached == 0) 2102 { 2103 /* intra gating in inter slices */ 2104 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/ 2105 if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate) 2106 { 2107 /* distortion of neighboring blocks */ 2108 WORD32 i4_distortion[4]; 2109 2110 i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion; 2111 2112 i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion; 2113 2114 i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion; 2115 2116 i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion; 2117 2118 i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2; 2119 2120 } 2121 2122 2123 /* If we are going to force intra we need to evaluate intra irrespective of gating */ 2124 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion)) 2125 { 2126 /* evaluate intra 4x4 modes */ 2127 if (u4_valid_modes & (1 << I4x4)) 2128 { 2129 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) 2130 { 2131 ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc); 2132 } 2133 else 2134 { 2135 
ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc); 2136 } 2137 } 2138 2139 /* evaluate intra 16x16 modes */ 2140 if (u4_valid_modes & (1 << I16x16)) 2141 { 2142 ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc); 2143 } 2144 2145 /* evaluate intra 8x8 modes */ 2146 if (u4_valid_modes & (1 << I8x8)) 2147 { 2148 ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc); 2149 } 2150 2151 } 2152 } 2153 } 2154 2155 /* is intra */ 2156 if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8) 2157 { 2158 luma_idx = ps_proc->u4_mb_type; 2159 chroma_idx = 0; 2160 is_intra = 1; 2161 2162 /* evaluate chroma blocks for intra */ 2163 ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc); 2164 } 2165 else 2166 { 2167 luma_idx = 3; 2168 chroma_idx = 1; 2169 is_intra = 0; 2170 } 2171 ps_proc->u4_is_intra = is_intra; 2172 ps_proc->ps_pu->b1_intra_flag = is_intra; 2173 2174 /* redo MV pred of neighbors in the case intra mb */ 2175 /* TODO : currently called unconditionally, needs to be called only in the case of intra 2176 * to modify neighbors */ 2177 if (ps_proc->i4_slice_type != ISLICE) 2178 { 2179 ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type); 2180 } 2181 2182 /* Perform luma mb core coding */ 2183 u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc); 2184 2185 /* Perform luma mb core coding */ 2186 u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc); 2187 2188 /* coded block pattern */ 2189 ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l; 2190 2191 if (!ps_proc->u4_is_intra) 2192 { 2193 if (ps_proc->i4_slice_type == BSLICE) 2194 { 2195 if (ih264e_find_bskip_params(ps_proc, PRED_L0)) 2196 { 2197 ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? 
BDIRECT : BSKIP; 2198 } 2199 } 2200 else if(!ps_proc->u4_cbp) 2201 { 2202 if (ih264e_find_pskip_params(ps_proc, PRED_L0)) 2203 { 2204 ps_proc->u4_mb_type = PSKIP; 2205 } 2206 } 2207 } 2208 2209 UPDATE_MB_INFO: 2210 2211 /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */ 2212 ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc); 2213 2214 /**********************************************************************/ 2215 /* if disable deblock level is '0' this implies enable deblocking for */ 2216 /* all edges of all macroblocks with out any restrictions */ 2217 /* */ 2218 /* if disable deblock level is '1' this implies disable deblocking for*/ 2219 /* all edges of all macroblocks with out any restrictions */ 2220 /* */ 2221 /* if disable deblock level is '2' this implies enable deblocking for */ 2222 /* all edges of all macroblocks except edges overlapping with slice */ 2223 /* boundaries. This option is not currently supported by the encoder */ 2224 /* hence the slice map should be of no significance to perform debloc */ 2225 /* king */ 2226 /**********************************************************************/ 2227 2228 if (ps_proc->u4_compute_recon) 2229 { 2230 /* deblk context */ 2231 /* src pointers */ 2232 UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma; 2233 UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma; 2234 2235 /* src indices */ 2236 UWORD32 i4_mb_x = ps_proc->i4_mb_x; 2237 UWORD32 i4_mb_y = ps_proc->i4_mb_y; 2238 2239 /* compute blocking strength */ 2240 if (ps_proc->u4_disable_deblock_level != 1) 2241 { 2242 ih264e_compute_bs(ps_proc); 2243 } 2244 2245 /* nmb deblocking and hpel and padding */ 2246 ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma, 2247 pu1_cur_pic_chroma, i4_mb_x, 2248 i4_mb_y); 2249 } 2250 2251 /* update the context after for coding next mb */ 2252 error_status |= ih264e_update_proc_ctxt(ps_proc); 2253 2254 /* Once the last row is processed, mark the buffer status appropriately */ 
2255 if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y) 2256 { 2257 /* Pointer to current picture buffer structure */ 2258 pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic; 2259 2260 /* Pointer to current picture's mv buffer structure */ 2261 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf; 2262 2263 /**********************************************************************/ 2264 /* if disable deblock level is '0' this implies enable deblocking for */ 2265 /* all edges of all macroblocks with out any restrictions */ 2266 /* */ 2267 /* if disable deblock level is '1' this implies disable deblocking for*/ 2268 /* all edges of all macroblocks with out any restrictions */ 2269 /* */ 2270 /* if disable deblock level is '2' this implies enable deblocking for */ 2271 /* all edges of all macroblocks except edges overlapping with slice */ 2272 /* boundaries. This option is not currently supported by the encoder */ 2273 /* hence the slice map should be of no significance to perform debloc */ 2274 /* king */ 2275 /**********************************************************************/ 2276 error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC); 2277 2278 error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC); 2279 2280 if (ps_codec->s_cfg.u4_enable_recon) 2281 { 2282 /* pic cnt */ 2283 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt; 2284 2285 /* rec buffers */ 2286 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf = *ps_proc->ps_cur_pic; 2287 2288 /* is last? 
*/ 2289 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last; 2290 2291 /* frame time stamp */ 2292 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high; 2293 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low; 2294 } 2295 2296 } 2297 } 2298 2299 DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y); 2300 2301 return error_status; 2302 } 2303 2304 /** 2305 ******************************************************************************* 2306 * 2307 * @brief 2308 * Function to update rc context after encoding 2309 * 2310 * @par Description 2311 * This function updates the rate control context after the frame is encoded. 2312 * Number of bits consumed by the current frame, frame distortion, frame cost, 2313 * number of intra/inter mb's, ... are passed on to rate control context for 2314 * updating the rc model. 2315 * 2316 * @param[in] ps_codec 2317 * Handle to codec context 2318 * 2319 * @param[in] ctxt_sel 2320 * frame context selector 2321 * 2322 * @param[in] pic_cnt 2323 * pic count 2324 * 2325 * @returns i4_stuffing_byte 2326 * number of stuffing bytes (if necessary) 2327 * 2328 * @remarks 2329 * 2330 ******************************************************************************* 2331 */ 2332 WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm) 2333 { 2334 /* proc set base idx */ 2335 WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? 
(MAX_PROCESS_CTXT / 2) : 0; 2336 2337 /* proc ctxt */ 2338 process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base]; 2339 2340 /* frame qp */ 2341 UWORD8 u1_frame_qp = ps_codec->u4_frame_qp; 2342 2343 /* cbr rc return status */ 2344 WORD32 i4_stuffing_byte = 0; 2345 2346 /* current frame stats */ 2347 frame_info_t s_frame_info; 2348 picture_type_e rc_pic_type; 2349 2350 /* temp var */ 2351 WORD32 i, j; 2352 2353 /********************************************************************/ 2354 /* BEGIN INIT */ 2355 /********************************************************************/ 2356 2357 /* init frame info */ 2358 irc_init_frame_info(&s_frame_info); 2359 2360 /* get frame info */ 2361 for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++) 2362 { 2363 /*****************************************************************/ 2364 /* One frame can be encoded by max of u4_num_cores threads */ 2365 /* Accumulating the num mbs, sad, qp and intra_mb_cost from */ 2366 /* u4_num_cores threads */ 2367 /*****************************************************************/ 2368 for (j = 0; j< MAX_MB_TYPE; j++) 2369 { 2370 s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j]; 2371 2372 s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j]; 2373 2374 s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j]; 2375 } 2376 2377 s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum; 2378 2379 s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum; 2380 2381 /*****************************************************************/ 2382 /* gather number of residue and header bits consumed by the frame*/ 2383 /*****************************************************************/ 2384 ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy); 2385 } 2386 2387 /* get pic type */ 2388 switch (ps_codec->pic_type) 2389 { 2390 case PIC_I: 2391 case PIC_IDR: 2392 rc_pic_type = I_PIC; 2393 break; 2394 case PIC_P: 2395 rc_pic_type 
= P_PIC; 2396 break; 2397 case PIC_B: 2398 rc_pic_type = B_PIC; 2399 break; 2400 default: 2401 assert(0); 2402 break; 2403 } 2404 2405 /* update rc lib with current frame stats */ 2406 i4_stuffing_byte = ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api, 2407 &(s_frame_info), 2408 ps_codec->s_rate_control.pps_pd_frm_rate, 2409 ps_codec->s_rate_control.pps_time_stamp, 2410 ps_codec->s_rate_control.pps_frame_time, 2411 (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs), 2412 &rc_pic_type, 2413 i4_is_first_frm, 2414 &ps_codec->s_rate_control.post_encode_skip[ctxt_sel], 2415 u1_frame_qp, 2416 &ps_codec->s_rate_control.num_intra_in_prev_frame, 2417 &ps_codec->s_rate_control.i4_avg_activity); 2418 return i4_stuffing_byte; 2419 } 2420 2421 /** 2422 ******************************************************************************* 2423 * 2424 * @brief 2425 * entry point of a spawned encoder thread 2426 * 2427 * @par Description: 2428 * The encoder thread dequeues a proc/entropy job from the encoder queue and 2429 * calls necessary routines. 
2430 * 2431 * @param[in] pv_proc 2432 * Process context corresponding to the thread 2433 * 2434 * @returns error status 2435 * 2436 * @remarks 2437 * 2438 ******************************************************************************* 2439 */ 2440 WORD32 ih264e_process_thread(void *pv_proc) 2441 { 2442 /* error status */ 2443 IH264_ERROR_T ret = IH264_SUCCESS; 2444 WORD32 error_status = IH264_SUCCESS; 2445 2446 /* proc ctxt */ 2447 process_ctxt_t *ps_proc = pv_proc; 2448 2449 /* codec ctxt */ 2450 codec_t *ps_codec = ps_proc->ps_codec; 2451 2452 /* structure to represent a processing job entry */ 2453 job_t s_job; 2454 2455 /* blocking call : entropy dequeue is non-blocking till all 2456 * the proc jobs are processed */ 2457 WORD32 is_blocking = 0; 2458 2459 /* set affinity */ 2460 ithread_set_affinity(ps_proc->i4_id); 2461 2462 while(1) 2463 { 2464 /* dequeue a job from the entropy queue */ 2465 { 2466 int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex); 2467 2468 /* codec context selector */ 2469 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS; 2470 2471 volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel]; 2472 2473 /* have the lock */ 2474 if (error == 0) 2475 { 2476 if (*pu4_buf == 0) 2477 { 2478 /* no entropy threads are active, try dequeuing a job from the entropy queue */ 2479 ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking); 2480 if (IH264_SUCCESS == ret) 2481 { 2482 *pu4_buf = 1; 2483 ithread_mutex_unlock(ps_codec->pv_entropy_mutex); 2484 goto WORKER; 2485 } 2486 else if(is_blocking) 2487 { 2488 ithread_mutex_unlock(ps_codec->pv_entropy_mutex); 2489 break; 2490 } 2491 } 2492 ithread_mutex_unlock(ps_codec->pv_entropy_mutex); 2493 } 2494 } 2495 2496 /* dequeue a job from the process queue */ 2497 ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1); 2498 if (IH264_SUCCESS != ret) 2499 { 2500 if(ps_proc->i4_id) 2501 break; 2502 else 2503 { 2504 is_blocking = 1; 2505 continue; 2506 } 
2507 } 2508 2509 WORKER: 2510 /* choose appropriate proc context based on proc_base_idx */ 2511 ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx]; 2512 2513 switch (s_job.i4_cmd) 2514 { 2515 case CMD_PROCESS: 2516 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt; 2517 ps_proc->i4_mb_x = s_job.i2_mb_x; 2518 ps_proc->i4_mb_y = s_job.i2_mb_y; 2519 2520 /* init process context */ 2521 ih264e_init_proc_ctxt(ps_proc); 2522 2523 /* core code all mbs enlisted under the current job */ 2524 error_status |= ih264e_process(ps_proc); 2525 break; 2526 2527 case CMD_ENTROPY: 2528 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x; 2529 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y; 2530 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt; 2531 2532 /* init entropy */ 2533 ih264e_init_entropy_ctxt(ps_proc); 2534 2535 /* entropy code all mbs enlisted under the current job */ 2536 error_status |= ih264e_entropy(ps_proc); 2537 break; 2538 2539 default: 2540 error_status |= IH264_FAIL; 2541 break; 2542 } 2543 } 2544 2545 /* send error code */ 2546 ps_proc->i4_error_code = error_status; 2547 return ret; 2548 } 2549