1 /****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 /** 22 ******************************************************************************* 23 * @file 24 * ih264e_process.c 25 * 26 * @brief 27 * Contains functions for codec thread 28 * 29 * @author 30 * Harish 31 * 32 * @par List of Functions: 33 * - ih264e_generate_sps_pps() 34 * - ih264e_init_entropy_ctxt() 35 * - ih264e_entropy() 36 * - ih264e_pack_header_data() 37 * - ih264e_update_proc_ctxt() 38 * - ih264e_init_proc_ctxt() 39 * - ih264e_pad_recon_buffer() 40 * - ih264e_dblk_pad_hpel_processing_n_mbs() 41 * - ih264e_process() 42 * - ih264e_set_rc_pic_params() 43 * - ih264e_update_rc_post_enc() 44 * - ih264e_process_thread() 45 * 46 * @remarks 47 * None 48 * 49 ******************************************************************************* 50 */ 51 52 /*****************************************************************************/ 53 /* File Includes */ 54 /*****************************************************************************/ 55 56 /* System include files */ 57 #include <stdio.h> 58 #include <stddef.h> 59 #include <stdlib.h> 60 #include <string.h> 61 #include <limits.h> 62 #include <assert.h> 63 64 /* User include files */ 65 #include "ih264_typedefs.h" 66 #include "iv2.h" 67 #include "ive2.h" 68 #include "ih264_defs.h" 69 #include "ih264_debug.h" 70 #include "ime_distortion_metrics.h" 71 #include "ime_defs.h" 72 #include "ime_structs.h" 73 #include "ih264_error.h" 74 #include "ih264_structs.h" 75 #include "ih264_trans_quant_itrans_iquant.h" 76 #include "ih264_inter_pred_filters.h" 77 #include "ih264_mem_fns.h" 78 #include "ih264_padding.h" 79 #include "ih264_intra_pred_filters.h" 80 #include "ih264_deblk_edge_filters.h" 81 #include "ih264_cabac_tables.h" 82 #include "ih264_platform_macros.h" 83 #include "ih264_macros.h" 84 #include "ih264_buf_mgr.h" 85 #include "ih264e_error.h" 86 #include "ih264e_bitstream.h" 87 #include "ih264_common_tables.h" 88 #include "ih264_list.h" 89 #include "ih264e_defs.h" 90 #include "irc_cntrl_param.h" 91 #include "irc_frame_info_collector.h" 92 #include "ih264e_rate_control.h" 93 #include "ih264e_cabac_structs.h" 94 #include "ih264e_structs.h" 95 #include "ih264e_cabac.h" 96 #include "ih264e_process.h" 97 #include "ithread.h" 98 #include "ih264e_intra_modes_eval.h" 99 #include "ih264e_encode_header.h" 100 #include "ih264e_globals.h" 101 #include "ih264e_config.h" 102 #include "ih264e_trace.h" 103 #include "ih264e_statistics.h" 104 #include "ih264_cavlc_tables.h" 105 #include "ih264e_cavlc.h" 106 #include "ih264e_deblk.h" 107 #include "ih264e_me.h" 108 #include "ih264e_debug.h" 109 #include "ih264e_master.h" 110 #include "ih264e_utils.h" 111 #include "irc_mem_req_and_acq.h" 112 #include "irc_rate_control_api.h" 113 #include "ih264e_platform_macros.h" 114 #include "ime_statistics.h" 115 116 117 /*****************************************************************************/ 118 /* Function Definitions */ 119 /*****************************************************************************/ 120 121 /** 122 ****************************************************************************** 123 * 124 * @brief This function generates sps, pps set on request 125 * 126 * @par Description 127 * When the encoder is set in header generation mode, the following function 128 * is called. This generates sps and pps headers and returns the control back 129 * to caller. 130 * 131 * @param[in] ps_codec 132 * pointer to codec context 133 * 134 * @return success or failure error code 135 * 136 ****************************************************************************** 137 */ 138 IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec) 139 { 140 /* choose between ping-pong process buffer set */ 141 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS; 142 143 /* entropy ctxt */ 144 entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy; 145 146 /* Bitstream structure */ 147 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm; 148 149 /* sps */ 150 sps_t *ps_sps = NULL; 151 152 /* pps */ 153 pps_t *ps_pps = NULL; 154 155 /* output buff */ 156 out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel]; 157 158 159 /********************************************************************/ 160 /* initialize the bit stream buffer */ 161 /********************************************************************/ 162 ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize); 163 164 /********************************************************************/ 165 /* BEGIN HEADER GENERATION */ 166 /********************************************************************/ 167 /*ps_codec->i4_pps_id ++;*/ 168 ps_codec->i4_pps_id %= MAX_PPS_CNT; 169 170 /*ps_codec->i4_sps_id ++;*/ 171 ps_codec->i4_sps_id %= MAX_SPS_CNT; 172 173 /* populate sps header */ 174 ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id; 175 ih264e_populate_sps(ps_codec, ps_sps); 176 177 /* populate pps header */ 178 ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id; 179 ih264e_populate_pps(ps_codec, ps_pps); 180 181 ps_entropy->i4_error_code = IH264E_SUCCESS; 182 183 /* generate sps */ 184 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps, 185 &ps_codec->s_cfg.s_vui); 186 187 /* generate pps */ 188 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps); 189 190 /* queue output buffer */ 191 ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset; 192 193 return ps_entropy->i4_error_code; 194 } 195 196 /** 197 ******************************************************************************* 198 * 199 * @brief initialize entropy context. 200 * 201 * @par Description: 202 * Before invoking the call to perform to entropy coding the entropy context 203 * associated with the job needs to be initialized. This involves the start 204 * mb address, end mb address, slice index and the pointer to location at 205 * which the mb residue info and mb header info are packed. 206 * 207 * @param[in] ps_proc 208 * Pointer to the current process context 209 * 210 * @returns error status 211 * 212 * @remarks none 213 * 214 ******************************************************************************* 215 */ 216 IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc) 217 { 218 /* codec context */ 219 codec_t *ps_codec = ps_proc->ps_codec; 220 221 /* entropy ctxt */ 222 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; 223 224 /* start address */ 225 ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x; 226 227 /* end address */ 228 ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt; 229 230 /* slice index */ 231 ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add]; 232 233 /* sof */ 234 /* @ start of frame or start of a new slice, set sof flag */ 235 if (ps_entropy->i4_mb_start_add == 0) 236 { 237 ps_entropy->i4_sof = 1; 238 } 239 240 if (ps_entropy->i4_mb_x == 0) 241 { 242 /* packed mb coeff data */ 243 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) + 244 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data; 245 246 /* packed mb header data */ 247 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) + 248 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data; 249 } 250 251 return IH264E_SUCCESS; 252 } 253 254 /** 255 ******************************************************************************* 256 * 257 * @brief entry point for entropy coding 258 * 259 * @par Description 260 * This function calls lower level functions to perform entropy coding for a 261 * group (n rows) of mb's. After encoding 1 row of mb's, the function takes 262 * back the control, updates the ctxt and calls lower level functions again. 263 * This process is repeated till all the rows or group of mb's (which ever is 264 * minimum) are coded 265 * 266 * @param[in] ps_proc 267 * process context 268 * 269 * @returns error status 270 * 271 * @remarks 272 * 273 ******************************************************************************* 274 */ 275 276 IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc) 277 { 278 /* codec context */ 279 codec_t *ps_codec = ps_proc->ps_codec; 280 281 /* entropy context */ 282 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; 283 284 /* cabac context */ 285 cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac; 286 287 /* sps */ 288 sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT); 289 290 /* pps */ 291 pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT); 292 293 /* slice header */ 294 slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT); 295 296 /* slice type */ 297 WORD32 i4_slice_type = ps_proc->i4_slice_type; 298 299 /* Bitstream structure */ 300 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm; 301 302 /* output buff */ 303 out_buf_t s_out_buf; 304 305 /* proc map */ 306 UWORD8 *pu1_proc_map; 307 308 /* entropy map */ 309 UWORD8 *pu1_entropy_map_curr; 310 311 /* proc base idx */ 312 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS; 313 314 /* temp var */ 315 WORD32 i4_wd_mbs, i4_ht_mbs; 316 UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx; 317 WORD32 bitstream_start_offset, bitstream_end_offset; 318 /********************************************************************/ 319 /* BEGIN INIT */ 320 /********************************************************************/ 321 322 /* entropy encode start address */ 323 u4_mb_idx = ps_entropy->i4_mb_start_add; 324 325 /* entropy encode end address */ 326 u4_mb_end_idx = ps_entropy->i4_mb_end_add; 327 328 /* width in mbs */ 329 i4_wd_mbs = ps_entropy->i4_wd_mbs; 330 331 /* height in mbs */ 332 i4_ht_mbs = ps_entropy->i4_ht_mbs; 333 334 /* total mb cnt */ 335 u4_mb_cnt = i4_wd_mbs * i4_ht_mbs; 336 337 /* proc map */ 338 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs; 339 340 /* entropy map */ 341 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs; 342 343 /********************************************************************/ 344 /* @ start of frame / slice, */ 345 /* initialize the output buffer, */ 346 /* initialize the bit stream buffer, */ 347 /* check if sps and pps headers have to be generated, */ 348 /* populate and generate slice header */ 349 /********************************************************************/ 350 if (ps_entropy->i4_sof) 351 { 352 /********************************************************************/ 353 /* initialize the output buffer */ 354 /********************************************************************/ 355 s_out_buf = ps_codec->as_out_buf[ctxt_sel]; 356 357 /* is last frame to encode */ 358 s_out_buf.u4_is_last = ps_entropy->u4_is_last; 359 360 /* frame idx */ 361 s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high; 362 s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low; 363 364 /********************************************************************/ 365 /* initialize the bit stream buffer */ 366 /********************************************************************/ 367 ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize); 368 369 /********************************************************************/ 370 /* BEGIN HEADER GENERATION */ 371 /********************************************************************/ 372 if (1 == ps_entropy->i4_gen_header) 373 { 374 /* generate sps */ 375 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps, 376 &ps_codec->s_cfg.s_vui); 377 /* generate pps */ 378 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps); 379 380 /* reset i4_gen_header */ 381 ps_entropy->i4_gen_header = 0; 382 } 383 384 /* populate slice header */ 385 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps); 386 387 /* generate slice header */ 388 ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr, 389 ps_pps, ps_sps); 390 391 /* once start of frame / slice is done, you can reset it */ 392 /* it is the responsibility of the caller to set this flag */ 393 ps_entropy->i4_sof = 0; 394 395 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag) 396 { 397 BITSTREAM_BYTE_ALIGN(ps_bitstrm); 398 BITSTREAM_FLUSH(ps_bitstrm); 399 ih264e_init_cabac_ctxt(ps_entropy); 400 } 401 } 402 403 /* begin entropy coding for the mb set */ 404 while (u4_mb_idx < u4_mb_end_idx) 405 { 406 /* init ptrs/indices */ 407 if (ps_entropy->i4_mb_x == i4_wd_mbs) 408 { 409 ps_entropy->i4_mb_y++; 410 ps_entropy->i4_mb_x = 0; 411 412 /* packed mb coeff data */ 413 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) + 414 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data; 415 416 /* packed mb header data */ 417 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) + 418 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data; 419 420 /* proc map */ 421 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs; 422 423 /* entropy map */ 424 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs; 425 } 426 427 DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y); 428 ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x); 429 ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y); 430 431 /* wait until the curr mb is core coded */ 432 /* The wait for curr mb to be core coded is essential when entropy is launched 433 * as a separate job 434 */ 435 while (1) 436 { 437 volatile UWORD8 *pu1_buf1; 438 WORD32 idx = ps_entropy->i4_mb_x; 439 440 pu1_buf1 = pu1_proc_map + idx; 441 if (*pu1_buf1) 442 break; 443 ithread_yield(); 444 } 445 446 447 /* write mb layer */ 448 ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy); 449 /* Starting bitstream offset for header in bits */ 450 bitstream_start_offset = GET_NUM_BITS(ps_bitstrm); 451 452 /* set entropy map */ 453 pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1; 454 455 u4_mb_idx++; 456 ps_entropy->i4_mb_x++; 457 /* check for eof */ 458 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag) 459 { 460 if (ps_entropy->i4_mb_x < i4_wd_mbs) 461 { 462 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0); 463 } 464 } 465 466 if (ps_entropy->i4_mb_x == i4_wd_mbs) 467 { 468 /* if slices are enabled */ 469 if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS) 470 { 471 /* current slice index */ 472 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx; 473 474 /* slice map */ 475 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx; 476 477 /* No need to open a slice at end of frame. The current slice can be closed at the time 478 * of signaling eof flag. 479 */ 480 if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx 481 != pu1_slice_idx[u4_mb_idx])) 482 { 483 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag) 484 { /* mb skip run */ 485 if ((i4_slice_type != ISLICE) 486 && *ps_entropy->pi4_mb_skip_run) 487 { 488 if (*ps_entropy->pi4_mb_skip_run) 489 { 490 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run"); 491 *ps_entropy->pi4_mb_skip_run = 0; 492 } 493 } 494 /* put rbsp trailing bits for the previous slice */ 495 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); 496 } 497 else 498 { 499 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1); 500 } 501 502 /* update slice header pointer */ 503 i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx]; 504 ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx; 505 ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT); 506 507 /* populate slice header */ 508 ps_entropy->i4_mb_start_add = u4_mb_idx; 509 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, 510 ps_sps); 511 512 /* generate slice header */ 513 ps_entropy->i4_error_code |= ih264e_generate_slice_header( 514 ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps); 515 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag) 516 { 517 BITSTREAM_BYTE_ALIGN(ps_bitstrm); 518 BITSTREAM_FLUSH(ps_bitstrm); 519 ih264e_init_cabac_ctxt(ps_entropy); 520 } 521 } 522 else 523 { 524 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag 525 && u4_mb_idx != u4_mb_cnt) 526 { 527 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0); 528 } 529 } 530 } 531 /* Dont execute any further instructions until store synchronization took place */ 532 DATA_SYNC(); 533 } 534 535 /* Ending bitstream offset for header in bits */ 536 bitstream_end_offset = GET_NUM_BITS(ps_bitstrm); 537 ps_entropy->u4_header_bits[i4_slice_type == PSLICE] += 538 bitstream_end_offset - bitstream_start_offset; 539 } 540 541 /* check for eof */ 542 if (u4_mb_idx == u4_mb_cnt) 543 { 544 /* set end of frame flag */ 545 ps_entropy->i4_eof = 1; 546 } 547 else 548 { 549 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag 550 && ps_codec->s_cfg.e_slice_mode 551 != IVE_SLICE_MODE_BLOCKS) 552 { 553 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0); 554 } 555 } 556 557 if (ps_entropy->i4_eof) 558 { 559 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag) 560 { 561 /* mb skip run */ 562 if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run) 563 { 564 if (*ps_entropy->pi4_mb_skip_run) 565 { 566 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, 567 ps_entropy->i4_error_code, "mb skip run"); 568 *ps_entropy->pi4_mb_skip_run = 0; 569 } 570 } 571 /* put rbsp trailing bits */ 572 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); 573 } 574 else 575 { 576 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1); 577 } 578 579 /* update current frame stats to rc library */ 580 { 581 /* number of bytes to stuff */ 582 WORD32 i4_stuff_bytes; 583 584 /* update */ 585 i4_stuff_bytes = ih264e_update_rc_post_enc( 586 ps_codec, ctxt_sel, 587 (ps_proc->ps_codec->i4_poc == 0)); 588 589 /* cbr rc - house keeping */ 590 if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel]) 591 { 592 ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0; 593 } 594 else if (i4_stuff_bytes) 595 { 596 /* add filler nal units */ 597 ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes); 598 } 599 } 600 601 /* 602 *Frame number is to be incremented only if the current frame is a 603 * reference frame. After each successful frame encode, we increment 604 * frame number by 1 605 */ 606 if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel] 607 && ps_codec->u4_is_curr_frm_ref) 608 { 609 ps_codec->i4_frame_num++; 610 } 611 /********************************************************************/ 612 /* signal the output */ 613 /********************************************************************/ 614 ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = 615 ps_entropy->ps_bitstrm->u4_strm_buf_offset; 616 617 DEBUG("entropy status %x", ps_entropy->i4_error_code); 618 } 619 620 /* allow threads to dequeue entropy jobs */ 621 ps_codec->au4_entropy_thread_active[ctxt_sel] = 0; 622 623 return ps_entropy->i4_error_code; 624 } 625 626 /** 627 ******************************************************************************* 628 * 629 * @brief Packs header information of a mb in to a buffer 630 * 631 * @par Description: 632 * After the deciding the mode info of a macroblock, the syntax elements 633 * associated with the mb are packed and stored. The entropy thread unpacks 634 * this buffer and generates the end bit stream. 635 * 636 * @param[in] ps_proc 637 * Pointer to the current process context 638 * 639 * @returns error status 640 * 641 * @remarks none 642 * 643 ******************************************************************************* 644 */ 645 IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc) 646 { 647 /* curr mb type */ 648 UWORD32 u4_mb_type = ps_proc->u4_mb_type; 649 650 /* pack mb syntax layer of curr mb (used for entropy coding) */ 651 if (u4_mb_type == I4x4) 652 { 653 /* pointer to mb header storage space */ 654 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 655 656 /* temp var */ 657 WORD32 i4, byte; 658 659 /* mb type plus mode */ 660 *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type; 661 662 /* cbp */ 663 *pu1_ptr++ = ps_proc->u4_cbp; 664 665 /* mb qp delta */ 666 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 667 668 /* sub mb modes */ 669 for (i4 = 0; i4 < 16; i4 ++) 670 { 671 byte = 0; 672 673 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] == 674 ps_proc->au1_intra_luma_mb_4x4_modes[i4]) 675 { 676 byte |= 1; 677 } 678 else 679 { 680 681 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] < 682 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4]) 683 { 684 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1); 685 } 686 else 687 { 688 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1; 689 } 690 } 691 692 i4++; 693 694 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] == 695 ps_proc->au1_intra_luma_mb_4x4_modes[i4]) 696 { 697 byte |= 16; 698 } 699 else 700 { 701 702 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] < 703 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4]) 704 { 705 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5); 706 } 707 else 708 { 709 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5; 710 } 711 } 712 713 *pu1_ptr++ = byte; 714 } 715 716 /* end of mb layer */ 717 ps_proc->pv_mb_header_data = pu1_ptr; 718 } 719 else if (u4_mb_type == I16x16) 720 { 721 /* pointer to mb header storage space */ 722 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 723 724 /* mb type plus mode */ 725 *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type; 726 727 /* cbp */ 728 *pu1_ptr++ = ps_proc->u4_cbp; 729 730 /* mb qp delta */ 731 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 732 733 /* end of mb layer */ 734 ps_proc->pv_mb_header_data = pu1_ptr; 735 } 736 else if (u4_mb_type == P16x16) 737 { 738 /* pointer to mb header storage space */ 739 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 740 741 WORD16 *i2_mv_ptr; 742 743 /* mb type plus mode */ 744 *pu1_ptr++ = u4_mb_type; 745 746 /* cbp */ 747 *pu1_ptr++ = ps_proc->u4_cbp; 748 749 /* mb qp delta */ 750 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 751 752 i2_mv_ptr = (WORD16 *)pu1_ptr; 753 754 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx; 755 756 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy; 757 758 /* end of mb layer */ 759 ps_proc->pv_mb_header_data = i2_mv_ptr; 760 } 761 else if (u4_mb_type == PSKIP) 762 { 763 /* pointer to mb header storage space */ 764 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 765 766 /* mb type plus mode */ 767 *pu1_ptr++ = u4_mb_type; 768 769 /* end of mb layer */ 770 ps_proc->pv_mb_header_data = pu1_ptr; 771 } 772 else if(u4_mb_type == B16x16) 773 { 774 775 /* pointer to mb header storage space */ 776 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 777 778 WORD16 *i2_mv_ptr; 779 780 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode; 781 782 /* mb type plus mode */ 783 *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type; 784 785 /* cbp */ 786 *pu1_ptr++ = ps_proc->u4_cbp; 787 788 /* mb qp delta */ 789 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 790 791 /* l0 & l1 me data */ 792 i2_mv_ptr = (WORD16 *)pu1_ptr; 793 794 if (u4_pred_mode != PRED_L1) 795 { 796 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx 797 - ps_proc->ps_pred_mv[0].s_mv.i2_mvx; 798 799 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy 800 - ps_proc->ps_pred_mv[0].s_mv.i2_mvy; 801 } 802 if (u4_pred_mode != PRED_L0) 803 { 804 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx 805 - ps_proc->ps_pred_mv[1].s_mv.i2_mvx; 806 807 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy 808 - ps_proc->ps_pred_mv[1].s_mv.i2_mvy; 809 } 810 811 /* end of mb layer */ 812 ps_proc->pv_mb_header_data = i2_mv_ptr; 813 814 } 815 else if(u4_mb_type == BDIRECT) 816 { 817 /* pointer to mb header storage space */ 818 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 819 820 /* mb type plus mode */ 821 *pu1_ptr++ = u4_mb_type; 822 823 /* cbp */ 824 *pu1_ptr++ = ps_proc->u4_cbp; 825 826 /* mb qp delta */ 827 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 828 829 ps_proc->pv_mb_header_data = pu1_ptr; 830 831 } 832 else if(u4_mb_type == BSKIP) 833 { 834 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode; 835 836 /* pointer to mb header storage space */ 837 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 838 839 /* mb type plus mode */ 840 *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type; 841 842 /* end of mb layer */ 843 ps_proc->pv_mb_header_data = pu1_ptr; 844 } 845 846 return IH264E_SUCCESS; 847 } 848 849 /** 850 ******************************************************************************* 851 * 852 * @brief update process context after encoding an mb. This involves preserving 853 * the current mb information for later use, initialize the proc ctxt elements to 854 * encode next mb. 855 * 856 * @par Description: 857 * This function performs house keeping tasks after encoding an mb. 858 * After encoding an mb, various elements of the process context needs to be 859 * updated to encode the next mb. For instance, the source, recon and reference 860 * pointers, mb indices have to be adjusted to the next mb. The slice index of 861 * the current mb needs to be updated. If mb qp modulation is enabled, then if 862 * the qp changes the quant param structure needs to be updated. Also to encoding 863 * the next mb, the current mb info is used as part of mode prediction or mv 864 * prediction. Hence the current mb info has to preserved at top/top left/left 865 * locations. 866 * 867 * @param[in] ps_proc 868 * Pointer to the current process context 869 * 870 * @returns none 871 * 872 * @remarks none 873 * 874 ******************************************************************************* 875 */ 876 WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc) 877 { 878 /* error status */ 879 WORD32 error_status = IH264_SUCCESS; 880 881 /* codec context */ 882 codec_t *ps_codec = ps_proc->ps_codec; 883 884 /* curr mb indices */ 885 WORD32 i4_mb_x = ps_proc->i4_mb_x; 886 WORD32 i4_mb_y = ps_proc->i4_mb_y; 887 888 /* mb syntax elements of neighbors */ 889 mb_info_t *ps_left_syn = &ps_proc->s_left_mb_syntax_ele; 890 mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x; 891 mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele; 892 893 /* curr mb type */ 894 UWORD32 u4_mb_type = ps_proc->u4_mb_type; 895 896 /* curr mb type */ 897 UWORD32 u4_is_intra = ps_proc->u4_is_intra; 898 899 /* width in mbs */ 900 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs; 901 902 /*height in mbs*/ 903 WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs; 904 905 /* proc map */ 906 UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs); 907 908 /* deblk context */ 909 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; 910 911 /* deblk bs context */ 912 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt); 913 914 /* top row motion vector info */ 915 enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x; 916 917 /* top left mb motion vector */ 918 enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu; 919 920 /* left mb motion vector */ 921 enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu; 922 923 /* sub mb modes */ 924 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4); 925 926 /*************************************************************/ 927 /* During MV prediction, when top right mb is not available, */ 928 /* top left mb info. is used for prediction. Hence the curr */ 929 /* top, which will be top left for the next mb needs to be */ 930 /* preserved before updating it with curr mb info. */ 931 /*************************************************************/ 932 933 /* mb type, mb class, csbp */ 934 *ps_top_left_syn = *ps_top_syn; 935 936 if (ps_proc->i4_slice_type != ISLICE) 937 { 938 /*****************************************/ 939 /* update top left with top info results */ 940 /*****************************************/ 941 /* mv */ 942 *ps_top_left_mb_pu = *ps_top_row_pu; 943 } 944 945 /*************************************************/ 946 /* update top and left with curr mb info results */ 947 /*************************************************/ 948 949 /* mb type */ 950 ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type; 951 952 /* mb class */ 953 ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra; 954 955 /* csbp */ 956 ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp; 957 958 /* distortion */ 959 ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion; 960 961 if (u4_is_intra) 962 { 963 /* mb / sub mb modes */ 964 if (I16x16 == u4_mb_type) 965 { 966 pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode; 967 } 968 else if (I4x4 == u4_mb_type) 969 { 970 ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16); 971 ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16); 972 } 973 else if (I8x8 == u4_mb_type) 974 { 975 memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4); 976 memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4); 977 } 978 979 if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE)) 980 { 981 /* mv */ 982 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu); 983 } 984 985 *ps_proc->pu4_mb_pu_cnt = 1; 986 } 987 else 988 { 989 /* mv */ 990 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu); 991 } 992 993 /* 994 * Mark that the MB has been coded intra 995 * So that future AIRs can skip it 996 */ 997 ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra; 998 999 /**************************************************/ 1000 /* pack mb header info. for entropy coding */ 1001 /**************************************************/ 1002 ih264e_pack_header_data(ps_proc); 1003 1004 /* update previous mb qp */ 1005 ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp; 1006 1007 /* store qp */ 1008 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp; 1009 1010 /* 1011 * We need to sync the cache to make sure that the nmv content of proc 1012 * is updated to cache properly 1013 */ 1014 DATA_SYNC(); 1015 1016 /* Just before finishing the row, enqueue the job in to entropy queue. 1017 * The master thread depending on its convenience shall dequeue it and 1018 * performs entropy. 1019 * 1020 * WARN !! Placing this block post proc map update can cause queuing of 1021 * entropy jobs in out of order. 1022 */ 1023 if (i4_mb_x == i4_wd_mbs - 1) 1024 { 1025 /* job structures */ 1026 job_t s_job; 1027 1028 /* job class */ 1029 s_job.i4_cmd = CMD_ENTROPY; 1030 1031 /* number of mbs to be processed in the current job */ 1032 s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs; 1033 1034 /* job start index x */ 1035 s_job.i2_mb_x = 0; 1036 1037 /* job start index y */ 1038 s_job.i2_mb_y = ps_proc->i4_mb_y; 1039 1040 /* proc base idx */ 1041 s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0; 1042 1043 /* queue the job */ 1044 error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1); 1045 1046 if(ps_proc->i4_mb_y == (i4_ht_mbs - 1)) 1047 ih264_list_terminate(ps_codec->pv_entropy_jobq); 1048 } 1049 1050 /* update proc map */ 1051 pu1_proc_map[i4_mb_x] = 1; 1052 1053 /**************************************************/ 1054 /* update proc ctxt elements for encoding next mb */ 1055 /**************************************************/ 1056 /* update indices */ 1057 i4_mb_x ++; 1058 ps_proc->i4_mb_x = i4_mb_x; 1059 1060 if (ps_proc->i4_mb_x == i4_wd_mbs) 1061 { 1062 ps_proc->i4_mb_y++; 1063 ps_proc->i4_mb_x = 0; 1064 } 1065 1066 /* update slice index */ 1067 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x]; 1068 1069 /* update buffers pointers */ 1070 ps_proc->pu1_src_buf_luma += MB_SIZE; 1071 ps_proc->pu1_rec_buf_luma += MB_SIZE; 1072 ps_proc->apu1_ref_buf_luma[0] += MB_SIZE; 1073 ps_proc->apu1_ref_buf_luma[1] += MB_SIZE; 1074 1075 /* 1076 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, 1077 * the stride per MB is MB_SIZE 1078 */ 1079 ps_proc->pu1_src_buf_chroma += MB_SIZE; 1080 ps_proc->pu1_rec_buf_chroma += MB_SIZE; 1081 ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE; 1082 ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE; 1083 1084 1085 1086 /* Reset cost, distortion params */ 1087 ps_proc->i4_mb_cost = INT_MAX; 1088 ps_proc->i4_mb_distortion = SHRT_MAX; 1089 1090 ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt; 1091 1092 ps_proc->pu4_mb_pu_cnt += 1; 1093 1094 /* Update colocated pu */ 1095 if (ps_proc->i4_slice_type == BSLICE) 1096 ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x); 1097 1098 /* deblk ctxts */ 1099 if (ps_proc->u4_disable_deblock_level != 1) 1100 { 1101 /* indices */ 1102 ps_bs->i4_mb_x = ps_proc->i4_mb_x; 1103 ps_bs->i4_mb_y = ps_proc->i4_mb_y; 1104 1105 #ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */ 1106 ps_deblk->i4_mb_x ++; 1107 1108 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1109 /* 1110 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, 1111 * the stride per MB is MB_SIZE 1112 */ 1113 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1114 #endif 1115 } 1116 1117 return error_status; 1118 } 1119 1120 /** 1121 ******************************************************************************* 1122 * 1123 * @brief initialize process context. 1124 * 1125 * @par Description: 1126 * Before dispatching the current job to process thread, the process context 1127 * associated with the job is initialized. Usually every job aims to encode one 1128 * row of mb's. Basing on the row indices provided by the job, the process 1129 * context's buffer ptrs, slice indices and other elements that are necessary 1130 * during core-coding are initialized. 1131 * 1132 * @param[in] ps_proc 1133 * Pointer to the current process context 1134 * 1135 * @returns error status 1136 * 1137 * @remarks none 1138 * 1139 ******************************************************************************* 1140 */ 1141 IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc) 1142 { 1143 /* codec context */ 1144 codec_t *ps_codec = ps_proc->ps_codec; 1145 1146 /* nmb processing context*/ 1147 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt; 1148 1149 /* indices */ 1150 WORD32 i4_mb_x, i4_mb_y; 1151 1152 /* strides */ 1153 WORD32 i4_src_strd = ps_proc->i4_src_strd; 1154 WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd; 1155 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1156 1157 /* quant params */ 1158 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; 1159 1160 /* deblk ctxt */ 1161 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; 1162 1163 /* deblk bs context */ 1164 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt); 1165 1166 /* Pointer to mv_buffer of current frame */ 1167 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf; 1168 1169 /* Pointers for color space conversion */ 1170 UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base; 1171 1172 /* Pad the MB to support non standard sizes */ 1173 UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd; 1174 UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht; 1175 UWORD16 u2_num_rows = MB_SIZE; 1176 WORD32 convert_uv_only; 1177 1178 /********************************************************************/ 1179 /* BEGIN INIT */ 1180 /********************************************************************/ 1181 1182 i4_mb_x = ps_proc->i4_mb_x; 1183 i4_mb_y = ps_proc->i4_mb_y; 1184 1185 /* Number of mbs processed in one loop of process function */ 1186 ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs; 1187 ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs; 1188 1189 /* init buffer pointers */ 1190 convert_uv_only = 1; 1191 if (u4_pad_bottom_sz || u4_pad_right_sz || 1192 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE) 1193 { 1194 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) 1195 u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz; 1196 ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base; 1197 i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd; 1198 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE); 1199 convert_uv_only = 0; 1200 } 1201 else 1202 { 1203 i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0]; 1204 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE); 1205 } 1206 1207 1208 if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE || 1209 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P || 1210 ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || 1211 u4_pad_bottom_sz || u4_pad_right_sz) 1212 { 1213 if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) || 1214 (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU)) 1215 ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base; 1216 1217 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE); 1218 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd; 1219 } 1220 else 1221 { 1222 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1]; 1223 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE); 1224 } 1225 1226 ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); 1227 ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); 1228 1229 /* Tempral back and forward reference buffer */ 1230 ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); 1231 ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); 1232 ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); 1233 ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); 1234 1235 /* 1236 * Do color space conversion 1237 * NOTE : We assume there that the number of MB's to process will not span multiple rows 1238 */ 1239 switch (ps_codec->s_cfg.e_inp_color_fmt) 1240 { 1241 case IV_YUV_420SP_UV: 1242 case IV_YUV_420SP_VU: 1243 /* In case of 420 semi-planar input, copy last few rows to intermediate 1244 buffer as chroma trans functions access one extra byte due to interleaved input. 1245 This data will be padded if required */ 1246 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz) 1247 { 1248 WORD32 num_rows = MB_SIZE; 1249 UWORD8 *pu1_src; 1250 UWORD8 *pu1_dst; 1251 WORD32 i; 1252 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) + 1253 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); 1254 1255 pu1_dst = ps_proc->pu1_src_buf_luma; 1256 1257 /* If padding is required, we always copy luma, if padding isn't required we never copy luma. */ 1258 if (u4_pad_bottom_sz || u4_pad_right_sz) { 1259 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1)) 1260 num_rows = MB_SIZE - u4_pad_bottom_sz; 1261 for (i = 0; i < num_rows; i++) 1262 { 1263 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd); 1264 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0]; 1265 pu1_dst += ps_proc->i4_src_strd; 1266 } 1267 } 1268 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) + 1269 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE); 1270 pu1_dst = ps_proc->pu1_src_buf_chroma; 1271 1272 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte 1273 * due to interleaved input 1274 */ 1275 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1)) 1276 num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE); 1277 else 1278 num_rows = BLK8x8SIZE; 1279 for (i = 0; i < num_rows; i++) 1280 { 1281 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd); 1282 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1]; 1283 pu1_dst += ps_proc->i4_src_chroma_strd; 1284 } 1285 1286 } 1287 break; 1288 1289 case IV_YUV_420P : 1290 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) + 1291 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); 1292 1293 pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) + 1294 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE); 1295 1296 pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) + 1297 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE); 1298 1299 ps_codec->pf_ih264e_conv_420p_to_420sp( 1300 pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base, 1301 ps_proc->pu1_src_buf_luma, 1302 ps_proc->pu1_src_buf_chroma, u2_num_rows, 1303 ps_codec->s_cfg.u4_disp_wd, 1304 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0], 1305 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1], 1306 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2], 1307 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd, 1308 convert_uv_only); 1309 break; 1310 1311 case IV_YUV_422ILE : 1312 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2) 1313 + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); 1314 1315 ps_codec->pf_ih264e_fmt_conv_422i_to_420sp( 1316 ps_proc->pu1_src_buf_luma, 1317 ps_proc->pu1_src_buf_chroma, 1318 ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base, 1319 ps_codec->s_cfg.u4_disp_wd, u2_num_rows, 1320 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd, 1321 ps_proc->i4_src_chroma_strd, 1322 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1); 1323 break; 1324 1325 default: 1326 break; 1327 } 1328 1329 if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0)) 1330 { 1331 UWORD32 u4_pad_wd, u4_pad_ht; 1332 u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd); 1333 u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd); 1334 u4_pad_ht = MB_SIZE; 1335 if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) 1336 u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz)); 1337 1338 ih264_pad_right_luma( 1339 ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd, 1340 ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd); 1341 1342 ih264_pad_right_chroma( 1343 ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd, 1344 ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd); 1345 } 1346 1347 /* pad bottom edge */ 1348 if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0) 1349 { 1350 ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd, 1351 ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz); 1352 1353 ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2, 1354 ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2)); 1355 } 1356 1357 1358 /* packed mb coeff data */ 1359 ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data; 1360 1361 /* packed mb header data */ 1362 ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data; 1363 1364 /* slice index */ 1365 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x]; 1366 1367 /*********************************************************************/ 1368 /* ih264e_init_quant_params() routine is called at the pic init level*/ 1369 /* this would have initialized the qp. */ 1370 /* TODO_LATER: currently it is assumed that quant params donot change*/ 1371 /* across mb's. When they do calculate update ps_qp_params accordingly*/ 1372 /*********************************************************************/ 1373 1374 /* init mv buffer ptr */ 1375 ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * 1376 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE))); 1377 1378 /* Init co-located mv buffer */ 1379 ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * 1380 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE))); 1381 1382 if (i4_mb_y == 0) 1383 { 1384 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu; 1385 } 1386 else 1387 { 1388 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs * 1389 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE))); 1390 } 1391 1392 ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs); 1393 1394 /* mb type */ 1395 ps_proc->u4_mb_type = I16x16; 1396 1397 /* lambda */ 1398 ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp]; 1399 1400 /* mb distortion */ 1401 ps_proc->i4_mb_distortion = SHRT_MAX; 1402 1403 if (i4_mb_x == 0) 1404 { 1405 ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0; 1406 1407 ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0; 1408 1409 ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0; 1410 1411 if (i4_mb_y == 0) 1412 { 1413 memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t)); 1414 } 1415 } 1416 1417 /* mb cost */ 1418 ps_proc->i4_mb_cost = INT_MAX; 1419 1420 /**********************/ 1421 /* init deblk context */ 1422 /**********************/ 1423 ps_deblk->i4_mb_x = ps_proc->i4_mb_x; 1424 /* deblk lags the current mb proc by 1 row */ 1425 /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */ 1426 /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */ 1427 /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */ 1428 ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1; 1429 1430 /* buffer ptrs */ 1431 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE); 1432 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE); 1433 1434 /* init deblk bs context */ 1435 /* mb indices */ 1436 ps_bs->i4_mb_x = ps_proc->i4_mb_x; 1437 ps_bs->i4_mb_y = ps_proc->i4_mb_y; 1438 1439 /* init n_mb_process context */ 1440 ps_n_mb_ctxt->i4_mb_x = 0; 1441 ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y; 1442 ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy; 1443 1444 return IH264E_SUCCESS; 1445 } 1446 1447 /** 1448 ******************************************************************************* 1449 * 1450 * @brief This function performs luma & chroma padding 1451 * 1452 * @par Description: 1453 * 1454 * @param[in] ps_proc 1455 * Process context corresponding to the job 1456 * 1457 * @param[in] pu1_curr_pic_luma 1458 * Pointer to luma buffer 1459 * 1460 * @param[in] pu1_curr_pic_chroma 1461 * Pointer to chroma buffer 1462 * 1463 * @param[in] i4_mb_x 1464 * mb index x 1465 * 1466 * @param[in] i4_mb_y 1467 * mb index y 1468 * 1469 * @param[in] i4_pad_ht 1470 * number of rows to be padded 1471 * 1472 * @returns error status 1473 * 1474 * @remarks none 1475 * 1476 ******************************************************************************* 1477 */ 1478 IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc, 1479 UWORD8 *pu1_curr_pic_luma, 1480 UWORD8 *pu1_curr_pic_chroma, 1481 WORD32 i4_mb_x, 1482 WORD32 i4_mb_y, 1483 WORD32 i4_pad_ht) 1484 { 1485 /* codec context */ 1486 codec_t *ps_codec = ps_proc->ps_codec; 1487 1488 /* strides */ 1489 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1490 1491 if (i4_mb_x == 0) 1492 { 1493 /* padding left luma */ 1494 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT); 1495 1496 /* padding left chroma */ 1497 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT); 1498 } 1499 if (i4_mb_x == ps_proc->i4_wd_mbs - 1) 1500 { 1501 /* padding right luma */ 1502 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT); 1503 1504 /* padding right chroma */ 1505 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT); 1506 1507 if (i4_mb_y == ps_proc->i4_ht_mbs - 1) 1508 { 1509 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd); 1510 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd); 1511 1512 /* padding bottom luma */ 1513 ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT); 1514 1515 /* padding bottom chroma */ 1516 ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1)); 1517 } 1518 } 1519 1520 if (i4_mb_y == 0) 1521 { 1522 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma; 1523 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma; 1524 WORD32 wd = MB_SIZE; 1525 1526 if (i4_mb_x == 0) 1527 { 1528 pu1_rec_luma -= PAD_LEFT; 1529 pu1_rec_chroma -= PAD_LEFT; 1530 1531 wd += PAD_LEFT; 1532 } 1533 if (i4_mb_x == ps_proc->i4_wd_mbs - 1) 1534 { 1535 wd += PAD_RIGHT; 1536 } 1537 1538 /* padding top luma */ 1539 ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP); 1540 1541 /* padding top chroma */ 1542 ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1)); 1543 } 1544 1545 return IH264E_SUCCESS; 1546 } 1547 1548 1549 1550 1551 /** 1552 ******************************************************************************* 1553 * 1554 * @brief This function performs deblocking, padding and halfpel generation for 1555 * 'n' MBs 1556 * 1557 * @par Description: 1558 * 1559 * @param[in] ps_proc 1560 * Process context corresponding to the job 1561 * 1562 * @param[in] pu1_curr_pic_luma 1563 * Current MB being processed(Luma) 1564 * 1565 * @param[in] pu1_curr_pic_chroma 1566 * Current MB being processed(Chroma) 1567 * 1568 * @param[in] i4_mb_x 1569 * Column value of current MB processed 1570 * 1571 * @param[in] i4_mb_y 1572 * Curent row processed 1573 * 1574 * @returns error status 1575 * 1576 * @remarks none 1577 * 1578 ******************************************************************************* 1579 */ 1580 IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc, 1581 UWORD8 *pu1_curr_pic_luma, 1582 UWORD8 *pu1_curr_pic_chroma, 1583 WORD32 i4_mb_x, 1584 WORD32 i4_mb_y) 1585 { 1586 /* codec context */ 1587 codec_t *ps_codec = ps_proc->ps_codec; 1588 1589 /* n_mb processing context */ 1590 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt; 1591 1592 /* deblk context */ 1593 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; 1594 1595 /* strides */ 1596 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1597 1598 /* loop variables */ 1599 WORD32 row, i, j, col; 1600 1601 /* Padding Width */ 1602 UWORD32 u4_pad_wd; 1603 1604 /* deblk_map of the row being deblocked */ 1605 UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs; 1606 1607 /* deblk_map_previous row */ 1608 UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs; 1609 1610 WORD32 u4_pad_top = 0; 1611 1612 WORD32 u4_deblk_prev_row = 0; 1613 1614 /* Number of mbs to be processed */ 1615 WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs; 1616 1617 /* Number of mbs actually processed 1618 * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */ 1619 WORD32 i4_n_mb_process_count = 0; 1620 1621 UWORD8 *pu1_pad_bottom_src = NULL; 1622 1623 UWORD8 *pu1_pad_src_luma = NULL; 1624 UWORD8 *pu1_pad_src_chroma = NULL; 1625 1626 if (ps_proc->u4_disable_deblock_level == 1) 1627 { 1628 /* If left most MB is processed, then pad left */ 1629 if (i4_mb_x == 0) 1630 { 1631 /* padding left luma */ 1632 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT); 1633 1634 /* padding left chroma */ 1635 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT); 1636 } 1637 /*last col*/ 1638 if (i4_mb_x == (ps_proc->i4_wd_mbs - 1)) 1639 { 1640 /* padding right luma */ 1641 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT); 1642 1643 /* padding right chroma */ 1644 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT); 1645 } 1646 } 1647 1648 if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1))) 1649 { 1650 /* if number of mb's to be processed are less than 'N', go back. 1651 * exception to the above clause is end of row */ 1652 if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) ) 1653 { 1654 return IH264E_SUCCESS; 1655 } 1656 else 1657 { 1658 i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs); 1659 1660 /* performing deblocking for required number of MBs */ 1661 if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1)) 1662 { 1663 u4_deblk_prev_row = 1; 1664 1665 /* checking whether the top rows are deblocked */ 1666 for (col = 0; col < i4_n_mb_process_count; col++) 1667 { 1668 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col]; 1669 } 1670 1671 /* checking whether the top right MB is deblocked */ 1672 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs) 1673 { 1674 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count]; 1675 } 1676 1677 /* Top or Top right MBs not deblocked */ 1678 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0)) 1679 { 1680 return IH264E_SUCCESS; 1681 } 1682 1683 for (row = 0; row < i4_n_mb_process_count; row++) 1684 { 1685 ih264e_deblock_mb(ps_proc, ps_deblk); 1686 1687 pu1_deblk_map[ps_deblk->i4_mb_x] = 1; 1688 1689 if (ps_deblk->i4_mb_y > 0) 1690 { 1691 if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/ 1692 { 1693 /* padding left luma */ 1694 ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT); 1695 1696 /* padding left chroma */ 1697 ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT); 1698 } 1699 1700 if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/ 1701 { 1702 /* padding right luma */ 1703 ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT); 1704 1705 /* padding right chroma */ 1706 ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT); 1707 } 1708 } 1709 ps_deblk->i4_mb_x++; 1710 1711 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1712 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1713 1714 } 1715 } 1716 else if(i4_mb_y > 0) 1717 { 1718 ps_deblk->i4_mb_x += i4_n_mb_process_count; 1719 1720 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE; 1721 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE; 1722 } 1723 1724 if (i4_mb_y == 2) 1725 { 1726 u4_pad_wd = i4_n_mb_process_count * MB_SIZE; 1727 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE; 1728 1729 if (ps_n_mb_ctxt->i4_mb_x == 0) 1730 { 1731 u4_pad_wd += PAD_LEFT; 1732 u4_pad_top = -PAD_LEFT; 1733 } 1734 1735 if (i4_mb_x == ps_proc->i4_wd_mbs - 1) 1736 { 1737 u4_pad_wd += PAD_RIGHT; 1738 } 1739 1740 /* padding top luma */ 1741 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP); 1742 1743 /* padding top chroma */ 1744 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1)); 1745 } 1746 1747 ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count; 1748 1749 if (i4_mb_x == ps_proc->i4_wd_mbs - 1) 1750 { 1751 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) 1752 { 1753 /* Bottom Padding is done in one stretch for the entire width */ 1754 if (ps_proc->u4_disable_deblock_level != 1) 1755 { 1756 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE; 1757 1758 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE; 1759 1760 ps_n_mb_ctxt->i4_mb_x = 0; 1761 ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y; 1762 ps_deblk->i4_mb_x = 0; 1763 ps_deblk->i4_mb_y = ps_proc->i4_mb_y; 1764 1765 /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */ 1766 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp; 1767 1768 i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs; 1769 1770 j = (ps_proc->i4_wd_mbs) / i4_n_mbs; 1771 1772 for (i = 0; i < j; i++) 1773 { 1774 for (col = 0; col < i4_n_mbs; col++) 1775 { 1776 ih264e_deblock_mb(ps_proc, ps_deblk); 1777 1778 pu1_deblk_map[ps_deblk->i4_mb_x] = 1; 1779 1780 ps_deblk->i4_mb_x++; 1781 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1782 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1783 ps_n_mb_ctxt->i4_mb_x++; 1784 } 1785 } 1786 1787 for (col = 0; col < i4_n_mb_process_count; col++) 1788 { 1789 ih264e_deblock_mb(ps_proc, ps_deblk); 1790 1791 pu1_deblk_map[ps_deblk->i4_mb_x] = 1; 1792 1793 ps_deblk->i4_mb_x++; 1794 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1795 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1796 ps_n_mb_ctxt->i4_mb_x++; 1797 } 1798 1799 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd; 1800 1801 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd; 1802 1803 /* padding left luma */ 1804 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT); 1805 1806 /* padding left chroma */ 1807 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT); 1808 1809 pu1_pad_src_luma += i4_rec_strd * MB_SIZE; 1810 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE; 1811 1812 /* padding left luma */ 1813 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT); 1814 1815 /* padding left chroma */ 1816 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT); 1817 1818 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE; 1819 1820 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE; 1821 1822 /* padding right luma */ 1823 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT); 1824 1825 /* padding right chroma */ 1826 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT); 1827 1828 pu1_pad_src_luma += i4_rec_strd * MB_SIZE; 1829 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE; 1830 1831 /* padding right luma */ 1832 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT); 1833 1834 /* padding right chroma */ 1835 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT); 1836 1837 } 1838 1839 /* In case height is less than 2 MBs pad top */ 1840 if (ps_proc->i4_ht_mbs <= 2) 1841 { 1842 UWORD8 *pu1_pad_top_src; 1843 /* padding top luma */ 1844 pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT; 1845 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP); 1846 1847 /* padding top chroma */ 1848 pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT; 1849 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1)); 1850 } 1851 1852 /* padding bottom luma */ 1853 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT; 1854 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT); 1855 1856 /* padding bottom chroma */ 1857 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT; 1858 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1)); 1859 } 1860 } 1861 } 1862 } 1863 1864 return IH264E_SUCCESS; 1865 } 1866 1867 1868 /** 1869 ******************************************************************************* 1870 * 1871 * @brief This function performs luma & chroma core coding for a set of mb's. 1872 * 1873 * @par Description: 1874 * The mb to be coded is taken and is evaluated over a predefined set of modes 1875 * (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost 1876 * is selected and using intra/inter prediction filters, prediction is carried out. 1877 * The deviation between src and pred signal constitutes error signal. This error 1878 * signal is transformed (hierarchical transform if necessary) and quantized. The 1879 * quantized residue is packed in to entropy buffer for entropy coding. This is 1880 * repeated for all the mb's enlisted under the job. 1881 * 1882 * @param[in] ps_proc 1883 * Process context corresponding to the job 1884 * 1885 * @returns error status 1886 * 1887 * @remarks none 1888 * 1889 ******************************************************************************* 1890 */ 1891 WORD32 ih264e_process(process_ctxt_t *ps_proc) 1892 { 1893 /* error status */ 1894 WORD32 error_status = IH264_SUCCESS; 1895 1896 /* codec context */ 1897 codec_t *ps_codec = ps_proc->ps_codec; 1898 1899 /* cbp luma, chroma */ 1900 UWORD32 u4_cbp_l, u4_cbp_c; 1901 1902 /* width in mbs */ 1903 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs; 1904 1905 /* loop var */ 1906 WORD32 i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt; 1907 1908 /* valid modes */ 1909 UWORD32 u4_valid_modes = 0; 1910 1911 /* gate threshold */ 1912 WORD32 i4_gate_threshold = 0; 1913 1914 /* is intra */ 1915 WORD32 luma_idx, chroma_idx, is_intra; 1916 1917 /* temp variables */ 1918 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS; 1919 1920 /* 1921 * list of modes for evaluation 1922 * ------------------------------------------------------------------------- 1923 * Note on enabling I4x4 and I16x16 1924 * At very low QP's the hadamard transform in I16x16 will push up the maximum 1925 * coeff value very high. CAVLC may not be able to represent the value and 1926 * hence the stream may not be decodable in some clips. 1927 * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset. 1928 */ 1929 if (ps_proc->i4_slice_type == ISLICE) 1930 { 1931 if (ps_proc->u4_frame_qp > 10) 1932 { 1933 /* enable intra 16x16 */ 1934 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; 1935 1936 /* enable intra 8x8 */ 1937 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0; 1938 } 1939 1940 /* enable intra 4x4 */ 1941 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; 1942 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4; 1943 1944 } 1945 else if (ps_proc->i4_slice_type == PSLICE) 1946 { 1947 if (ps_proc->u4_frame_qp > 10) 1948 { 1949 /* enable intra 16x16 */ 1950 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; 1951 } 1952 1953 /* enable intra 4x4 */ 1954 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) 1955 { 1956 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; 1957 } 1958 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4; 1959 1960 /* enable inter P16x16 */ 1961 u4_valid_modes |= (1 << P16x16); 1962 } 1963 else if (ps_proc->i4_slice_type == BSLICE) 1964 { 1965 if (ps_proc->u4_frame_qp > 10) 1966 { 1967 /* enable intra 16x16 */ 1968 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; 1969 } 1970 1971 /* enable intra 4x4 */ 1972 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) 1973 { 1974 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; 1975 } 1976 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4; 1977 1978 /* enable inter B16x16 */ 1979 u4_valid_modes |= (1 << B16x16); 1980 } 1981 1982 1983 /* init entropy */ 1984 ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x; 1985 ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y; 1986 ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x); 1987 1988 /* compute recon when : 1989 * 1. current frame is to be used as a reference 1990 * 2. dump recon for bit stream sanity check 1991 */ 1992 ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref || 1993 ps_codec->s_cfg.u4_enable_recon; 1994 1995 /* Encode 'n' macroblocks, 1996 * 'n' being the number of mbs dictated by current proc ctxt */ 1997 for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++) 1998 { 1999 /* since we have not yet found sad, we have not yet got min sad */ 2000 /* we need to initialize these variables for each MB */ 2001 /* TODO how to get the min sad into the codec */ 2002 ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad; 2003 ps_proc->u4_min_sad_reached = 0; 2004 2005 /* mb analysis */ 2006 { 2007 /* temp var */ 2008 WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs; 2009 2010 /* force intra refresh ? */ 2011 WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) || 2012 (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt); 2013 2014 /* evaluate inter 16x16 modes */ 2015 if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16))) 2016 { 2017 /* compute nmb me */ 2018 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0) 2019 { 2020 ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me, 2021 i4_wd_mbs - ps_proc->i4_mb_x)); 2022 } 2023 2024 /* set pointers to ME data appropriately for other modules to use */ 2025 { 2026 UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ; 2027 2028 /* get the min sad condition for current mb */ 2029 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached; 2030 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad; 2031 2032 ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]); 2033 ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl); 2034 ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]); 2035 2036 ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion; 2037 ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost; 2038 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad; 2039 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached; 2040 ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type; 2041 2042 /* get the best sub pel buffer */ 2043 ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf; 2044 ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd; 2045 } 2046 ih264e_derive_nghbr_avbl_of_mbs(ps_proc); 2047 } 2048 else 2049 { 2050 /* Derive neighbor availability for the current macroblock */ 2051 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl; 2052 2053 ih264e_derive_nghbr_avbl_of_mbs(ps_proc); 2054 } 2055 2056 /* 2057 * If air says intra, we need to force the following code path to evaluate intra 2058 * The easy way is just to say that the inter cost is too much 2059 */ 2060 if (!i4_air_enable_inter) 2061 { 2062 ps_proc->u4_min_sad_reached = 0; 2063 ps_proc->i4_mb_cost = INT_MAX; 2064 ps_proc->i4_mb_distortion = INT_MAX; 2065 } 2066 else if (ps_proc->u4_mb_type == PSKIP) 2067 { 2068 goto UPDATE_MB_INFO; 2069 } 2070 2071 /* wait until the proc of [top + 1] mb is computed. 2072 * We wait till the proc dependencies are satisfied */ 2073 if(ps_proc->i4_mb_y > 0) 2074 { 2075 /* proc map */ 2076 UWORD8 *pu1_proc_map_top; 2077 2078 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs); 2079 2080 while (1) 2081 { 2082 volatile UWORD8 *pu1_buf; 2083 WORD32 idx = i4_mb_idx + 1; 2084 2085 idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1)); 2086 pu1_buf = pu1_proc_map_top + idx; 2087 if(*pu1_buf) 2088 break; 2089 ithread_yield(); 2090 } 2091 } 2092 2093 /* If we already have the minimum sad, there is no point in searching for sad again */ 2094 if (ps_proc->u4_min_sad_reached == 0) 2095 { 2096 /* intra gating in inter slices */ 2097 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/ 2098 if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate) 2099 { 2100 /* distortion of neighboring blocks */ 2101 WORD32 i4_distortion[4]; 2102 2103 i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion; 2104 2105 i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion; 2106 2107 i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion; 2108 2109 i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion; 2110 2111 i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2; 2112 2113 } 2114 2115 2116 /* If we are going to force intra we need to evaluate intra irrespective of gating */ 2117 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion)) 2118 { 2119 /* evaluate intra 4x4 modes */ 2120 if (u4_valid_modes & (1 << I4x4)) 2121 { 2122 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) 2123 { 2124 ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc); 2125 } 2126 else 2127 { 2128 ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc); 2129 } 2130 } 2131 2132 /* evaluate intra 16x16 modes */ 2133 if (u4_valid_modes & (1 << I16x16)) 2134 { 2135 ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc); 2136 } 2137 2138 /* evaluate intra 8x8 modes */ 2139 if (u4_valid_modes & (1 << I8x8)) 2140 { 2141 ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc); 2142 } 2143 2144 } 2145 } 2146 } 2147 2148 /* is intra */ 2149 if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8) 2150 { 2151 luma_idx = ps_proc->u4_mb_type; 2152 chroma_idx = 0; 2153 is_intra = 1; 2154 2155 /* evaluate chroma blocks for intra */ 2156 ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc); 2157 } 2158 else 2159 { 2160 luma_idx = 3; 2161 chroma_idx = 1; 2162 is_intra = 0; 2163 } 2164 ps_proc->u4_is_intra = is_intra; 2165 ps_proc->ps_pu->b1_intra_flag = is_intra; 2166 2167 /* redo MV pred of neighbors in the case intra mb */ 2168 /* TODO : currently called unconditionally, needs to be called only in the case of intra 2169 * to modify neighbors */ 2170 if (ps_proc->i4_slice_type != ISLICE) 2171 { 2172 ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type); 2173 } 2174 2175 /* Perform luma mb core coding */ 2176 u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc); 2177 2178 /* Perform luma mb core coding */ 2179 u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc); 2180 2181 /* coded block pattern */ 2182 ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l; 2183 2184 if (!ps_proc->u4_is_intra) 2185 { 2186 if (ps_proc->i4_slice_type == BSLICE) 2187 { 2188 if (ih264e_find_bskip_params(ps_proc, PRED_L0)) 2189 { 2190 ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP; 2191 } 2192 } 2193 else if(!ps_proc->u4_cbp) 2194 { 2195 if (ih264e_find_pskip_params(ps_proc, PRED_L0)) 2196 { 2197 ps_proc->u4_mb_type = PSKIP; 2198 } 2199 } 2200 } 2201 2202 UPDATE_MB_INFO: 2203 2204 /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */ 2205 ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc); 2206 2207 /**********************************************************************/ 2208 /* if disable deblock level is '0' this implies enable deblocking for */ 2209 /* all edges of all macroblocks with out any restrictions */ 2210 /* */ 2211 /* if disable deblock level is '1' this implies disable deblocking for*/ 2212 /* all edges of all macroblocks with out any restrictions */ 2213 /* */ 2214 /* if disable deblock level is '2' this implies enable deblocking for */ 2215 /* all edges of all macroblocks except edges overlapping with slice */ 2216 /* boundaries. This option is not currently supported by the encoder */ 2217 /* hence the slice map should be of no significance to perform debloc */ 2218 /* king */ 2219 /**********************************************************************/ 2220 2221 if (ps_proc->u4_compute_recon) 2222 { 2223 /* deblk context */ 2224 /* src pointers */ 2225 UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma; 2226 UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma; 2227 2228 /* src indices */ 2229 UWORD32 i4_mb_x = ps_proc->i4_mb_x; 2230 UWORD32 i4_mb_y = ps_proc->i4_mb_y; 2231 2232 /* compute blocking strength */ 2233 if (ps_proc->u4_disable_deblock_level != 1) 2234 { 2235 ih264e_compute_bs(ps_proc); 2236 } 2237 2238 /* nmb deblocking and hpel and padding */ 2239 ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma, 2240 pu1_cur_pic_chroma, i4_mb_x, 2241 i4_mb_y); 2242 } 2243 2244 /* update the context after for coding next mb */ 2245 error_status |= ih264e_update_proc_ctxt(ps_proc); 2246 2247 /* Once the last row is processed, mark the buffer status appropriately */ 2248 if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y) 2249 { 2250 /* Pointer to current picture buffer structure */ 2251 pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic; 2252 2253 /* Pointer to current picture's mv buffer structure */ 2254 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf; 2255 2256 /**********************************************************************/ 2257 /* if disable deblock level is '0' this implies enable deblocking for */ 2258 /* all edges of all macroblocks with out any restrictions */ 2259 /* */ 2260 /* if disable deblock level is '1' this implies disable deblocking for*/ 2261 /* all edges of all macroblocks with out any restrictions */ 2262 /* */ 2263 /* if disable deblock level is '2' this implies enable deblocking for */ 2264 /* all edges of all macroblocks except edges overlapping with slice */ 2265 /* boundaries. This option is not currently supported by the encoder */ 2266 /* hence the slice map should be of no significance to perform debloc */ 2267 /* king */ 2268 /**********************************************************************/ 2269 error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC); 2270 2271 error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC); 2272 2273 if (ps_codec->s_cfg.u4_enable_recon) 2274 { 2275 /* pic cnt */ 2276 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt; 2277 2278 /* rec buffers */ 2279 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf = *ps_proc->ps_cur_pic; 2280 2281 /* is last? */ 2282 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last; 2283 2284 /* frame time stamp */ 2285 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high; 2286 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low; 2287 } 2288 2289 } 2290 } 2291 2292 DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y); 2293 2294 return error_status; 2295 } 2296 2297 /** 2298 ******************************************************************************* 2299 * 2300 * @brief 2301 * Function to update rc context after encoding 2302 * 2303 * @par Description 2304 * This function updates the rate control context after the frame is encoded. 2305 * Number of bits consumed by the current frame, frame distortion, frame cost, 2306 * number of intra/inter mb's, ... are passed on to rate control context for 2307 * updating the rc model. 2308 * 2309 * @param[in] ps_codec 2310 * Handle to codec context 2311 * 2312 * @param[in] ctxt_sel 2313 * frame context selector 2314 * 2315 * @param[in] pic_cnt 2316 * pic count 2317 * 2318 * @returns i4_stuffing_byte 2319 * number of stuffing bytes (if necessary) 2320 * 2321 * @remarks 2322 * 2323 ******************************************************************************* 2324 */ 2325 WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm) 2326 { 2327 /* proc set base idx */ 2328 WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0; 2329 2330 /* proc ctxt */ 2331 process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base]; 2332 2333 /* frame qp */ 2334 UWORD8 u1_frame_qp = ps_codec->u4_frame_qp; 2335 2336 /* cbr rc return status */ 2337 WORD32 i4_stuffing_byte = 0; 2338 2339 /* current frame stats */ 2340 frame_info_t s_frame_info; 2341 picture_type_e rc_pic_type; 2342 2343 /* temp var */ 2344 WORD32 i, j; 2345 2346 /********************************************************************/ 2347 /* BEGIN INIT */ 2348 /********************************************************************/ 2349 2350 /* init frame info */ 2351 irc_init_frame_info(&s_frame_info); 2352 2353 /* get frame info */ 2354 for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++) 2355 { 2356 /*****************************************************************/ 2357 /* One frame can be encoded by max of u4_num_cores threads */ 2358 /* Accumulating the num mbs, sad, qp and intra_mb_cost from */ 2359 /* u4_num_cores threads */ 2360 /*****************************************************************/ 2361 for (j = 0; j< MAX_MB_TYPE; j++) 2362 { 2363 s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j]; 2364 2365 s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j]; 2366 2367 s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j]; 2368 } 2369 2370 s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum; 2371 2372 s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum; 2373 2374 /*****************************************************************/ 2375 /* gather number of residue and header bits consumed by the frame*/ 2376 /*****************************************************************/ 2377 ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy); 2378 } 2379 2380 /* get pic type */ 2381 switch (ps_codec->pic_type) 2382 { 2383 case PIC_I: 2384 case PIC_IDR: 2385 rc_pic_type = I_PIC; 2386 break; 2387 case PIC_P: 2388 rc_pic_type = P_PIC; 2389 break; 2390 case PIC_B: 2391 rc_pic_type = B_PIC; 2392 break; 2393 default: 2394 assert(0); 2395 break; 2396 } 2397 2398 /* update rc lib with current frame stats */ 2399 i4_stuffing_byte = ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api, 2400 &(s_frame_info), 2401 ps_codec->s_rate_control.pps_pd_frm_rate, 2402 ps_codec->s_rate_control.pps_time_stamp, 2403 ps_codec->s_rate_control.pps_frame_time, 2404 (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs), 2405 &rc_pic_type, 2406 i4_is_first_frm, 2407 &ps_codec->s_rate_control.post_encode_skip[ctxt_sel], 2408 u1_frame_qp, 2409 &ps_codec->s_rate_control.num_intra_in_prev_frame, 2410 &ps_codec->s_rate_control.i4_avg_activity); 2411 return i4_stuffing_byte; 2412 } 2413 2414 /** 2415 ******************************************************************************* 2416 * 2417 * @brief 2418 * entry point of a spawned encoder thread 2419 * 2420 * @par Description: 2421 * The encoder thread dequeues a proc/entropy job from the encoder queue and 2422 * calls necessary routines. 2423 * 2424 * @param[in] pv_proc 2425 * Process context corresponding to the thread 2426 * 2427 * @returns error status 2428 * 2429 * @remarks 2430 * 2431 ******************************************************************************* 2432 */ 2433 WORD32 ih264e_process_thread(void *pv_proc) 2434 { 2435 /* error status */ 2436 IH264_ERROR_T ret = IH264_SUCCESS; 2437 WORD32 error_status = IH264_SUCCESS; 2438 2439 /* proc ctxt */ 2440 process_ctxt_t *ps_proc = pv_proc; 2441 2442 /* codec ctxt */ 2443 codec_t *ps_codec = ps_proc->ps_codec; 2444 2445 /* structure to represent a processing job entry */ 2446 job_t s_job; 2447 2448 /* blocking call : entropy dequeue is non-blocking till all 2449 * the proc jobs are processed */ 2450 WORD32 is_blocking = 0; 2451 2452 /* set affinity */ 2453 ithread_set_affinity(ps_proc->i4_id); 2454 2455 while(1) 2456 { 2457 /* dequeue a job from the entropy queue */ 2458 { 2459 int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex); 2460 2461 /* codec context selector */ 2462 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS; 2463 2464 volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel]; 2465 2466 /* have the lock */ 2467 if (error == 0) 2468 { 2469 if (*pu4_buf == 0) 2470 { 2471 /* no entropy threads are active, try dequeuing a job from the entropy queue */ 2472 ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking); 2473 if (IH264_SUCCESS == ret) 2474 { 2475 *pu4_buf = 1; 2476 ithread_mutex_unlock(ps_codec->pv_entropy_mutex); 2477 goto WORKER; 2478 } 2479 else if(is_blocking) 2480 { 2481 ithread_mutex_unlock(ps_codec->pv_entropy_mutex); 2482 break; 2483 } 2484 } 2485 ithread_mutex_unlock(ps_codec->pv_entropy_mutex); 2486 } 2487 } 2488 2489 /* dequeue a job from the process queue */ 2490 ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1); 2491 if (IH264_SUCCESS != ret) 2492 { 2493 if(ps_proc->i4_id) 2494 break; 2495 else 2496 { 2497 is_blocking = 1; 2498 continue; 2499 } 2500 } 2501 2502 WORKER: 2503 /* choose appropriate proc context based on proc_base_idx */ 2504 ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx]; 2505 2506 switch (s_job.i4_cmd) 2507 { 2508 case CMD_PROCESS: 2509 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt; 2510 ps_proc->i4_mb_x = s_job.i2_mb_x; 2511 ps_proc->i4_mb_y = s_job.i2_mb_y; 2512 2513 /* init process context */ 2514 ih264e_init_proc_ctxt(ps_proc); 2515 2516 /* core code all mbs enlisted under the current job */ 2517 error_status |= ih264e_process(ps_proc); 2518 break; 2519 2520 case CMD_ENTROPY: 2521 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x; 2522 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y; 2523 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt; 2524 2525 /* init entropy */ 2526 ih264e_init_entropy_ctxt(ps_proc); 2527 2528 /* entropy code all mbs enlisted under the current job */ 2529 error_status |= ih264e_entropy(ps_proc); 2530 break; 2531 2532 default: 2533 error_status |= IH264_FAIL; 2534 break; 2535 } 2536 } 2537 2538 /* send error code */ 2539 ps_proc->i4_error_code = error_status; 2540 return ret; 2541 } 2542