1 /****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 /** 22 ******************************************************************************* 23 * @file 24 * ih264e_process.c 25 * 26 * @brief 27 * Contains functions for codec thread 28 * 29 * @author 30 * Harish 31 * 32 * @par List of Functions: 33 * - ih264e_generate_sps_pps() 34 * - ih264e_init_entropy_ctxt() 35 * - ih264e_entropy() 36 * - ih264e_pack_header_data() 37 * - ih264e_update_proc_ctxt() 38 * - ih264e_init_proc_ctxt() 39 * - ih264e_pad_recon_buffer() 40 * - ih264e_dblk_pad_hpel_processing_n_mbs() 41 * - ih264e_process() 42 * - ih264e_set_rc_pic_params() 43 * - ih264e_update_rc_post_enc() 44 * - ih264e_process_thread() 45 * 46 * @remarks 47 * None 48 * 49 ******************************************************************************* 50 */ 51 52 /*****************************************************************************/ 53 /* File Includes */ 54 /*****************************************************************************/ 55 56 /* System include files */ 57 #include <stdio.h> 58 #include <stddef.h> 59 #include <stdlib.h> 60 #include <string.h> 61 #include <limits.h> 62 #include 
<assert.h> 63 64 /* User include files */ 65 #include "ih264_typedefs.h" 66 #include "iv2.h" 67 #include "ive2.h" 68 #include "ih264_defs.h" 69 #include "ih264_debug.h" 70 #include "ime_distortion_metrics.h" 71 #include "ime_defs.h" 72 #include "ime_structs.h" 73 #include "ih264_error.h" 74 #include "ih264_structs.h" 75 #include "ih264_trans_quant_itrans_iquant.h" 76 #include "ih264_inter_pred_filters.h" 77 #include "ih264_mem_fns.h" 78 #include "ih264_padding.h" 79 #include "ih264_intra_pred_filters.h" 80 #include "ih264_deblk_edge_filters.h" 81 #include "ih264_cabac_tables.h" 82 #include "ih264_platform_macros.h" 83 #include "ih264_macros.h" 84 #include "ih264_buf_mgr.h" 85 #include "ih264e_error.h" 86 #include "ih264e_bitstream.h" 87 #include "ih264_common_tables.h" 88 #include "ih264_list.h" 89 #include "ih264e_defs.h" 90 #include "irc_cntrl_param.h" 91 #include "irc_frame_info_collector.h" 92 #include "ih264e_rate_control.h" 93 #include "ih264e_cabac_structs.h" 94 #include "ih264e_structs.h" 95 #include "ih264e_cabac.h" 96 #include "ih264e_process.h" 97 #include "ithread.h" 98 #include "ih264e_intra_modes_eval.h" 99 #include "ih264e_encode_header.h" 100 #include "ih264e_globals.h" 101 #include "ih264e_config.h" 102 #include "ih264e_trace.h" 103 #include "ih264e_statistics.h" 104 #include "ih264_cavlc_tables.h" 105 #include "ih264e_cavlc.h" 106 #include "ih264e_deblk.h" 107 #include "ih264e_me.h" 108 #include "ih264e_debug.h" 109 #include "ih264e_master.h" 110 #include "ih264e_utils.h" 111 #include "irc_mem_req_and_acq.h" 112 #include "irc_rate_control_api.h" 113 #include "ih264e_platform_macros.h" 114 #include "ime_statistics.h" 115 116 117 /*****************************************************************************/ 118 /* Function Definitions */ 119 /*****************************************************************************/ 120 121 /** 122 ****************************************************************************** 123 * 124 * @brief This function 
generates sps, pps set on request 125 * 126 * @par Description 127 * When the encoder is set in header generation mode, the following function 128 * is called. This generates sps and pps headers and returns the control back 129 * to caller. 130 * 131 * @param[in] ps_codec 132 * pointer to codec context 133 * 134 * @return success or failure error code 135 * 136 ****************************************************************************** 137 */ 138 IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec) 139 { 140 /* choose between ping-pong process buffer set */ 141 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS; 142 143 /* entropy ctxt */ 144 entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy; 145 146 /* Bitstream structure */ 147 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm; 148 149 /* sps */ 150 sps_t *ps_sps = NULL; 151 152 /* pps */ 153 pps_t *ps_pps = NULL; 154 155 /* output buff */ 156 out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel]; 157 158 159 /********************************************************************/ 160 /* initialize the bit stream buffer */ 161 /********************************************************************/ 162 ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize); 163 164 /********************************************************************/ 165 /* BEGIN HEADER GENERATION */ 166 /********************************************************************/ 167 /*ps_codec->i4_pps_id ++;*/ 168 ps_codec->i4_pps_id %= MAX_PPS_CNT; 169 170 /*ps_codec->i4_sps_id ++;*/ 171 ps_codec->i4_sps_id %= MAX_SPS_CNT; 172 173 /* populate sps header */ 174 ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id; 175 ih264e_populate_sps(ps_codec, ps_sps); 176 177 /* populate pps header */ 178 ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id; 179 ih264e_populate_pps(ps_codec, ps_pps); 180 181 ps_entropy->i4_error_code = IH264E_SUCCESS; 182 183 
/* generate sps */ 184 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps, &ps_codec->s_vui); 185 186 /* generate pps */ 187 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps); 188 189 /* queue output buffer */ 190 ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset; 191 192 return ps_entropy->i4_error_code; 193 } 194 195 /** 196 ******************************************************************************* 197 * 198 * @brief initialize entropy context. 199 * 200 * @par Description: 201 * Before invoking the call to perform to entropy coding the entropy context 202 * associated with the job needs to be initialized. This involves the start 203 * mb address, end mb address, slice index and the pointer to location at 204 * which the mb residue info and mb header info are packed. 205 * 206 * @param[in] ps_proc 207 * Pointer to the current process context 208 * 209 * @returns error status 210 * 211 * @remarks none 212 * 213 ******************************************************************************* 214 */ 215 IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc) 216 { 217 /* codec context */ 218 codec_t *ps_codec = ps_proc->ps_codec; 219 220 /* entropy ctxt */ 221 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; 222 223 /* start address */ 224 ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x; 225 226 /* end address */ 227 ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt; 228 229 /* slice index */ 230 ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add]; 231 232 /* sof */ 233 /* @ start of frame or start of a new slice, set sof flag */ 234 if (ps_entropy->i4_mb_start_add == 0) 235 { 236 ps_entropy->i4_sof = 1; 237 } 238 239 if (ps_entropy->i4_mb_x == 0) 240 { 241 /* packed mb coeff data */ 242 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) + 243 ps_entropy->i4_mb_y 
* ps_codec->u4_size_coeff_data; 244 245 /* packed mb header data */ 246 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) + 247 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data; 248 } 249 250 return IH264E_SUCCESS; 251 } 252 253 /** 254 ******************************************************************************* 255 * 256 * @brief entry point for entropy coding 257 * 258 * @par Description 259 * This function calls lower level functions to perform entropy coding for a 260 * group (n rows) of mb's. After encoding 1 row of mb's, the function takes 261 * back the control, updates the ctxt and calls lower level functions again. 262 * This process is repeated till all the rows or group of mb's (which ever is 263 * minimum) are coded 264 * 265 * @param[in] ps_proc 266 * process context 267 * 268 * @returns error status 269 * 270 * @remarks 271 * 272 ******************************************************************************* 273 */ 274 275 IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc) 276 { 277 /* codec context */ 278 codec_t *ps_codec = ps_proc->ps_codec; 279 280 /* entropy context */ 281 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy; 282 283 /* cabac context */ 284 cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac; 285 286 /* sps */ 287 sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT); 288 289 /* pps */ 290 pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT); 291 292 /* slice header */ 293 slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT); 294 295 /* slice type */ 296 WORD32 i4_slice_type = ps_proc->i4_slice_type; 297 298 /* Bitstream structure */ 299 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm; 300 301 /* output buff */ 302 out_buf_t s_out_buf; 303 304 /* proc map */ 305 UWORD8 *pu1_proc_map; 306 307 /* entropy map */ 308 UWORD8 *pu1_entropy_map_curr; 309 310 /* proc base idx */ 311 WORD32 
ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS; 312 313 /* temp var */ 314 WORD32 i4_wd_mbs, i4_ht_mbs; 315 UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx; 316 WORD32 bitstream_start_offset, bitstream_end_offset; 317 /********************************************************************/ 318 /* BEGIN INIT */ 319 /********************************************************************/ 320 321 /* entropy encode start address */ 322 u4_mb_idx = ps_entropy->i4_mb_start_add; 323 324 /* entropy encode end address */ 325 u4_mb_end_idx = ps_entropy->i4_mb_end_add; 326 327 /* width in mbs */ 328 i4_wd_mbs = ps_entropy->i4_wd_mbs; 329 330 /* height in mbs */ 331 i4_ht_mbs = ps_entropy->i4_ht_mbs; 332 333 /* total mb cnt */ 334 u4_mb_cnt = i4_wd_mbs * i4_ht_mbs; 335 336 /* proc map */ 337 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs; 338 339 /* entropy map */ 340 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs; 341 342 /********************************************************************/ 343 /* @ start of frame / slice, */ 344 /* initialize the output buffer, */ 345 /* initialize the bit stream buffer, */ 346 /* check if sps and pps headers have to be generated, */ 347 /* populate and generate slice header */ 348 /********************************************************************/ 349 if (ps_entropy->i4_sof) 350 { 351 /********************************************************************/ 352 /* initialize the output buffer */ 353 /********************************************************************/ 354 s_out_buf = ps_codec->as_out_buf[ctxt_sel]; 355 356 /* is last frame to encode */ 357 s_out_buf.u4_is_last = ps_entropy->u4_is_last; 358 359 /* frame idx */ 360 s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high; 361 s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low; 362 363 /********************************************************************/ 364 /* initialize the bit stream buffer */ 
365 /********************************************************************/ 366 ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize); 367 368 /********************************************************************/ 369 /* BEGIN HEADER GENERATION */ 370 /********************************************************************/ 371 if (1 == ps_entropy->i4_gen_header) 372 { 373 /* generate sps */ 374 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps, &ps_codec->s_vui); 375 376 /* generate pps */ 377 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps); 378 379 /* reset i4_gen_header */ 380 ps_entropy->i4_gen_header = 0; 381 } 382 383 /* populate slice header */ 384 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps); 385 386 /* generate slice header */ 387 ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr, 388 ps_pps, ps_sps); 389 390 /* once start of frame / slice is done, you can reset it */ 391 /* it is the responsibility of the caller to set this flag */ 392 ps_entropy->i4_sof = 0; 393 394 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag) 395 { 396 BITSTREAM_BYTE_ALIGN(ps_bitstrm); 397 BITSTREAM_FLUSH(ps_bitstrm); 398 ih264e_init_cabac_ctxt(ps_entropy); 399 } 400 } 401 402 /* begin entropy coding for the mb set */ 403 while (u4_mb_idx < u4_mb_end_idx) 404 { 405 /* init ptrs/indices */ 406 if (ps_entropy->i4_mb_x == i4_wd_mbs) 407 { 408 ps_entropy->i4_mb_y++; 409 ps_entropy->i4_mb_x = 0; 410 411 /* packed mb coeff data */ 412 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) + 413 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data; 414 415 /* packed mb header data */ 416 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) + 417 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data; 418 419 /* proc map */ 420 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs; 421 422 /* 
entropy map */ 423 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs; 424 } 425 426 DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y); 427 ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x); 428 ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y); 429 430 /* wait until the curr mb is core coded */ 431 /* The wait for curr mb to be core coded is essential when entropy is launched 432 * as a separate job 433 */ 434 while (1) 435 { 436 volatile UWORD8 *pu1_buf1; 437 WORD32 idx = ps_entropy->i4_mb_x; 438 439 pu1_buf1 = pu1_proc_map + idx; 440 if (*pu1_buf1) 441 break; 442 ithread_yield(); 443 } 444 445 446 /* write mb layer */ 447 ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy); 448 /* Starting bitstream offset for header in bits */ 449 bitstream_start_offset = GET_NUM_BITS(ps_bitstrm); 450 451 /* set entropy map */ 452 pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1; 453 454 u4_mb_idx++; 455 ps_entropy->i4_mb_x++; 456 /* check for eof */ 457 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag) 458 { 459 if (ps_entropy->i4_mb_x < i4_wd_mbs) 460 { 461 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0); 462 } 463 } 464 465 if (ps_entropy->i4_mb_x == i4_wd_mbs) 466 { 467 /* if slices are enabled */ 468 if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS) 469 { 470 /* current slice index */ 471 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx; 472 473 /* slice map */ 474 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx; 475 476 /* No need to open a slice at end of frame. The current slice can be closed at the time 477 * of signaling eof flag. 
478 */ 479 if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx 480 != pu1_slice_idx[u4_mb_idx])) 481 { 482 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag) 483 { /* mb skip run */ 484 if ((i4_slice_type != ISLICE) 485 && *ps_entropy->pi4_mb_skip_run) 486 { 487 if (*ps_entropy->pi4_mb_skip_run) 488 { 489 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run"); 490 *ps_entropy->pi4_mb_skip_run = 0; 491 } 492 } 493 /* put rbsp trailing bits for the previous slice */ 494 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); 495 } 496 else 497 { 498 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1); 499 } 500 501 /* update slice header pointer */ 502 i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx]; 503 ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx; 504 ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT); 505 506 /* populate slice header */ 507 ps_entropy->i4_mb_start_add = u4_mb_idx; 508 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, 509 ps_sps); 510 511 /* generate slice header */ 512 ps_entropy->i4_error_code |= ih264e_generate_slice_header( 513 ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps); 514 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag) 515 { 516 BITSTREAM_BYTE_ALIGN(ps_bitstrm); 517 BITSTREAM_FLUSH(ps_bitstrm); 518 ih264e_init_cabac_ctxt(ps_entropy); 519 } 520 } 521 else 522 { 523 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag 524 && u4_mb_idx != u4_mb_cnt) 525 { 526 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0); 527 } 528 } 529 } 530 /* Dont execute any further instructions until store synchronization took place */ 531 DATA_SYNC(); 532 } 533 534 /* Ending bitstream offset for header in bits */ 535 bitstream_end_offset = GET_NUM_BITS(ps_bitstrm); 536 ps_entropy->u4_header_bits[i4_slice_type == PSLICE] += 537 bitstream_end_offset - bitstream_start_offset; 538 } 539 540 /* check for eof */ 541 if (u4_mb_idx == u4_mb_cnt) 542 { 543 /* set end of 
frame flag */ 544 ps_entropy->i4_eof = 1; 545 } 546 else 547 { 548 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag 549 && ps_codec->s_cfg.e_slice_mode 550 != IVE_SLICE_MODE_BLOCKS) 551 { 552 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0); 553 } 554 } 555 556 if (ps_entropy->i4_eof) 557 { 558 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag) 559 { 560 /* mb skip run */ 561 if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run) 562 { 563 if (*ps_entropy->pi4_mb_skip_run) 564 { 565 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, 566 ps_entropy->i4_error_code, "mb skip run"); 567 *ps_entropy->pi4_mb_skip_run = 0; 568 } 569 } 570 /* put rbsp trailing bits */ 571 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm); 572 } 573 else 574 { 575 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1); 576 } 577 578 /* update current frame stats to rc library */ 579 { 580 /* number of bytes to stuff */ 581 WORD32 i4_stuff_bytes; 582 583 /* update */ 584 i4_stuff_bytes = ih264e_update_rc_post_enc( 585 ps_codec, ctxt_sel, 586 (ps_proc->ps_codec->i4_poc == 0)); 587 588 /* cbr rc - house keeping */ 589 if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel]) 590 { 591 ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0; 592 } 593 else if (i4_stuff_bytes) 594 { 595 /* add filler nal units */ 596 ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes); 597 } 598 } 599 600 /* 601 *Frame number is to be incremented only if the current frame is a 602 * reference frame. 
After each successful frame encode, we increment 603 * frame number by 1 604 */ 605 if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel] 606 && ps_codec->u4_is_curr_frm_ref) 607 { 608 ps_codec->i4_frame_num++; 609 } 610 /********************************************************************/ 611 /* signal the output */ 612 /********************************************************************/ 613 ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes = 614 ps_entropy->ps_bitstrm->u4_strm_buf_offset; 615 616 DEBUG("entropy status %x", ps_entropy->i4_error_code); 617 } 618 619 /* allow threads to dequeue entropy jobs */ 620 ps_codec->au4_entropy_thread_active[ctxt_sel] = 0; 621 622 return ps_entropy->i4_error_code; 623 } 624 625 /** 626 ******************************************************************************* 627 * 628 * @brief Packs header information of a mb in to a buffer 629 * 630 * @par Description: 631 * After the deciding the mode info of a macroblock, the syntax elements 632 * associated with the mb are packed and stored. The entropy thread unpacks 633 * this buffer and generates the end bit stream. 
634 * 635 * @param[in] ps_proc 636 * Pointer to the current process context 637 * 638 * @returns error status 639 * 640 * @remarks none 641 * 642 ******************************************************************************* 643 */ 644 IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc) 645 { 646 /* curr mb type */ 647 UWORD32 u4_mb_type = ps_proc->u4_mb_type; 648 649 /* pack mb syntax layer of curr mb (used for entropy coding) */ 650 if (u4_mb_type == I4x4) 651 { 652 /* pointer to mb header storage space */ 653 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 654 655 /* temp var */ 656 WORD32 i4, byte; 657 658 /* mb type plus mode */ 659 *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type; 660 661 /* cbp */ 662 *pu1_ptr++ = ps_proc->u4_cbp; 663 664 /* mb qp delta */ 665 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 666 667 /* sub mb modes */ 668 for (i4 = 0; i4 < 16; i4 ++) 669 { 670 byte = 0; 671 672 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] == 673 ps_proc->au1_intra_luma_mb_4x4_modes[i4]) 674 { 675 byte |= 1; 676 } 677 else 678 { 679 680 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] < 681 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4]) 682 { 683 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1); 684 } 685 else 686 { 687 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1; 688 } 689 } 690 691 i4++; 692 693 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] == 694 ps_proc->au1_intra_luma_mb_4x4_modes[i4]) 695 { 696 byte |= 16; 697 } 698 else 699 { 700 701 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] < 702 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4]) 703 { 704 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5); 705 } 706 else 707 { 708 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5; 709 } 710 } 711 712 *pu1_ptr++ = byte; 713 } 714 715 /* end of mb layer */ 716 ps_proc->pv_mb_header_data = pu1_ptr; 717 } 718 else if (u4_mb_type == I16x16) 719 { 720 /* pointer to mb header storage 
space */ 721 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 722 723 /* mb type plus mode */ 724 *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type; 725 726 /* cbp */ 727 *pu1_ptr++ = ps_proc->u4_cbp; 728 729 /* mb qp delta */ 730 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 731 732 /* end of mb layer */ 733 ps_proc->pv_mb_header_data = pu1_ptr; 734 } 735 else if (u4_mb_type == P16x16) 736 { 737 /* pointer to mb header storage space */ 738 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 739 740 WORD16 *i2_mv_ptr; 741 742 /* mb type plus mode */ 743 *pu1_ptr++ = u4_mb_type; 744 745 /* cbp */ 746 *pu1_ptr++ = ps_proc->u4_cbp; 747 748 /* mb qp delta */ 749 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 750 751 i2_mv_ptr = (WORD16 *)pu1_ptr; 752 753 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx; 754 755 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy; 756 757 /* end of mb layer */ 758 ps_proc->pv_mb_header_data = i2_mv_ptr; 759 } 760 else if (u4_mb_type == PSKIP) 761 { 762 /* pointer to mb header storage space */ 763 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 764 765 /* mb type plus mode */ 766 *pu1_ptr++ = u4_mb_type; 767 768 /* end of mb layer */ 769 ps_proc->pv_mb_header_data = pu1_ptr; 770 } 771 else if(u4_mb_type == B16x16) 772 { 773 774 /* pointer to mb header storage space */ 775 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 776 777 WORD16 *i2_mv_ptr; 778 779 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode; 780 781 /* mb type plus mode */ 782 *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type; 783 784 /* cbp */ 785 *pu1_ptr++ = ps_proc->u4_cbp; 786 787 /* mb qp delta */ 788 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 789 790 /* l0 & l1 me data */ 791 i2_mv_ptr = (WORD16 *)pu1_ptr; 792 793 if (u4_pred_mode != PRED_L1) 794 { 795 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx 796 - 
ps_proc->ps_pred_mv[0].s_mv.i2_mvx; 797 798 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy 799 - ps_proc->ps_pred_mv[0].s_mv.i2_mvy; 800 } 801 if (u4_pred_mode != PRED_L0) 802 { 803 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx 804 - ps_proc->ps_pred_mv[1].s_mv.i2_mvx; 805 806 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy 807 - ps_proc->ps_pred_mv[1].s_mv.i2_mvy; 808 } 809 810 /* end of mb layer */ 811 ps_proc->pv_mb_header_data = i2_mv_ptr; 812 813 } 814 else if(u4_mb_type == BDIRECT) 815 { 816 /* pointer to mb header storage space */ 817 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 818 819 /* mb type plus mode */ 820 *pu1_ptr++ = u4_mb_type; 821 822 /* cbp */ 823 *pu1_ptr++ = ps_proc->u4_cbp; 824 825 /* mb qp delta */ 826 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev; 827 828 ps_proc->pv_mb_header_data = pu1_ptr; 829 830 } 831 else if(u4_mb_type == BSKIP) 832 { 833 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode; 834 835 /* pointer to mb header storage space */ 836 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data; 837 838 /* mb type plus mode */ 839 *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type; 840 841 /* end of mb layer */ 842 ps_proc->pv_mb_header_data = pu1_ptr; 843 } 844 845 return IH264E_SUCCESS; 846 } 847 848 /** 849 ******************************************************************************* 850 * 851 * @brief update process context after encoding an mb. This involves preserving 852 * the current mb information for later use, initialize the proc ctxt elements to 853 * encode next mb. 854 * 855 * @par Description: 856 * This function performs house keeping tasks after encoding an mb. 857 * After encoding an mb, various elements of the process context needs to be 858 * updated to encode the next mb. For instance, the source, recon and reference 859 * pointers, mb indices have to be adjusted to the next mb. The slice index of 860 * the current mb needs to be updated. 
If mb qp modulation is enabled, then if 861 * the qp changes the quant param structure needs to be updated. Also to encoding 862 * the next mb, the current mb info is used as part of mode prediction or mv 863 * prediction. Hence the current mb info has to preserved at top/top left/left 864 * locations. 865 * 866 * @param[in] ps_proc 867 * Pointer to the current process context 868 * 869 * @returns none 870 * 871 * @remarks none 872 * 873 ******************************************************************************* 874 */ 875 WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc) 876 { 877 /* error status */ 878 WORD32 error_status = IH264_SUCCESS; 879 880 /* codec context */ 881 codec_t *ps_codec = ps_proc->ps_codec; 882 883 /* curr mb indices */ 884 WORD32 i4_mb_x = ps_proc->i4_mb_x; 885 WORD32 i4_mb_y = ps_proc->i4_mb_y; 886 887 /* mb syntax elements of neighbors */ 888 mb_info_t *ps_left_syn = &ps_proc->s_left_mb_syntax_ele; 889 mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x; 890 mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele; 891 892 /* curr mb type */ 893 UWORD32 u4_mb_type = ps_proc->u4_mb_type; 894 895 /* curr mb type */ 896 UWORD32 u4_is_intra = ps_proc->u4_is_intra; 897 898 /* width in mbs */ 899 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs; 900 901 /*height in mbs*/ 902 WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs; 903 904 /* proc map */ 905 UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs); 906 907 /* deblk context */ 908 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; 909 910 /* deblk bs context */ 911 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt); 912 913 /* top row motion vector info */ 914 enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x; 915 916 /* top left mb motion vector */ 917 enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu; 918 919 /* left mb motion vector */ 920 enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu; 921 922 /* sub mb modes */ 923 UWORD8 *pu1_top_mb_intra_modes = 
ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4); 924 925 /*************************************************************/ 926 /* During MV prediction, when top right mb is not available, */ 927 /* top left mb info. is used for prediction. Hence the curr */ 928 /* top, which will be top left for the next mb needs to be */ 929 /* preserved before updating it with curr mb info. */ 930 /*************************************************************/ 931 932 /* mb type, mb class, csbp */ 933 *ps_top_left_syn = *ps_top_syn; 934 935 if (ps_proc->i4_slice_type != ISLICE) 936 { 937 /*****************************************/ 938 /* update top left with top info results */ 939 /*****************************************/ 940 /* mv */ 941 *ps_top_left_mb_pu = *ps_top_row_pu; 942 } 943 944 /*************************************************/ 945 /* update top and left with curr mb info results */ 946 /*************************************************/ 947 948 /* mb type */ 949 ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type; 950 951 /* mb class */ 952 ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra; 953 954 /* csbp */ 955 ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp; 956 957 /* distortion */ 958 ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion; 959 960 if (u4_is_intra) 961 { 962 /* mb / sub mb modes */ 963 if (I16x16 == u4_mb_type) 964 { 965 pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode; 966 } 967 else if (I4x4 == u4_mb_type) 968 { 969 ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16); 970 ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16); 971 } 972 else if (I8x8 == u4_mb_type) 973 { 974 memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4); 975 memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4); 976 } 977 978 
if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE)) 979 { 980 /* mv */ 981 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu); 982 } 983 984 *ps_proc->pu4_mb_pu_cnt = 1; 985 } 986 else 987 { 988 /* mv */ 989 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu); 990 } 991 992 /* 993 * Mark that the MB has been coded intra 994 * So that future AIRs can skip it 995 */ 996 ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra; 997 998 /**************************************************/ 999 /* pack mb header info. for entropy coding */ 1000 /**************************************************/ 1001 ih264e_pack_header_data(ps_proc); 1002 1003 /* update previous mb qp */ 1004 ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp; 1005 1006 /* store qp */ 1007 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp; 1008 1009 /* 1010 * We need to sync the cache to make sure that the nmv content of proc 1011 * is updated to cache properly 1012 */ 1013 DATA_SYNC(); 1014 1015 /* Just before finishing the row, enqueue the job in to entropy queue. 1016 * The master thread depending on its convenience shall dequeue it and 1017 * performs entropy. 1018 * 1019 * WARN !! Placing this block post proc map update can cause queuing of 1020 * entropy jobs in out of order. 1021 */ 1022 if (i4_mb_x == i4_wd_mbs - 1) 1023 { 1024 /* job structures */ 1025 job_t s_job; 1026 1027 /* job class */ 1028 s_job.i4_cmd = CMD_ENTROPY; 1029 1030 /* number of mbs to be processed in the current job */ 1031 s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs; 1032 1033 /* job start index x */ 1034 s_job.i2_mb_x = 0; 1035 1036 /* job start index y */ 1037 s_job.i2_mb_y = ps_proc->i4_mb_y; 1038 1039 /* proc base idx */ 1040 s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? 
(MAX_PROCESS_CTXT / 2) : 0; 1041 1042 /* queue the job */ 1043 error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1); 1044 1045 if(ps_proc->i4_mb_y == (i4_ht_mbs - 1)) 1046 ih264_list_terminate(ps_codec->pv_entropy_jobq); 1047 } 1048 1049 /* update proc map */ 1050 pu1_proc_map[i4_mb_x] = 1; 1051 1052 /**************************************************/ 1053 /* update proc ctxt elements for encoding next mb */ 1054 /**************************************************/ 1055 /* update indices */ 1056 i4_mb_x ++; 1057 ps_proc->i4_mb_x = i4_mb_x; 1058 1059 if (ps_proc->i4_mb_x == i4_wd_mbs) 1060 { 1061 ps_proc->i4_mb_y++; 1062 ps_proc->i4_mb_x = 0; 1063 } 1064 1065 /* update slice index */ 1066 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x]; 1067 1068 /* update buffers pointers */ 1069 ps_proc->pu1_src_buf_luma += MB_SIZE; 1070 ps_proc->pu1_rec_buf_luma += MB_SIZE; 1071 ps_proc->apu1_ref_buf_luma[0] += MB_SIZE; 1072 ps_proc->apu1_ref_buf_luma[1] += MB_SIZE; 1073 1074 /* 1075 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, 1076 * the stride per MB is MB_SIZE 1077 */ 1078 ps_proc->pu1_src_buf_chroma += MB_SIZE; 1079 ps_proc->pu1_rec_buf_chroma += MB_SIZE; 1080 ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE; 1081 ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE; 1082 1083 1084 1085 /* Reset cost, distortion params */ 1086 ps_proc->i4_mb_cost = INT_MAX; 1087 ps_proc->i4_mb_distortion = SHRT_MAX; 1088 1089 ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt; 1090 1091 ps_proc->pu4_mb_pu_cnt += 1; 1092 1093 /* Update colocated pu */ 1094 if (ps_proc->i4_slice_type == BSLICE) 1095 ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x); 1096 1097 /* deblk ctxts */ 1098 if (ps_proc->u4_disable_deblock_level != 1) 1099 { 1100 /* indices */ 1101 ps_bs->i4_mb_x = ps_proc->i4_mb_x; 1102 ps_bs->i4_mb_y = ps_proc->i4_mb_y; 1103 1104 #ifndef N_MB_ENABLE /* For 
N MB processing update take place inside deblocking function */ 1105 ps_deblk->i4_mb_x ++; 1106 1107 ps_deblk->pu1_cur_pic_luma += MB_SIZE; 1108 /* 1109 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved, 1110 * the stride per MB is MB_SIZE 1111 */ 1112 ps_deblk->pu1_cur_pic_chroma += MB_SIZE; 1113 #endif 1114 } 1115 1116 return error_status; 1117 } 1118 1119 /** 1120 ******************************************************************************* 1121 * 1122 * @brief initialize process context. 1123 * 1124 * @par Description: 1125 * Before dispatching the current job to process thread, the process context 1126 * associated with the job is initialized. Usually every job aims to encode one 1127 * row of mb's. Basing on the row indices provided by the job, the process 1128 * context's buffer ptrs, slice indices and other elements that are necessary 1129 * during core-coding are initialized. 1130 * 1131 * @param[in] ps_proc 1132 * Pointer to the current process context 1133 * 1134 * @returns error status 1135 * 1136 * @remarks none 1137 * 1138 ******************************************************************************* 1139 */ 1140 IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc) 1141 { 1142 /* codec context */ 1143 codec_t *ps_codec = ps_proc->ps_codec; 1144 1145 /* nmb processing context*/ 1146 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt; 1147 1148 /* indices */ 1149 WORD32 i4_mb_x, i4_mb_y; 1150 1151 /* strides */ 1152 WORD32 i4_src_strd = ps_proc->i4_src_strd; 1153 WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd; 1154 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1155 1156 /* quant params */ 1157 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; 1158 1159 /* deblk ctxt */ 1160 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt; 1161 1162 /* deblk bs context */ 1163 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt); 1164 1165 /* Pointer to mv_buffer of current frame */ 1166 mv_buf_t *ps_cur_mv_buf = 
ps_proc->ps_cur_mv_buf; 1167 1168 /* Pointers for color space conversion */ 1169 UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base; 1170 1171 /* Pad the MB to support non standard sizes */ 1172 UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd; 1173 UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht; 1174 UWORD16 u2_num_rows = MB_SIZE; 1175 WORD32 convert_uv_only; 1176 1177 /********************************************************************/ 1178 /* BEGIN INIT */ 1179 /********************************************************************/ 1180 1181 i4_mb_x = ps_proc->i4_mb_x; 1182 i4_mb_y = ps_proc->i4_mb_y; 1183 1184 /* Number of mbs processed in one loop of process function */ 1185 ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs; 1186 ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs; 1187 1188 /* init buffer pointers */ 1189 convert_uv_only = 1; 1190 if (u4_pad_bottom_sz || u4_pad_right_sz || 1191 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE) 1192 { 1193 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) 1194 u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz; 1195 ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base; 1196 i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd; 1197 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE); 1198 convert_uv_only = 0; 1199 } 1200 else 1201 { 1202 i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0]; 1203 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE); 1204 } 1205 1206 1207 if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE || 1208 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P || 1209 ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || 1210 u4_pad_bottom_sz || u4_pad_right_sz) 1211 { 1212 if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) || 1213 (ps_codec->s_cfg.e_inp_color_fmt == 
IV_YUV_420SP_VU)) 1214 ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base; 1215 1216 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE); 1217 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd; 1218 } 1219 else 1220 { 1221 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1]; 1222 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE); 1223 } 1224 1225 ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); 1226 ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); 1227 1228 /* Tempral back and forward reference buffer */ 1229 ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); 1230 ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); 1231 ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE); 1232 ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE); 1233 1234 /* 1235 * Do color space conversion 1236 * NOTE : We assume there that the number of MB's to process will not span multiple rows 1237 */ 1238 switch (ps_codec->s_cfg.e_inp_color_fmt) 1239 { 1240 case IV_YUV_420SP_UV: 1241 case IV_YUV_420SP_VU: 1242 /* In case of 420 semi-planar input, copy last few rows to intermediate 1243 buffer as chroma trans functions access one extra byte due to interleaved input. 
1244 This data will be padded if required */ 1245 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz) 1246 { 1247 WORD32 num_rows = MB_SIZE; 1248 UWORD8 *pu1_src; 1249 UWORD8 *pu1_dst; 1250 WORD32 i; 1251 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) + 1252 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); 1253 1254 pu1_dst = ps_proc->pu1_src_buf_luma; 1255 1256 /* If padding is required, we always copy luma, if padding isn't required we never copy luma. */ 1257 if (u4_pad_bottom_sz || u4_pad_right_sz) { 1258 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1)) 1259 num_rows = MB_SIZE - u4_pad_bottom_sz; 1260 for (i = 0; i < num_rows; i++) 1261 { 1262 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd); 1263 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0]; 1264 pu1_dst += ps_proc->i4_src_strd; 1265 } 1266 } 1267 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) + 1268 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE); 1269 pu1_dst = ps_proc->pu1_src_buf_chroma; 1270 1271 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte 1272 * due to interleaved input 1273 */ 1274 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1)) 1275 num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE); 1276 else 1277 num_rows = BLK8x8SIZE; 1278 for (i = 0; i < num_rows; i++) 1279 { 1280 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd); 1281 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1]; 1282 pu1_dst += ps_proc->i4_src_chroma_strd; 1283 } 1284 1285 } 1286 break; 1287 1288 case IV_YUV_420P : 1289 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) + 1290 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); 1291 1292 pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) + 1293 
ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE); 1294 1295 pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) + 1296 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE); 1297 1298 ps_codec->pf_ih264e_conv_420p_to_420sp( 1299 pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base, 1300 ps_proc->pu1_src_buf_luma, 1301 ps_proc->pu1_src_buf_chroma, u2_num_rows, 1302 ps_codec->s_cfg.u4_disp_wd, 1303 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0], 1304 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1], 1305 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2], 1306 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd, 1307 convert_uv_only); 1308 break; 1309 1310 case IV_YUV_422ILE : 1311 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2) 1312 + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE); 1313 1314 ps_codec->pf_ih264e_fmt_conv_422i_to_420sp( 1315 ps_proc->pu1_src_buf_luma, 1316 ps_proc->pu1_src_buf_chroma, 1317 ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base, 1318 ps_codec->s_cfg.u4_disp_wd, u2_num_rows, 1319 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd, 1320 ps_proc->i4_src_chroma_strd, 1321 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1); 1322 break; 1323 1324 default: 1325 break; 1326 } 1327 1328 if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0)) 1329 { 1330 UWORD32 u4_pad_wd, u4_pad_ht; 1331 u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd); 1332 u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd); 1333 u4_pad_ht = MB_SIZE; 1334 if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) 1335 u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz)); 1336 1337 ih264_pad_right_luma( 1338 ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd, 1339 ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd); 1340 1341 ih264_pad_right_chroma( 1342 ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd, 1343 ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd); 1344 } 1345 1346 /* pad 
bottom edge */ 1347 if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0) 1348 { 1349 ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd, 1350 ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz); 1351 1352 ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2, 1353 ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2)); 1354 } 1355 1356 1357 /* packed mb coeff data */ 1358 ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data; 1359 1360 /* packed mb header data */ 1361 ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data; 1362 1363 /* slice index */ 1364 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x]; 1365 1366 /*********************************************************************/ 1367 /* ih264e_init_quant_params() routine is called at the pic init level*/ 1368 /* this would have initialized the qp. */ 1369 /* TODO_LATER: currently it is assumed that quant params donot change*/ 1370 /* across mb's. 
When they do calculate update ps_qp_params accordingly*/ 1371 /*********************************************************************/ 1372 1373 /* init mv buffer ptr */ 1374 ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * 1375 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE))); 1376 1377 /* Init co-located mv buffer */ 1378 ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs * 1379 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE))); 1380 1381 if (i4_mb_y == 0) 1382 { 1383 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu; 1384 } 1385 else 1386 { 1387 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs * 1388 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE))); 1389 } 1390 1391 ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs); 1392 1393 /* mb type */ 1394 ps_proc->u4_mb_type = I16x16; 1395 1396 /* lambda */ 1397 ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp]; 1398 1399 /* mb distortion */ 1400 ps_proc->i4_mb_distortion = SHRT_MAX; 1401 1402 if (i4_mb_x == 0) 1403 { 1404 ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0; 1405 1406 ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0; 1407 1408 ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0; 1409 1410 if (i4_mb_y == 0) 1411 { 1412 memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t)); 1413 } 1414 } 1415 1416 /* mb cost */ 1417 ps_proc->i4_mb_cost = INT_MAX; 1418 1419 /**********************/ 1420 /* init deblk context */ 1421 /**********************/ 1422 ps_deblk->i4_mb_x = ps_proc->i4_mb_x; 1423 /* deblk lags the current mb proc by 1 row */ 1424 /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */ 1425 /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. 
*/ 1426 /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */ 1427 ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1; 1428 1429 /* buffer ptrs */ 1430 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE); 1431 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE); 1432 1433 /* init deblk bs context */ 1434 /* mb indices */ 1435 ps_bs->i4_mb_x = ps_proc->i4_mb_x; 1436 ps_bs->i4_mb_y = ps_proc->i4_mb_y; 1437 1438 /* init n_mb_process context */ 1439 ps_n_mb_ctxt->i4_mb_x = 0; 1440 ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y; 1441 ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy; 1442 1443 return IH264E_SUCCESS; 1444 } 1445 1446 /** 1447 ******************************************************************************* 1448 * 1449 * @brief This function performs luma & chroma padding 1450 * 1451 * @par Description: 1452 * 1453 * @param[in] ps_proc 1454 * Process context corresponding to the job 1455 * 1456 * @param[in] pu1_curr_pic_luma 1457 * Pointer to luma buffer 1458 * 1459 * @param[in] pu1_curr_pic_chroma 1460 * Pointer to chroma buffer 1461 * 1462 * @param[in] i4_mb_x 1463 * mb index x 1464 * 1465 * @param[in] i4_mb_y 1466 * mb index y 1467 * 1468 * @param[in] i4_pad_ht 1469 * number of rows to be padded 1470 * 1471 * @returns error status 1472 * 1473 * @remarks none 1474 * 1475 ******************************************************************************* 1476 */ 1477 IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc, 1478 UWORD8 *pu1_curr_pic_luma, 1479 UWORD8 *pu1_curr_pic_chroma, 1480 WORD32 i4_mb_x, 1481 WORD32 i4_mb_y, 1482 WORD32 i4_pad_ht) 1483 { 1484 /* codec context */ 1485 codec_t *ps_codec = ps_proc->ps_codec; 1486 1487 /* strides */ 1488 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 1489 1490 if (i4_mb_x == 0) 1491 { 1492 /* padding left luma */ 1493 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT); 
/* padding left chroma */
        ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
    }
    if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
    {
        /* padding right luma */
        ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);

        /* padding right chroma */
        ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);

        if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
        {
            UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
            UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);

            /* padding bottom luma */
            ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);

            /* padding bottom chroma */
            ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
        }
    }

    if (i4_mb_y == 0)
    {
        UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
        UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
        WORD32 wd = MB_SIZE;

        if (i4_mb_x == 0)
        {
            pu1_rec_luma -= PAD_LEFT;
            pu1_rec_chroma -= PAD_LEFT;

            wd += PAD_LEFT;
        }
        if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
        {
            wd += PAD_RIGHT;
        }

        /* padding top luma */
        ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);

        /* padding top chroma */
        ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
    }

    return IH264E_SUCCESS;
}




/**
*******************************************************************************
*
* @brief This function performs deblocking and recon padding for 'n' MBs
*
* @par Description:
*  Called with the indices of the mb that has just been reconstructed.
*  - When deblocking is disabled (u4_disable_deblock_level == 1) the left and
*    right edges of the current mb row are padded directly from the buffers
*    passed in.
*  - Otherwise mbs of the deblock context (ps_proc->s_deblk_ctxt, which is set
*    up one row behind the row being encoded) are deblocked in groups of up to
*    i4_n_mbs, provided the deblock map shows the row above them (and the top
*    right mb) as already deblocked. Left/right padding of the row above the
*    deblocked row is issued from inside that loop.
*  - When the last mb of the last row is reached, the last row itself is
*    deblocked in one stretch and the remaining left/right/top/bottom padding
*    of the recon picture is completed.
*
*  NOTE(review): despite the function name, no half-pel generation is done in
*  this body.
*
* @param[in] ps_proc
*  Process context corresponding to the job
*
* @param[in] pu1_curr_pic_luma
*  Current MB being processed(Luma)
*
* @param[in] pu1_curr_pic_chroma
*  Current MB being processed(Chroma)
*
* @param[in] i4_mb_x
*  Column value of current MB processed
*
* @param[in] i4_mb_y
*  Current row processed
*
* @returns  error status (always IH264E_SUCCESS; an early return only means
*  that nothing could be deblocked yet)
*
* @remarks none
*
*******************************************************************************
*/
IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
                                                     UWORD8 *pu1_curr_pic_luma,
                                                     UWORD8 *pu1_curr_pic_chroma,
                                                     WORD32 i4_mb_x,
                                                     WORD32 i4_mb_y)
{
    /* codec context */
    codec_t *ps_codec = ps_proc->ps_codec;

    /* n_mb processing context */
    n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;

    /* deblk context */
    deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;

    /* strides */
    WORD32 i4_rec_strd = ps_proc->i4_rec_strd;

    /* loop variables */
    WORD32 row, i, j, col;

    /* Padding Width */
    UWORD32 u4_pad_wd;

    /* deblk_map of the row being deblocked */
    UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;

    /* deblk_map of the row above the one being deblocked */
    UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;

    /* byte offset from the recon base at which top padding starts; */
    /* may be negative (-PAD_LEFT) although the name carries a 'u4' prefix */
    WORD32 u4_pad_top = 0;

    /* set to 1 when all required mbs of the previous row are deblocked */
    WORD32 u4_deblk_prev_row = 0;

    /* Number of mbs to be processed */
    WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;

    /* Number of mbs  actually processed
     * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
    WORD32 i4_n_mb_process_count = 0;

    UWORD8 *pu1_pad_bottom_src = NULL;

    UWORD8 *pu1_pad_src_luma = NULL;
    UWORD8 *pu1_pad_src_chroma = NULL;

    /* deblocking disabled: pad the current mb row directly, no lag involved */
    if (ps_proc->u4_disable_deblock_level == 1)
    {
        /* If left most MB is processed, then pad left */
        if (i4_mb_x == 0)
        {
            /* padding left luma */
            ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);

            /* padding left chroma */
            ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
        }
        /*last col*/
        if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
        {
            /* padding right luma */
            ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);

            /* padding right chroma */
            ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
        }
    }

    /* nothing below applies to row 0, unless row 0 is also the last row */
    if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
    {
        /* if number of mb's to be processed are less than 'N', go back.
         * exception to the above clause is end of row */
        if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
        {
            return IH264E_SUCCESS;
        }
        else
        {
            /* mbs pending since the last batch, capped at the batch size */
            i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);

            /* performing deblocking for required number of MBs */
            if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
            {
                u4_deblk_prev_row = 1;

                /* checking whether the top rows are deblocked */
                for (col = 0; col < i4_n_mb_process_count; col++)
                {
                    u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
                }

                /* checking whether the top right MB is deblocked */
                if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
                {
                    u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
                }

                /* Top or Top right MBs not deblocked */
                /* (the i4_mb_y > 0 test repeats the enclosing condition) */
                if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
                {
                    return IH264E_SUCCESS;
                }

                /* 'row' is only a loop counter here: one iteration per mb */
                for (row = 0; row < i4_n_mb_process_count; row++)
                {
                    ih264e_deblock_mb(ps_proc, ps_deblk);

                    pu1_deblk_map[ps_deblk->i4_mb_x] = 1;

                    /* pad the mb row above the one just deblocked */
                    /* (pointers are moved up by one mb row below) */
                    if (ps_deblk->i4_mb_y > 0)
                    {
                        if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
                        {
                            /* padding left luma */
                            ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);

                            /* padding left chroma */
                            ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
                        }

                        if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
                        {
                            /* padding right luma */
                            ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);

                            /* padding right chroma */
                            ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
                        }
                    }
                    ps_deblk->i4_mb_x++;

                    /* chroma is interleaved, so it also advances MB_SIZE bytes per mb */
                    ps_deblk->pu1_cur_pic_luma += MB_SIZE;
                    ps_deblk->pu1_cur_pic_chroma += MB_SIZE;

                }
            }
            else if(i4_mb_y > 0)
            {
                /* deblocking disabled: only advance the deblock context */
                ps_deblk->i4_mb_x += i4_n_mb_process_count;

                ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
                ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
            }

            /* top padding of the picture is issued while row 2 is processed */
            /* NOTE(review): presumably because row 0 is final by then - confirm */
            if (i4_mb_y == 2)
            {
                u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
                u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;

                /* first batch of the row also covers the left padding columns */
                if (ps_n_mb_ctxt->i4_mb_x == 0)
                {
                    u4_pad_wd += PAD_LEFT;
                    u4_pad_top = -PAD_LEFT;
                }

                /* last batch of the row also covers the right padding columns */
                if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
                {
                    u4_pad_wd += PAD_RIGHT;
                }

                /* padding top luma */
                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);

                /* padding top chroma */
                ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
            }

            ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;

            /* end of row */
            if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
            {
                /* end of picture: flush the last row, which has no row below it */
                if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
                {
                    /* Bottom Padding is done in one stretch for the entire width */
                    if (ps_proc->u4_disable_deblock_level != 1)
                    {
                        /* re-point the deblock context at the start of the last row */
                        ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;

                        ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;

                        ps_n_mb_ctxt->i4_mb_x = 0;
                        ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
                        ps_deblk->i4_mb_x = 0;
                        ps_deblk->i4_mb_y = ps_proc->i4_mb_y;

                        /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
                        ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;

                        /* remainder mbs after the full batches */
                        i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;

                        /* number of full batches in the row */
                        j = (ps_proc->i4_wd_mbs) / i4_n_mbs;

                        /* NOTE(review): pu1_deblk_map was computed at function entry */
                        /* from the lagged ps_deblk->i4_mb_y, not the value assigned */
                        /* above - confirm the intended map row */
                        for (i = 0; i < j; i++)
                        {
                            for (col = 0; col < i4_n_mbs; col++)
                            {
                                ih264e_deblock_mb(ps_proc, ps_deblk);

                                pu1_deblk_map[ps_deblk->i4_mb_x] = 1;

                                ps_deblk->i4_mb_x++;
                                ps_deblk->pu1_cur_pic_luma += MB_SIZE;
                                ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
                                ps_n_mb_ctxt->i4_mb_x++;
                            }
                        }

                        for (col = 0; col < i4_n_mb_process_count; col++)
                        {
                            ih264e_deblock_mb(ps_proc, ps_deblk);

                            pu1_deblk_map[ps_deblk->i4_mb_x] = 1;

                            ps_deblk->i4_mb_x++;
                            ps_deblk->pu1_cur_pic_luma += MB_SIZE;
                            ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
                            ps_n_mb_ctxt->i4_mb_x++;
                        }

                        /* left padding of the last two mb rows, starting at row ht_mbs - 2 */
                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;

                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;

                        /* padding left luma */
                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);

                        /* padding left chroma */
                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);

                        /* move down to the last mb row */
                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;

                        /* padding left luma */
                        ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);

                        /* padding left chroma */
                        ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);

                        /* right padding of the same two rows: point just past the last mb column */
                        pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;

                        pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;

                        /* padding right luma */
                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);

                        /* padding right chroma */
                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);

                        /* move down to the last mb row */
                        pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
                        pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;

                        /* padding right luma */
                        ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);

                        /* padding right chroma */
                        ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);

                    }

                    /* In case height is at most 2 MBs pad top here, as the */
                    /* (i4_mb_y == 2) top padding above is never reached */
                    if (ps_proc->i4_ht_mbs <= 2)
                    {
                        UWORD8 *pu1_pad_top_src;
                        /* padding top luma */
                        pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
                        ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);

                        /* padding top chroma */
                        pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
                        ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
                    }

                    /* padding bottom luma */
                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);

                    /* padding bottom chroma */
                    pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
                    ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
                }
            }
        }
    }

    return IH264E_SUCCESS;
}


/**
*******************************************************************************
*
* @brief This function performs luma & chroma core coding for a set of mb's.
*
* @par Description:
*  The mb to be coded is taken and is evaluated over a predefined set of modes
*  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
*  is selected and using intra/inter prediction filters, prediction is carried out.
*  The deviation between src and pred signal constitutes error signal. This error
*  signal is transformed (hierarchical transform if necessary) and quantized. The
*  quantized residue is packed in to entropy buffer for entropy coding. This is
*  repeated for all the mb's enlisted under the job.
1880 * 1881 * @param[in] ps_proc 1882 * Process context corresponding to the job 1883 * 1884 * @returns error status 1885 * 1886 * @remarks none 1887 * 1888 ******************************************************************************* 1889 */ 1890 WORD32 ih264e_process(process_ctxt_t *ps_proc) 1891 { 1892 /* error status */ 1893 WORD32 error_status = IH264_SUCCESS; 1894 1895 /* codec context */ 1896 codec_t *ps_codec = ps_proc->ps_codec; 1897 1898 /* cbp luma, chroma */ 1899 UWORD32 u4_cbp_l, u4_cbp_c; 1900 1901 /* width in mbs */ 1902 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs; 1903 1904 /* loop var */ 1905 WORD32 i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt; 1906 1907 /* valid modes */ 1908 UWORD32 u4_valid_modes = 0; 1909 1910 /* gate threshold */ 1911 WORD32 i4_gate_threshold = 0; 1912 1913 /* is intra */ 1914 WORD32 luma_idx, chroma_idx, is_intra; 1915 1916 /* temp variables */ 1917 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS; 1918 1919 /* 1920 * list of modes for evaluation 1921 * ------------------------------------------------------------------------- 1922 * Note on enabling I4x4 and I16x16 1923 * At very low QP's the hadamard transform in I16x16 will push up the maximum 1924 * coeff value very high. CAVLC may not be able to represent the value and 1925 * hence the stream may not be decodable in some clips. 1926 * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset. 1927 */ 1928 if (ps_proc->i4_slice_type == ISLICE) 1929 { 1930 if (ps_proc->u4_frame_qp > 10) 1931 { 1932 /* enable intra 16x16 */ 1933 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; 1934 1935 /* enable intra 8x8 */ 1936 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0; 1937 } 1938 1939 /* enable intra 4x4 */ 1940 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? 
(1 << I4x4) : 0; 1941 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4; 1942 1943 } 1944 else if (ps_proc->i4_slice_type == PSLICE) 1945 { 1946 if (ps_proc->u4_frame_qp > 10) 1947 { 1948 /* enable intra 16x16 */ 1949 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; 1950 } 1951 1952 /* enable intra 4x4 */ 1953 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) 1954 { 1955 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; 1956 } 1957 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4; 1958 1959 /* enable inter P16x16 */ 1960 u4_valid_modes |= (1 << P16x16); 1961 } 1962 else if (ps_proc->i4_slice_type == BSLICE) 1963 { 1964 if (ps_proc->u4_frame_qp > 10) 1965 { 1966 /* enable intra 16x16 */ 1967 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0; 1968 } 1969 1970 /* enable intra 4x4 */ 1971 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) 1972 { 1973 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0; 1974 } 1975 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4; 1976 1977 /* enable inter B16x16 */ 1978 u4_valid_modes |= (1 << B16x16); 1979 } 1980 1981 1982 /* init entropy */ 1983 ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x; 1984 ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y; 1985 ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x); 1986 1987 /* compute recon when : 1988 * 1. current frame is to be used as a reference 1989 * 2. 
dump recon for bit stream sanity check 1990 */ 1991 ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref || 1992 ps_codec->s_cfg.u4_enable_recon; 1993 1994 /* Encode 'n' macroblocks, 1995 * 'n' being the number of mbs dictated by current proc ctxt */ 1996 for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++) 1997 { 1998 /* since we have not yet found sad, we have not yet got min sad */ 1999 /* we need to initialize these variables for each MB */ 2000 /* TODO how to get the min sad into the codec */ 2001 ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad; 2002 ps_proc->u4_min_sad_reached = 0; 2003 2004 /* mb analysis */ 2005 { 2006 /* temp var */ 2007 WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs; 2008 2009 /* force intra refresh ? */ 2010 WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) || 2011 (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt); 2012 2013 /* evaluate inter 16x16 modes */ 2014 if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16))) 2015 { 2016 /* compute nmb me */ 2017 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0) 2018 { 2019 ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me, 2020 i4_wd_mbs - ps_proc->i4_mb_x)); 2021 } 2022 2023 /* set pointers to ME data appropriately for other modules to use */ 2024 { 2025 UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ; 2026 2027 /* get the min sad condition for current mb */ 2028 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached; 2029 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad; 2030 2031 ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]); 2032 ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl); 2033 ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]); 2034 2035 ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion; 2036 ps_proc->i4_mb_cost = 
ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost; 2037 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad; 2038 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached; 2039 ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type; 2040 2041 /* get the best sub pel buffer */ 2042 ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf; 2043 ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd; 2044 } 2045 ih264e_derive_nghbr_avbl_of_mbs(ps_proc); 2046 } 2047 else 2048 { 2049 /* Derive neighbor availability for the current macroblock */ 2050 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl; 2051 2052 ih264e_derive_nghbr_avbl_of_mbs(ps_proc); 2053 } 2054 2055 /* 2056 * If air says intra, we need to force the following code path to evaluate intra 2057 * The easy way is just to say that the inter cost is too much 2058 */ 2059 if (!i4_air_enable_inter) 2060 { 2061 ps_proc->u4_min_sad_reached = 0; 2062 ps_proc->i4_mb_cost = INT_MAX; 2063 ps_proc->i4_mb_distortion = INT_MAX; 2064 } 2065 else if (ps_proc->u4_mb_type == PSKIP) 2066 { 2067 goto UPDATE_MB_INFO; 2068 } 2069 2070 /* wait until the proc of [top + 1] mb is computed. 
2071 * We wait till the proc dependencies are satisfied */ 2072 if(ps_proc->i4_mb_y > 0) 2073 { 2074 /* proc map */ 2075 UWORD8 *pu1_proc_map_top; 2076 2077 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs); 2078 2079 while (1) 2080 { 2081 volatile UWORD8 *pu1_buf; 2082 WORD32 idx = i4_mb_idx + 1; 2083 2084 idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1)); 2085 pu1_buf = pu1_proc_map_top + idx; 2086 if(*pu1_buf) 2087 break; 2088 ithread_yield(); 2089 } 2090 } 2091 2092 /* If we already have the minimum sad, there is no point in searching for sad again */ 2093 if (ps_proc->u4_min_sad_reached == 0) 2094 { 2095 /* intra gating in inter slices */ 2096 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/ 2097 if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate) 2098 { 2099 /* distortion of neighboring blocks */ 2100 WORD32 i4_distortion[4]; 2101 2102 i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion; 2103 2104 i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion; 2105 2106 i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion; 2107 2108 i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion; 2109 2110 i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2; 2111 2112 } 2113 2114 2115 /* If we are going to force intra we need to evaluate intra irrespective of gating */ 2116 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion)) 2117 { 2118 /* evaluate intra 4x4 modes */ 2119 if (u4_valid_modes & (1 << I4x4)) 2120 { 2121 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST) 2122 { 2123 ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc); 2124 } 2125 else 2126 { 2127 
ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc); 2128 } 2129 } 2130 2131 /* evaluate intra 16x16 modes */ 2132 if (u4_valid_modes & (1 << I16x16)) 2133 { 2134 ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc); 2135 } 2136 2137 /* evaluate intra 8x8 modes */ 2138 if (u4_valid_modes & (1 << I8x8)) 2139 { 2140 ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc); 2141 } 2142 2143 } 2144 } 2145 } 2146 2147 /* is intra */ 2148 if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8) 2149 { 2150 luma_idx = ps_proc->u4_mb_type; 2151 chroma_idx = 0; 2152 is_intra = 1; 2153 2154 /* evaluate chroma blocks for intra */ 2155 ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc); 2156 } 2157 else 2158 { 2159 luma_idx = 3; 2160 chroma_idx = 1; 2161 is_intra = 0; 2162 } 2163 ps_proc->u4_is_intra = is_intra; 2164 ps_proc->ps_pu->b1_intra_flag = is_intra; 2165 2166 /* redo MV pred of neighbors in the case intra mb */ 2167 /* TODO : currently called unconditionally, needs to be called only in the case of intra 2168 * to modify neighbors */ 2169 if (ps_proc->i4_slice_type != ISLICE) 2170 { 2171 ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type); 2172 } 2173 2174 /* Perform luma mb core coding */ 2175 u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc); 2176 2177 /* Perform luma mb core coding */ 2178 u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc); 2179 2180 /* coded block pattern */ 2181 ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l; 2182 2183 if (!ps_proc->u4_is_intra) 2184 { 2185 if (ps_proc->i4_slice_type == BSLICE) 2186 { 2187 if (ih264e_find_bskip_params(ps_proc, PRED_L0)) 2188 { 2189 ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? 
BDIRECT : BSKIP; 2190 } 2191 } 2192 else if(!ps_proc->u4_cbp) 2193 { 2194 if (ih264e_find_pskip_params(ps_proc, PRED_L0)) 2195 { 2196 ps_proc->u4_mb_type = PSKIP; 2197 } 2198 } 2199 } 2200 2201 UPDATE_MB_INFO: 2202 2203 /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */ 2204 ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc); 2205 2206 /**********************************************************************/ 2207 /* if disable deblock level is '0' this implies enable deblocking for */ 2208 /* all edges of all macroblocks with out any restrictions */ 2209 /* */ 2210 /* if disable deblock level is '1' this implies disable deblocking for*/ 2211 /* all edges of all macroblocks with out any restrictions */ 2212 /* */ 2213 /* if disable deblock level is '2' this implies enable deblocking for */ 2214 /* all edges of all macroblocks except edges overlapping with slice */ 2215 /* boundaries. This option is not currently supported by the encoder */ 2216 /* hence the slice map should be of no significance to perform debloc */ 2217 /* king */ 2218 /**********************************************************************/ 2219 2220 if (ps_proc->u4_compute_recon) 2221 { 2222 /* deblk context */ 2223 /* src pointers */ 2224 UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma; 2225 UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma; 2226 2227 /* src indices */ 2228 UWORD32 i4_mb_x = ps_proc->i4_mb_x; 2229 UWORD32 i4_mb_y = ps_proc->i4_mb_y; 2230 2231 /* compute blocking strength */ 2232 if (ps_proc->u4_disable_deblock_level != 1) 2233 { 2234 ih264e_compute_bs(ps_proc); 2235 } 2236 2237 /* nmb deblocking and hpel and padding */ 2238 ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma, 2239 pu1_cur_pic_chroma, i4_mb_x, 2240 i4_mb_y); 2241 } 2242 2243 /* update the context after for coding next mb */ 2244 error_status |= ih264e_update_proc_ctxt(ps_proc); 2245 2246 /* Once the last row is processed, mark the buffer status appropriately */ 
2247 if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y) 2248 { 2249 /* Pointer to current picture buffer structure */ 2250 pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic; 2251 2252 /* Pointer to current picture's mv buffer structure */ 2253 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf; 2254 2255 /**********************************************************************/ 2256 /* if disable deblock level is '0' this implies enable deblocking for */ 2257 /* all edges of all macroblocks with out any restrictions */ 2258 /* */ 2259 /* if disable deblock level is '1' this implies disable deblocking for*/ 2260 /* all edges of all macroblocks with out any restrictions */ 2261 /* */ 2262 /* if disable deblock level is '2' this implies enable deblocking for */ 2263 /* all edges of all macroblocks except edges overlapping with slice */ 2264 /* boundaries. This option is not currently supported by the encoder */ 2265 /* hence the slice map should be of no significance to perform debloc */ 2266 /* king */ 2267 /**********************************************************************/ 2268 error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC); 2269 2270 error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC); 2271 2272 if (ps_codec->s_cfg.u4_enable_recon) 2273 { 2274 /* pic cnt */ 2275 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt; 2276 2277 /* rec buffers */ 2278 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf = *ps_proc->ps_cur_pic; 2279 2280 /* is last? 
*/ 2281 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last; 2282 2283 /* frame time stamp */ 2284 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high; 2285 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low; 2286 } 2287 2288 } 2289 } 2290 2291 DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y); 2292 2293 return error_status; 2294 } 2295 2296 /** 2297 ******************************************************************************* 2298 * 2299 * @brief 2300 * Function to update rc context after encoding 2301 * 2302 * @par Description 2303 * This function updates the rate control context after the frame is encoded. 2304 * Number of bits consumed by the current frame, frame distortion, frame cost, 2305 * number of intra/inter mb's, ... are passed on to rate control context for 2306 * updating the rc model. 2307 * 2308 * @param[in] ps_codec 2309 * Handle to codec context 2310 * 2311 * @param[in] ctxt_sel 2312 * frame context selector 2313 * 2314 * @param[in] pic_cnt 2315 * pic count 2316 * 2317 * @returns i4_stuffing_byte 2318 * number of stuffing bytes (if necessary) 2319 * 2320 * @remarks 2321 * 2322 ******************************************************************************* 2323 */ 2324 WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm) 2325 { 2326 /* proc set base idx */ 2327 WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? 
(MAX_PROCESS_CTXT / 2) : 0; 2328 2329 /* proc ctxt */ 2330 process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base]; 2331 2332 /* frame qp */ 2333 UWORD8 u1_frame_qp = ps_codec->u4_frame_qp; 2334 2335 /* cbr rc return status */ 2336 WORD32 i4_stuffing_byte = 0; 2337 2338 /* current frame stats */ 2339 frame_info_t s_frame_info; 2340 picture_type_e rc_pic_type; 2341 2342 /* temp var */ 2343 WORD32 i, j; 2344 2345 /********************************************************************/ 2346 /* BEGIN INIT */ 2347 /********************************************************************/ 2348 2349 /* init frame info */ 2350 irc_init_frame_info(&s_frame_info); 2351 2352 /* get frame info */ 2353 for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++) 2354 { 2355 /*****************************************************************/ 2356 /* One frame can be encoded by max of u4_num_cores threads */ 2357 /* Accumulating the num mbs, sad, qp and intra_mb_cost from */ 2358 /* u4_num_cores threads */ 2359 /*****************************************************************/ 2360 for (j = 0; j< MAX_MB_TYPE; j++) 2361 { 2362 s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j]; 2363 2364 s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j]; 2365 2366 s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j]; 2367 } 2368 2369 s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum; 2370 2371 s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum; 2372 2373 /*****************************************************************/ 2374 /* gather number of residue and header bits consumed by the frame*/ 2375 /*****************************************************************/ 2376 ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy); 2377 } 2378 2379 /* get pic type */ 2380 switch (ps_codec->pic_type) 2381 { 2382 case PIC_I: 2383 case PIC_IDR: 2384 rc_pic_type = I_PIC; 2385 break; 2386 case PIC_P: 2387 rc_pic_type 
= P_PIC; 2388 break; 2389 case PIC_B: 2390 rc_pic_type = B_PIC; 2391 break; 2392 default: 2393 assert(0); 2394 break; 2395 } 2396 2397 /* update rc lib with current frame stats */ 2398 i4_stuffing_byte = ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api, 2399 &(s_frame_info), 2400 ps_codec->s_rate_control.pps_pd_frm_rate, 2401 ps_codec->s_rate_control.pps_time_stamp, 2402 ps_codec->s_rate_control.pps_frame_time, 2403 (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs), 2404 &rc_pic_type, 2405 i4_is_first_frm, 2406 &ps_codec->s_rate_control.post_encode_skip[ctxt_sel], 2407 u1_frame_qp, 2408 &ps_codec->s_rate_control.num_intra_in_prev_frame, 2409 &ps_codec->s_rate_control.i4_avg_activity); 2410 return i4_stuffing_byte; 2411 } 2412 2413 /** 2414 ******************************************************************************* 2415 * 2416 * @brief 2417 * entry point of a spawned encoder thread 2418 * 2419 * @par Description: 2420 * The encoder thread dequeues a proc/entropy job from the encoder queue and 2421 * calls necessary routines. 
2422 * 2423 * @param[in] pv_proc 2424 * Process context corresponding to the thread 2425 * 2426 * @returns error status 2427 * 2428 * @remarks 2429 * 2430 ******************************************************************************* 2431 */ 2432 WORD32 ih264e_process_thread(void *pv_proc) 2433 { 2434 /* error status */ 2435 IH264_ERROR_T ret = IH264_SUCCESS; 2436 WORD32 error_status = IH264_SUCCESS; 2437 2438 /* proc ctxt */ 2439 process_ctxt_t *ps_proc = pv_proc; 2440 2441 /* codec ctxt */ 2442 codec_t *ps_codec = ps_proc->ps_codec; 2443 2444 /* structure to represent a processing job entry */ 2445 job_t s_job; 2446 2447 /* blocking call : entropy dequeue is non-blocking till all 2448 * the proc jobs are processed */ 2449 WORD32 is_blocking = 0; 2450 2451 /* set affinity */ 2452 ithread_set_affinity(ps_proc->i4_id); 2453 2454 while(1) 2455 { 2456 /* dequeue a job from the entropy queue */ 2457 { 2458 int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex); 2459 2460 /* codec context selector */ 2461 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS; 2462 2463 volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel]; 2464 2465 /* have the lock */ 2466 if (error == 0) 2467 { 2468 if (*pu4_buf == 0) 2469 { 2470 /* no entropy threads are active, try dequeuing a job from the entropy queue */ 2471 ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking); 2472 if (IH264_SUCCESS == ret) 2473 { 2474 *pu4_buf = 1; 2475 ithread_mutex_unlock(ps_codec->pv_entropy_mutex); 2476 goto WORKER; 2477 } 2478 else if(is_blocking) 2479 { 2480 ithread_mutex_unlock(ps_codec->pv_entropy_mutex); 2481 break; 2482 } 2483 } 2484 ithread_mutex_unlock(ps_codec->pv_entropy_mutex); 2485 } 2486 } 2487 2488 /* dequeue a job from the process queue */ 2489 ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1); 2490 if (IH264_SUCCESS != ret) 2491 { 2492 if(ps_proc->i4_id) 2493 break; 2494 else 2495 { 2496 is_blocking = 1; 2497 continue; 2498 } 
2499 } 2500 2501 WORKER: 2502 /* choose appropriate proc context based on proc_base_idx */ 2503 ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx]; 2504 2505 switch (s_job.i4_cmd) 2506 { 2507 case CMD_PROCESS: 2508 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt; 2509 ps_proc->i4_mb_x = s_job.i2_mb_x; 2510 ps_proc->i4_mb_y = s_job.i2_mb_y; 2511 2512 /* init process context */ 2513 ih264e_init_proc_ctxt(ps_proc); 2514 2515 /* core code all mbs enlisted under the current job */ 2516 error_status |= ih264e_process(ps_proc); 2517 break; 2518 2519 case CMD_ENTROPY: 2520 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x; 2521 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y; 2522 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt; 2523 2524 /* init entropy */ 2525 ih264e_init_entropy_ctxt(ps_proc); 2526 2527 /* entropy code all mbs enlisted under the current job */ 2528 error_status |= ih264e_entropy(ps_proc); 2529 break; 2530 2531 default: 2532 error_status |= IH264_FAIL; 2533 break; 2534 } 2535 } 2536 2537 /* send error code */ 2538 ps_proc->i4_error_code = error_status; 2539 return ret; 2540 } 2541