1 /****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 /** 22 ******************************************************************************* 23 * @file 24 * ih264e_intra_modes_eval.c 25 * 26 * @brief 27 * This file contains definitions of routines that perform rate distortion 28 * analysis on a macroblock if they are to be coded as intra. 29 * 30 * @author 31 * ittiam 32 * 33 * @par List of Functions: 34 * - ih264e_derive_neighbor_availability_of_mbs() 35 * - ih264e_derive_ngbr_avbl_of_mb_partitions() 36 * - ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff() 37 * - ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff() 38 * - ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff() 39 * - ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton() 40 * - ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff() 41 * - ih264e_evaluate_intra16x16_modes() 42 * - ih264e_evaluate_intra4x4_modes() 43 * - ih264e_evaluate_intra_chroma_modes() 44 * 45 * @remarks 46 * None 47 * 48 ******************************************************************************* 49 */ 50 51 /*****************************************************************************/ 52 /* File Includes */ 53 /*****************************************************************************/ 54 55 /* System include files */ 56 #include <stdio.h> 57 #include <string.h> 58 #include <limits.h> 59 #include <assert.h> 60 61 /* User include files */ 62 #include "ih264e_config.h" 63 #include "ih264_typedefs.h" 64 #include "ih264e_defs.h" 65 #include "iv2.h" 66 #include "ive2.h" 67 #include "ih264_debug.h" 68 #include "ih264_defs.h" 69 #include "ih264_macros.h" 70 #include "ih264_intra_pred_filters.h" 71 #include "ih264_structs.h" 72 #include "ih264_common_tables.h" 73 #include "ih264_trans_quant_itrans_iquant.h" 74 #include "ih264_inter_pred_filters.h" 75 #include "ih264_mem_fns.h" 76 #include "ih264_padding.h" 77 #include "ih264_deblk_edge_filters.h" 78 #include "ih264_cabac_tables.h" 79 #include "ime_distortion_metrics.h" 80 #include "ih264e_error.h" 81 #include "ih264e_bitstream.h" 82 #include "ime_defs.h" 83 #include "ime_structs.h" 84 #include "irc_cntrl_param.h" 85 #include "irc_frame_info_collector.h" 86 #include "ih264e_rate_control.h" 87 #include "ih264e_cabac_structs.h" 88 #include "ih264e_structs.h" 89 #include "ih264e_intra_modes_eval.h" 90 #include "ih264e_globals.h" 91 #include "ime_platform_macros.h" 92 93 94 /*****************************************************************************/ 95 /* Function Definitions */ 96 /*****************************************************************************/ 97 98 /** 99 ****************************************************************************** 100 * 101 * @brief 102 * derivation process for macroblock availability 103 * 104 * @par Description 105 * Calculates the availability of the left, top, topright and topleft macroblocks. 106 * 107 * @param[in] ps_proc_ctxt 108 * pointer to proc context (handle) 109 * 110 * @remarks Based on section 6.4.5 in H264 spec 111 * 112 * @return none 113 * 114 ****************************************************************************** 115 */ 116 void ih264e_derive_nghbr_avbl_of_mbs(process_ctxt_t *ps_proc) 117 { 118 UWORD8 *pu1_slice_idx_curr = ps_proc->pu1_slice_idx; 119 UWORD8 *pu1_slice_idx_b; 120 UWORD8 *pu1_slice_idx_a; 121 UWORD8 *pu1_slice_idx_c; 122 UWORD8 *pu1_slice_idx_d; 123 block_neighbors_t *ps_ngbr_avbl; 124 WORD32 i4_mb_x, i4_mb_y; 125 WORD32 i4_wd_mbs; 126 127 i4_mb_x = ps_proc->i4_mb_x; 128 i4_mb_y = ps_proc->i4_mb_y; 129 130 i4_wd_mbs = ps_proc->i4_wd_mbs; 131 132 pu1_slice_idx_curr += (i4_mb_y * i4_wd_mbs) + i4_mb_x; 133 pu1_slice_idx_a = pu1_slice_idx_curr - 1; 134 pu1_slice_idx_b = pu1_slice_idx_curr - i4_wd_mbs; 135 pu1_slice_idx_c = pu1_slice_idx_b + 1; 136 pu1_slice_idx_d = pu1_slice_idx_b - 1; 137 ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; 138 139 /**********************************************************************/ 140 /* The macroblock is marked as available, unless one of the following */ 141 /* conditions is true in which case the macroblock shall be marked as */ 142 /* not available. */ 143 /* 1. mbAddr < 0 */ 144 /* 2 mbAddr > CurrMbAddr */ 145 /* 3. the macroblock with address mbAddr belongs to a different slice */ 146 /* than the macroblock with address CurrMbAddr */ 147 /**********************************************************************/ 148 149 /* left macroblock availability */ 150 if (i4_mb_x == 0) 151 { /* macroblocks along first column */ 152 ps_ngbr_avbl->u1_mb_a = 0; 153 } 154 else 155 { /* macroblocks belong to same slice? */ 156 if (*pu1_slice_idx_a != *pu1_slice_idx_curr) 157 ps_ngbr_avbl->u1_mb_a = 0; 158 else 159 ps_ngbr_avbl->u1_mb_a = 1; 160 } 161 162 /* top macroblock availability */ 163 if (i4_mb_y == 0) 164 { /* macroblocks along first row */ 165 ps_ngbr_avbl->u1_mb_b = 0; 166 } 167 else 168 { /* macroblocks belong to same slice? */ 169 if (*pu1_slice_idx_b != *pu1_slice_idx_curr) 170 ps_ngbr_avbl->u1_mb_b = 0; 171 else 172 ps_ngbr_avbl->u1_mb_b = 1; 173 } 174 175 /* top right macroblock availability */ 176 if (i4_mb_x == i4_wd_mbs-1 || i4_mb_y == 0) 177 { /* macroblocks along last column */ 178 ps_ngbr_avbl->u1_mb_c = 0; 179 } 180 else 181 { /* macroblocks belong to same slice? */ 182 if (*pu1_slice_idx_c != *pu1_slice_idx_curr) 183 ps_ngbr_avbl->u1_mb_c = 0; 184 else 185 ps_ngbr_avbl->u1_mb_c = 1; 186 } 187 188 /* top left macroblock availability */ 189 if (i4_mb_x == 0 || i4_mb_y == 0) 190 { /* macroblocks along first column */ 191 ps_ngbr_avbl->u1_mb_d = 0; 192 } 193 else 194 { /* macroblocks belong to same slice? */ 195 if (*pu1_slice_idx_d != *pu1_slice_idx_curr) 196 ps_ngbr_avbl->u1_mb_d = 0; 197 else 198 ps_ngbr_avbl->u1_mb_d = 1; 199 } 200 } 201 202 /** 203 ****************************************************************************** 204 * 205 * @brief 206 * derivation process for subblock/partition availability 207 * 208 * @par Description 209 * Calculates the availability of the left, top, topright and topleft subblock 210 * or partitions. 211 * 212 * @param[in] ps_proc_ctxt 213 * pointer to macroblock context (handle) 214 * 215 * @param[in] i1_pel_pos_x 216 * column position of the pel wrt the current block 217 * 218 * @param[in] i1_pel_pos_y 219 * row position of the pel in wrt current block 220 * 221 * @remarks Assumptions: before calling this function it is assumed that 222 * the neighbor availability of the current macroblock is already derived. 223 * Based on table 6-3 of H264 specification 224 * 225 * @return availability status (yes or no) 226 * 227 ****************************************************************************** 228 */ 229 UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl, 230 WORD8 i1_pel_pos_x, 231 WORD8 i1_pel_pos_y) 232 { 233 UWORD8 u1_neighbor_avail=0; 234 235 /**********************************************************************/ 236 /* values of i1_pel_pos_x in the range 0-15 inclusive correspond to */ 237 /* various columns of a macroblock */ 238 /* */ 239 /* values of i1_pel_pos_y in the range 0-15 inclusive correspond to */ 240 /* various rows of a macroblock */ 241 /* */ 242 /* other values of i1_pel_pos_x & i1_pel_pos_y represents elements */ 243 /* outside the bound of an mb ie., represents its neighbors. */ 244 /**********************************************************************/ 245 if (i1_pel_pos_x < 0) 246 { /* column(-1) */ 247 if (i1_pel_pos_y < 0) 248 { /* row(-1) */ 249 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_d; /* current mb topleft availability */ 250 } 251 else if (i1_pel_pos_y >= 0 && i1_pel_pos_y < 16) 252 { /* all rows of a macroblock */ 253 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_a; /* current mb left availability */ 254 } 255 else /* if (i1_pel_pos_y >= 16) */ 256 { /* rows(+16) */ 257 u1_neighbor_avail = 0; /* current mb bottom left availability */ 258 } 259 } 260 else if (i1_pel_pos_x >= 0 && i1_pel_pos_x < 16) 261 { /* all columns of a macroblock */ 262 if (i1_pel_pos_y < 0) 263 { /* row(-1) */ 264 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_b; /* current mb top availability */ 265 } 266 else if (i1_pel_pos_y >= 0 && i1_pel_pos_y < 16) 267 { /* all rows of a macroblock */ 268 u1_neighbor_avail = 1; /* current mb availability */ 269 /* availability of the partition is dependent on the position of the partition inside the mb */ 270 /* although the availability is declared as 1 in all cases these needs to be corrected somewhere else and this is not done in here */ 271 } 272 else /* if (i1_pel_pos_y >= 16) */ 273 { /* rows(+16) */ 274 u1_neighbor_avail = 0; /* current mb bottom availability */ 275 } 276 } 277 else if (i1_pel_pos_x >= 16) 278 { /* column(+16) */ 279 if (i1_pel_pos_y < 0) 280 { /* row(-1) */ 281 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_c; /* current mb top right availability */ 282 } 283 else /* if (i1_pel_pos_y >= 0) */ 284 { /* all other rows */ 285 u1_neighbor_avail = 0; /* current mb right & bottom right availability */ 286 } 287 } 288 289 return u1_neighbor_avail; 290 } 291 292 /** 293 ****************************************************************************** 294 * 295 * @brief 296 * evaluate best intra 16x16 mode (rate distortion opt off) 297 * 298 * @par Description 299 * This function evaluates all the possible intra 16x16 modes and finds the mode 300 * that best represents the macro-block (least distortion) and occupies fewer 301 * bits in the bit-stream. 302 * 303 * @param[in] ps_proc_ctxt 304 * pointer to process context (handle) 305 * 306 * @remarks 307 * Ideally the cost of encoding a macroblock is calculated as 308 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the 309 * input block and the reconstructed block and rate is the number of bits taken 310 * to place the macroblock in the bit-stream. In this routine the rate does not 311 * exactly point to the total number of bits it takes, rather it points to header 312 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits 313 * and residual bits fall in to texture bits the number of bits taken to encoding 314 * mbtype is considered as rate, we compute cost. Further we will approximate 315 * the distortion as the deviation b/w input and the predicted block as opposed 316 * to input and reconstructed block. 317 * 318 * NOTE: As per the Document JVT-O079, for intra 16x16 macroblock, 319 * the SAD and cost are one and the same. 320 * 321 * @return none 322 * 323 ****************************************************************************** 324 */ 325 326 void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc) 327 { 328 /* Codec Context */ 329 codec_t *ps_codec = ps_proc->ps_codec; 330 331 /* SAD(distortion metric) of an 8x8 block */ 332 WORD32 i4_mb_distortion = INT_MAX, i4_mb_distortion_least = INT_MAX; 333 334 /* lambda */ 335 UWORD32 u4_lambda = ps_proc->u4_lambda; 336 337 /* cost = distortion + lambda*rate */ 338 WORD32 i4_mb_cost= INT_MAX, i4_mb_cost_least = INT_MAX; 339 340 /* intra mode */ 341 UWORD32 u4_intra_mode, u4_best_intra_16x16_mode = DC_I16x16; 342 343 /* neighbor pels for intra prediction */ 344 UWORD8 *pu1_ngbr_pels_i16 = ps_proc->au1_ngbr_pels; 345 346 /* neighbor availability */ 347 WORD32 i4_ngbr_avbl; 348 349 /* pointer to src macro block */ 350 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma; 351 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma; 352 353 /* pointer to prediction macro block */ 354 UWORD8 *pu1_pred_mb_intra_16x16 = ps_proc->pu1_pred_mb_intra_16x16; 355 UWORD8 *pu1_pred_mb_intra_16x16_plane = ps_proc->pu1_pred_mb_intra_16x16_plane; 356 357 /* strides */ 358 WORD32 i4_src_strd = ps_proc->i4_src_strd; 359 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 360 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 361 362 /* pointer to neighbors left, top, topleft */ 363 UWORD8 *pu1_mb_a = pu1_ref_mb - 1; 364 UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd; 365 UWORD8 *pu1_mb_d = pu1_mb_b - 1; 366 UWORD8 u1_mb_a, u1_mb_b, u1_mb_d; 367 /* valid intra modes map */ 368 UWORD32 u4_valid_intra_modes; 369 370 /* lut for valid intra modes */ 371 const UWORD8 u1_valid_intra_modes[8] = {4, 6, 4, 6, 5, 7, 5, 15}; 372 373 /* temp var */ 374 UWORD32 i, u4_enable_fast_sad = 0, offset = 0; 375 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; 376 UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred; 377 378 /* init temp var */ 379 if (ps_proc->i4_slice_type != ISLICE) 380 { 381 /* Offset for MBtype */ 382 offset = (ps_proc->i4_slice_type == PSLICE) ? 5 : 23; 383 u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad; 384 } 385 386 /* locating neighbors that are available for prediction */ 387 388 /* gather prediction pels from the neighbors, if particular set is not available 389 * it is set to zero*/ 390 /* left pels */ 391 u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a) 392 && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1)); 393 if (u1_mb_a) 394 { 395 for(i = 0; i < 16; i++) 396 pu1_ngbr_pels_i16[16-1-i] = pu1_mb_a[i * i4_rec_strd]; 397 } 398 else 399 { 400 ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16,0,MB_SIZE); 401 } 402 /* top pels */ 403 u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) 404 && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1)); 405 if (u1_mb_b) 406 { 407 ps_codec->pf_mem_cpy_mul8(pu1_ngbr_pels_i16+16+1,pu1_mb_b,16); 408 } 409 else 410 { 411 ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16+16+1,0,MB_SIZE); 412 } 413 /* topleft pels */ 414 u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) 415 && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1)); 416 if (u1_mb_d) 417 { 418 pu1_ngbr_pels_i16[16] = *pu1_mb_d; 419 } 420 else 421 { 422 pu1_ngbr_pels_i16[16] = 0; 423 } 424 425 i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1); 426 ps_proc->i4_ngbr_avbl_16x16_mb = i4_ngbr_avbl; 427 428 /* set valid intra modes for evaluation */ 429 u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl]; 430 431 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST) 432 u4_valid_intra_modes &= ~(1 << PLANE_I16x16); 433 434 /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */ 435 ps_codec->pf_ih264e_evaluate_intra16x16_modes(pu1_curr_mb, pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16, 436 i4_src_strd, i4_pred_strd, 437 i4_ngbr_avbl, &u4_intra_mode, &i4_mb_distortion_least, 438 u4_valid_intra_modes); 439 440 /* cost = distortion + lambda*rate */ 441 i4_mb_cost_least = i4_mb_distortion_least; 442 443 if ((( (u4_valid_intra_modes >> 3) & 1) != 0) && (ps_codec->s_cfg.u4_enc_speed_preset != IVE_FASTEST || 444 ps_proc->i4_slice_type == ISLICE)) 445 { 446 /* intra prediction for PLANE mode*/ 447 (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16](pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16_plane, 0, i4_pred_strd, i4_ngbr_avbl); 448 449 /* evaluate distortion between the actual blk and the estimated blk for the given mode */ 450 ps_codec->apf_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_pred_mb_intra_16x16_plane, i4_src_strd, i4_pred_strd, i4_mb_cost_least, &i4_mb_distortion); 451 452 /* cost = distortion + lambda*rate */ 453 i4_mb_cost = i4_mb_distortion; 454 455 /* update the least cost information if necessary */ 456 if(i4_mb_cost < i4_mb_distortion_least) 457 { 458 u4_intra_mode = PLANE_I16x16; 459 460 i4_mb_cost_least = i4_mb_cost; 461 i4_mb_distortion_least = i4_mb_distortion; 462 } 463 } 464 465 u4_best_intra_16x16_mode = u4_intra_mode; 466 467 DEBUG("%d partition cost, %d intra mode\n", i4_mb_cost_least * 32, u4_best_intra_16x16_mode); 468 469 ps_proc->u1_l_i16_mode = u4_best_intra_16x16_mode; 470 471 /* cost = distortion + lambda*rate */ 472 i4_mb_cost_least = i4_mb_distortion_least + u4_lambda*u1_uev_codelength[offset + u4_best_intra_16x16_mode]; 473 474 475 /* update the type of the mb if necessary */ 476 if (i4_mb_cost_least < ps_proc->i4_mb_cost) 477 { 478 ps_proc->i4_mb_cost = i4_mb_cost_least; 479 ps_proc->i4_mb_distortion = i4_mb_distortion_least; 480 ps_proc->u4_mb_type = I16x16; 481 } 482 483 return ; 484 } 485 486 487 /** 488 ****************************************************************************** 489 * 490 * @brief 491 * evaluate best intra 8x8 mode (rate distortion opt on) 492 * 493 * @par Description 494 * This function evaluates all the possible intra 8x8 modes and finds the mode 495 * that best represents the macro-block (least distortion) and occupies fewer 496 * bits in the bit-stream. 497 * 498 * @param[in] ps_proc_ctxt 499 * pointer to proc ctxt 500 * 501 * @remarks Ideally the cost of encoding a macroblock is calculated as 502 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the 503 * input block and the reconstructed block and rate is the number of bits taken 504 * to place the macroblock in the bit-stream. In this routine the rate does not 505 * exactly point to the total number of bits it takes, rather it points to header 506 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits 507 * and residual bits fall in to texture bits the number of bits taken to encoding 508 * mbtype is considered as rate, we compute cost. Further we will approximate 509 * the distortion as the deviation b/w input and the predicted block as opposed 510 * to input and reconstructed block. 511 * 512 * NOTE: TODO: This function needs to be tested 513 * 514 * @return none 515 * 516 ****************************************************************************** 517 */ 518 void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc) 519 { 520 /* Codec Context */ 521 codec_t *ps_codec = ps_proc->ps_codec; 522 523 /* SAD(distortion metric) of an 4x4 block */ 524 WORD32 i4_partition_distortion, i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0; 525 526 /* lambda */ 527 UWORD32 u4_lambda = ps_proc->u4_lambda; 528 529 /* cost = distortion + lambda*rate */ 530 WORD32 i4_partition_cost, i4_partition_cost_least, i4_total_cost = u4_lambda; 531 532 /* cost due to mbtype */ 533 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda; 534 535 /* intra mode */ 536 UWORD32 u4_intra_mode, u4_best_intra_8x8_mode = DC_I8x8, u4_estimated_intra_8x8_mode; 537 538 /* neighbor pels for intra prediction */ 539 UWORD8 *pu1_ngbr_pels_i8 = ps_proc->au1_ngbr_pels; 540 541 /* pointer to curr partition */ 542 UWORD8 *pu1_mb_curr; 543 544 /* pointer to prediction macro block */ 545 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; 546 547 /* strides */ 548 WORD32 i4_src_strd = ps_proc->i4_src_strd; 549 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 550 551 /* neighbors left, top, top right, top left */ 552 UWORD8 *pu1_mb_a; 553 UWORD8 *pu1_mb_b; 554 UWORD8 *pu1_mb_d; 555 556 /* neighbor availability */ 557 WORD32 i4_ngbr_avbl; 558 block_neighbors_t s_ngbr_avbl; 559 560 /* temp vars */ 561 UWORD32 b8, u4_pix_x, u4_pix_y; 562 UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred; 563 block_neighbors_t s_ngbr_avbl_MB; 564 565 /* ngbr mb syntax information */ 566 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4); 567 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; 568 mb_info_t *ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; 569 /* valid intra modes map */ 570 UWORD32 u4_valid_intra_modes; 571 572 if (ps_proc->ps_ngbr_avbl->u1_mb_c) 573 { 574 ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + (ps_proc->i4_mb_x + 1); 575 } 576 /* left pels */ 577 s_ngbr_avbl_MB.u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a) 578 && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1)); 579 580 /* top pels */ 581 s_ngbr_avbl_MB.u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) 582 && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1)); 583 584 /* topleft pels */ 585 s_ngbr_avbl_MB.u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) 586 && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1)); 587 588 /* top right */ 589 s_ngbr_avbl_MB.u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) 590 && (u4_constrained_intra_pred ? ps_top_right_mb_syn_ele->u2_is_intra : 1)); 591 592 593 for(b8 = 0; b8 < 4; b8++) 594 { 595 u4_pix_x = (b8 & 0x01) << 3; 596 u4_pix_y = (b8 >> 1) << 3; 597 598 pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd); 599 /* when rdopt is off, we use the input as reference for constructing prediction buffer */ 600 /* as opposed to using the recon pels. (open loop intra prediction) */ 601 pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */ 602 pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */ 603 pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */ 604 605 /* locating neighbors that are available for prediction */ 606 /* TODO : update the neighbor availability information basing on constrained intra pred information */ 607 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */ 608 /* basing on neighbors available and hence evade the computation of neighbor availability totally. */ 609 s_ngbr_avbl.u1_mb_a = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y); /* xD = -1, yD = 0 */ 610 s_ngbr_avbl.u1_mb_b = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x, u4_pix_y - 1); /* xD = 0, yD = -1 */ 611 s_ngbr_avbl.u1_mb_c = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x + 8, u4_pix_y - 1); /* xD = BLK_8x8_SIZE, yD = -1 */ 612 s_ngbr_avbl.u1_mb_d = ih264e_derive_ngbr_avbl_of_mb_partitions(&s_ngbr_avbl_MB, u4_pix_x - 1, u4_pix_y - 1); /* xD = -1, yD = -1 */ 613 614 /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_c * TOP_RIGHT_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */ 615 i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + (s_ngbr_avbl.u1_mb_c << 3) + 616 (s_ngbr_avbl.u1_mb_a << 4); 617 /* if top partition is available and top right is not available for intra prediction, then */ 618 /* padd top right samples using top sample and make top right also available */ 619 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */ 620 ps_proc->ai4_neighbor_avail_8x8_subblks[b8] = i4_ngbr_avbl; 621 622 623 ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_mb_a, pu1_mb_b, pu1_mb_d, pu1_ngbr_pels_i8, 624 i4_src_strd, i4_ngbr_avbl); 625 626 i4_partition_cost_least = INT_MAX; 627 /* set valid intra modes for evaluation */ 628 u4_valid_intra_modes = 0x1ff; 629 630 if (!s_ngbr_avbl.u1_mb_b) 631 { 632 u4_valid_intra_modes &= ~(1 << VERT_I4x4); 633 u4_valid_intra_modes &= ~(1 << DIAG_DL_I4x4); 634 u4_valid_intra_modes &= ~(1 << VERT_L_I4x4); 635 } 636 if (!s_ngbr_avbl.u1_mb_a) 637 { 638 u4_valid_intra_modes &= ~(1 << HORZ_I4x4); 639 u4_valid_intra_modes &= ~(1 << HORZ_U_I4x4); 640 } 641 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b || !s_ngbr_avbl.u1_mb_d) 642 { 643 u4_valid_intra_modes &= ~(1 << DIAG_DR_I4x4); 644 u4_valid_intra_modes &= ~(1 << VERT_R_I4x4); 645 u4_valid_intra_modes &= ~(1 << HORZ_D_I4x4); 646 } 647 648 /* estimate the intra 8x8 mode for the current partition (for evaluating cost) */ 649 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b) 650 { 651 u4_estimated_intra_8x8_mode = DC_I8x8; 652 } 653 else 654 { 655 UWORD32 u4_left_intra_8x8_mode = DC_I8x8; 656 UWORD32 u4_top_intra_8x8_mode = DC_I8x8; 657 658 if (u4_pix_x == 0) 659 { 660 if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8) 661 { 662 u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[b8+1]; 663 } 664 else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4) 665 { 666 u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[(b8+1)*4+2]; 667 } 668 } 669 else 670 { 671 u4_left_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-1]; 672 } 673 674 if (u4_pix_y == 0) 675 { 676 if (ps_top_mb_syn_ele->u2_mb_type == I8x8) 677 { 678 u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[b8+2]; 679 } 680 else if (ps_top_mb_syn_ele->u2_mb_type == I4x4) 681 { 682 u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[(b8+2)*4+2]; 683 } 684 } 685 else 686 { 687 u4_top_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-2]; 688 } 689 690 u4_estimated_intra_8x8_mode = MIN(u4_left_intra_8x8_mode, u4_top_intra_8x8_mode); 691 } 692 693 /* perform intra mode 8x8 evaluation */ 694 for (u4_intra_mode = VERT_I8x8; u4_valid_intra_modes != 0; u4_intra_mode++, u4_valid_intra_modes >>= 1) 695 { 696 if ( (u4_valid_intra_modes & 1) == 0) 697 continue; 698 699 /* intra prediction */ 700 (ps_codec->apf_intra_pred_8_l)[u4_intra_mode](pu1_ngbr_pels_i8, pu1_pred_mb, 0, i4_pred_strd, i4_ngbr_avbl); 701 702 /* evaluate distortion between the actual blk and the estimated blk for the given mode */ 703 ime_compute_sad_8x8(pu1_mb_curr, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_partition_cost_least, &i4_partition_distortion); 704 705 i4_partition_cost = i4_partition_distortion + ((u4_estimated_intra_8x8_mode == u4_intra_mode)?u4_cost_one_bit:u4_cost_four_bits); 706 707 /* update the least cost information if necessary */ 708 if (i4_partition_cost < i4_partition_cost_least) 709 { 710 i4_partition_cost_least = i4_partition_cost; 711 i4_partition_distortion_least = i4_partition_distortion; 712 u4_best_intra_8x8_mode = u4_intra_mode; 713 } 714 } 715 /* macroblock distortion */ 716 i4_total_cost += i4_partition_cost_least; 717 i4_total_distortion += i4_partition_distortion_least; 718 /* mb partition mode */ 719 ps_proc->au1_intra_luma_mb_8x8_modes[b8] = u4_best_intra_8x8_mode; 720 721 } 722 723 /* update the type of the mb if necessary */ 724 if (i4_total_cost < ps_proc->i4_mb_cost) 725 { 726 ps_proc->i4_mb_cost = i4_total_cost; 727 ps_proc->i4_mb_distortion = i4_total_distortion; 728 ps_proc->u4_mb_type = I8x8; 729 } 730 731 return ; 732 } 733 734 735 /** 736 ****************************************************************************** 737 * 738 * @brief 739 * evaluate best intra 4x4 mode (rate distortion opt off) 740 * 741 * @par Description 742 * This function evaluates all the possible intra 4x4 modes and finds the mode 743 * that best represents the macro-block (least distortion) and occupies fewer 744 * bits in the bit-stream. 745 * 746 * @param[in] ps_proc_ctxt 747 * pointer to proc ctxt 748 * 749 * @remarks 750 * Ideally the cost of encoding a macroblock is calculated as 751 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the 752 * input block and the reconstructed block and rate is the number of bits taken 753 * to place the macroblock in the bit-stream. In this routine the rate does not 754 * exactly point to the total number of bits it takes, rather it points to header 755 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits 756 * and residual bits fall in to texture bits the number of bits taken to encoding 757 * mbtype is considered as rate, we compute cost. Further we will approximate 758 * the distortion as the deviation b/w input and the predicted block as opposed 759 * to input and reconstructed block. 760 * 761 * NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock, 762 * 24*lambda is added to the SAD before comparison with the best SAD for 763 * inter prediction. This is an empirical value to prevent using too many intra 764 * blocks. 765 * 766 * @return none 767 * 768 ****************************************************************************** 769 */ 770 void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc) 771 { 772 /* Codec Context */ 773 codec_t *ps_codec = ps_proc->ps_codec; 774 775 /* SAD(distortion metric) of an 4x4 block */ 776 WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0; 777 778 /* lambda */ 779 UWORD32 u4_lambda = ps_proc->u4_lambda; 780 781 /* cost = distortion + lambda*rate */ 782 WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda; 783 784 /* cost due to mbtype */ 785 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda; 786 787 /* intra mode */ 788 UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode; 789 790 /* neighbor pels for intra prediction */ 791 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels; 792 793 /* pointer to curr partition */ 794 UWORD8 *pu1_mb_curr; 795 796 /* pointer to prediction macro block */ 797 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; 798 799 /* strides */ 800 WORD32 i4_src_strd = ps_proc->i4_src_strd; 801 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 802 803 /* neighbors left, top, top right, top left */ 804 UWORD8 *pu1_mb_a; 805 UWORD8 *pu1_mb_b; 806 UWORD8 *pu1_mb_c; 807 UWORD8 *pu1_mb_d; 808 809 /* neighbor availability */ 810 WORD32 i4_ngbr_avbl; 811 block_neighbors_t s_ngbr_avbl; 812 813 /* temp vars */ 814 UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y; 815 816 /* scan order inside 4x4 block */ 817 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; 818 819 /* ngbr sub mb modes */ 820 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4); 821 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; 822 mb_info_t *ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; 823 824 /* valid intra modes map */ 825 UWORD32 u4_valid_intra_modes; 826 UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511}; 827 828 UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred; 829 UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d; 830 if (ps_proc->ps_ngbr_avbl->u1_mb_c) 831 { 832 ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x + 1; 833 } 834 /* left pels */ 835 u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a) 836 && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1)); 837 838 /* top pels */ 839 u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) 840 && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1)); 841 842 /* topleft pels */ 843 u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) 844 && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1)); 845 846 /* top right */ 847 u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) 848 && (u4_constrained_intra_pred ? ps_top_right_mb_syn_ele->u2_is_intra : 1)); 849 850 i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3); 851 memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16); 852 853 for (b8 = 0; b8 < 4; b8++) 854 { 855 u4_blk_x = (b8 & 0x01) << 3; 856 u4_blk_y = (b8 >> 1) << 3; 857 for (b4 = 0; b4 < 4; b4++) 858 { 859 u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2); 860 u4_pix_y = u4_blk_y + ((b4 >> 1) << 2); 861 862 pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd); 863 /* when rdopt is off, we use the input as reference for constructing prediction buffer */ 864 /* as opposed to using the recon pels. (open loop intra prediction) */ 865 pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */ 866 pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */ 867 pu1_mb_c = pu1_mb_b + 4; /* pointer to top macro block */ 868 pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */ 869 870 /* locating neighbors that are available for prediction */ 871 /* TODO : update the neighbor availability information basing on constrained intra pred information */ 872 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */ 873 /* basing on neighbors available and hence evade the computation of neighbor availability totally. */ 874 875 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4]; 876 s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1); 877 s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1; 878 s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2; 879 s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3; 880 /* set valid intra modes for evaluation */ 881 u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7]; 882 883 /* if top partition is available and top right is not available for intra prediction, then */ 884 /* padd top right samples using top sample and make top right also available */ 885 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */ 886 887 /* gather prediction pels from the neighbors */ 888 if (s_ngbr_avbl.u1_mb_a) 889 { 890 for(i = 0; i < 4; i++) 891 pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_src_strd]; 892 } 893 else 894 { 895 memset(pu1_ngbr_pels_i4, 0, 4); 896 } 897 898 if (s_ngbr_avbl.u1_mb_b) 899 { 900 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4); 901 } 902 else 903 { 904 memset(pu1_ngbr_pels_i4 + 5, 0, 4); 905 } 906 907 if (s_ngbr_avbl.u1_mb_d) 908 pu1_ngbr_pels_i4[4] = *pu1_mb_d; 909 else 910 pu1_ngbr_pels_i4[4] = 0; 911 912 if (s_ngbr_avbl.u1_mb_c) 913 { 914 memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4); 915 } 916 else if (s_ngbr_avbl.u1_mb_b) 917 { 918 memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4); 919 s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b; 920 } 921 922 i4_partition_cost_least = INT_MAX; 923 924 /* predict the intra 4x4 mode for the current partition (for evaluating cost) */ 925 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b) 926 { 927 u4_estimated_intra_4x4_mode = DC_I4x4; 928 } 929 else 930 { 931 UWORD32 u4_left_intra_4x4_mode = DC_I4x4; 932 UWORD32 u4_top_intra_4x4_mode = DC_I4x4; 933 934 if (u4_pix_x == 0) 935 { 936 if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4) 937 { 938 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]]; 939 } 940 else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8) 941 { 942 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1]; 943 } 944 } 945 else 946 { 947 u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]]; 948 } 949 950 if (u4_pix_y == 0) 951 { 952 if (ps_top_mb_syn_ele->u2_mb_type == I4x4) 953 { 954 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]]; 955 } 956 else if (ps_top_mb_syn_ele->u2_mb_type == I8x8) 957 { 958 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2]; 959 } 960 } 961 else 962 { 963 u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]]; 964 } 965 966 u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode); 967 } 968 969 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode; 970 971 /* mode evaluation and prediction */ 972 ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr, 973 pu1_ngbr_pels_i4, 974 pu1_pred_mb, i4_src_strd, 975 i4_pred_strd, i4_ngbr_avbl, 976 &u4_best_intra_4x4_mode, 977 &i4_partition_cost_least, 978 u4_valid_intra_modes, 979 u4_lambda, 980 u4_estimated_intra_4x4_mode); 981 982 983 i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode) ? u4_cost_one_bit : u4_cost_four_bits); 984 985 DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode); 986 /* macroblock distortion */ 987 i4_total_distortion += i4_partition_distortion_least; 988 i4_total_cost += i4_partition_cost_least; 989 /* mb partition mode */ 990 ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode; 991 } 992 } 993 994 /* update the type of the mb if necessary */ 995 if (i4_total_cost < ps_proc->i4_mb_cost) 996 { 997 ps_proc->i4_mb_cost = i4_total_cost; 998 ps_proc->i4_mb_distortion = i4_total_distortion; 999 ps_proc->u4_mb_type = I4x4; 1000 } 1001 1002 return ; 1003 } 1004 1005 /** 1006 ****************************************************************************** 1007 * 1008 * @brief evaluate best intra 4x4 mode (rate distortion opt on) 1009 * 1010 * @par Description 1011 * This function evaluates all the possible intra 4x4 modes and finds the mode 1012 * that best represents the macro-block (least distortion) and occupies fewer 1013 * bits in the bit-stream. 1014 * 1015 * @param[in] ps_proc_ctxt 1016 * pointer to proc ctxt 1017 * 1018 * @remarks 1019 * Ideally the cost of encoding a macroblock is calculated as 1020 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the 1021 * input block and the reconstructed block and rate is the number of bits taken 1022 * to place the macroblock in the bit-stream. In this routine the rate does not 1023 * exactly point to the total number of bits it takes, rather it points to header 1024 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits 1025 * and residual bits fall in to texture bits the number of bits taken to encoding 1026 * mbtype is considered as rate, we compute cost. Further we will approximate 1027 * the distortion as the deviation b/w input and the predicted block as opposed 1028 * to input and reconstructed block. 1029 * 1030 * NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock, 1031 * 24*lambda is added to the SAD before comparison with the best SAD for 1032 * inter prediction. This is an empirical value to prevent using too many intra 1033 * blocks. 1034 * 1035 * @return none 1036 * 1037 ****************************************************************************** 1038 */ 1039 void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(process_ctxt_t *ps_proc) 1040 { 1041 /* Codec Context */ 1042 codec_t *ps_codec = ps_proc->ps_codec; 1043 1044 /* SAD(distortion metric) of an 4x4 block */ 1045 WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0; 1046 1047 /* lambda */ 1048 UWORD32 u4_lambda = ps_proc->u4_lambda; 1049 1050 /* cost = distortion + lambda*rate */ 1051 WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda; 1052 1053 /* cost due to mbtype */ 1054 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda; 1055 1056 /* intra mode */ 1057 UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode; 1058 1059 /* neighbor pels for intra prediction */ 1060 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels; 1061 1062 /* pointer to curr partition */ 1063 UWORD8 *pu1_mb_curr; 1064 UWORD8 *pu1_mb_ref_left, *pu1_mb_ref_top; 1065 UWORD8 *pu1_ref_mb_intra_4x4; 1066 1067 /* pointer to residual macro block */ 1068 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4; 1069 1070 /* pointer to prediction macro block */ 1071 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; 1072 1073 /* strides */ 1074 WORD32 i4_src_strd = ps_proc->i4_src_strd; 1075 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 1076 WORD32 i4_ref_strd_left, i4_ref_strd_top; 1077 1078 /* neighbors left, top, top right, top left */ 1079 UWORD8 *pu1_mb_a; 1080 UWORD8 *pu1_mb_b; 1081 UWORD8 *pu1_mb_c; 1082 UWORD8 *pu1_mb_d; 1083 1084 /* number of non zero coeffs*/ 1085 UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4; 1086 1087 /* quantization parameters */ 1088 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; 1089 1090 /* neighbor availability */ 1091 WORD32 i4_ngbr_avbl; 1092 block_neighbors_t s_ngbr_avbl; 1093 1094 /* temp vars */ 1095 UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y; 1096 1097 /* scan order inside 4x4 block */ 1098 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; 1099 1100 /* ngbr sub mb modes */ 1101 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4); 1102 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; 1103 mb_info_t *ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; 1104 1105 /* valid intra modes map */ 1106 UWORD32 u4_valid_intra_modes; 1107 UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511}; 1108 1109 /* Dummy variable for 4x4 trans function */ 1110 WORD16 i2_dc_dummy; 1111 UWORD8 u1_mb_a, u1_mb_b, u1_mb_c, u1_mb_d; 1112 UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred; 1113 1114 /* compute ngbr availability for sub blks */ 1115 if (ps_proc->ps_ngbr_avbl->u1_mb_c) 1116 { 1117 ps_top_right_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + (ps_proc->i4_mb_x + 1); 1118 } 1119 1120 /* left pels */ 1121 u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a) 1122 && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1)); 1123 1124 /* top pels */ 1125 u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) 1126 && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1)); 1127 1128 /* topleft pels */ 1129 u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) 1130 && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1)); 1131 1132 /* top right pels */ 1133 u1_mb_c = ((ps_proc->ps_ngbr_avbl->u1_mb_c) 1134 && (u4_constrained_intra_pred ? ps_top_right_mb_syn_ele->u2_is_intra : 1)); 1135 1136 i4_ngbr_avbl = (u1_mb_a) + (u1_mb_d << 1) + (u1_mb_b << 2) + (u1_mb_c << 3); 1137 memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16); 1138 1139 for(b8 = 0; b8 < 4; b8++) 1140 { 1141 u4_blk_x = (b8 & 0x01) << 3; 1142 u4_blk_y = (b8 >> 1) << 3; 1143 for(b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE) 1144 { 1145 u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2); 1146 u4_pix_y = u4_blk_y + ((b4 >> 1) << 2); 1147 1148 pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4 + u4_pix_x + (u4_pix_y * i4_pred_strd); 1149 pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd); 1150 if (u4_pix_x == 0) 1151 { 1152 i4_ref_strd_left = ps_proc->i4_rec_strd; 1153 pu1_mb_ref_left = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_left); 1154 } 1155 else 1156 { 1157 i4_ref_strd_left = i4_pred_strd; 1158 pu1_mb_ref_left = pu1_ref_mb_intra_4x4; 1159 } 1160 if (u4_pix_y == 0) 1161 { 1162 i4_ref_strd_top = ps_proc->i4_rec_strd; 1163 pu1_mb_ref_top = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_top); 1164 } 1165 else 1166 { 1167 i4_ref_strd_top = i4_pred_strd; 1168 pu1_mb_ref_top = pu1_ref_mb_intra_4x4; 1169 } 1170 1171 pu1_mb_a = pu1_mb_ref_left - 1; /* pointer to left macro block */ 1172 pu1_mb_b = pu1_mb_ref_top - i4_ref_strd_top; /* pointer to top macro block */ 1173 pu1_mb_c = pu1_mb_b + 4; /* pointer to top right macro block */ 1174 if (u4_pix_y == 0) 1175 pu1_mb_d = pu1_mb_b - 1; 1176 else 1177 pu1_mb_d = pu1_mb_a - i4_ref_strd_left; /* pointer to top left macro block */ 1178 1179 /* locating neighbors that are available for prediction */ 1180 /* TODO : update the neighbor availability information basing on constrained intra pred information */ 1181 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */ 1182 /* basing on neighbors available and hence evade the computation of neighbor availability totally. */ 1183 1184 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4]; 1185 s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1); 1186 s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1; 1187 s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2; 1188 s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3; 1189 /* set valid intra modes for evaluation */ 1190 u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7]; 1191 1192 /* if top partition is available and top right is not available for intra prediction, then */ 1193 /* padd top right samples using top sample and make top right also available */ 1194 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */ 1195 1196 /* gather prediction pels from the neighbors */ 1197 if (s_ngbr_avbl.u1_mb_a) 1198 { 1199 for(i = 0; i < 4; i++) 1200 pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_ref_strd_left]; 1201 } 1202 else 1203 { 1204 memset(pu1_ngbr_pels_i4,0,4); 1205 } 1206 if(s_ngbr_avbl.u1_mb_b) 1207 { 1208 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4); 1209 } 1210 else 1211 { 1212 memset(pu1_ngbr_pels_i4 + 4 + 1, 0, 4); 1213 } 1214 if (s_ngbr_avbl.u1_mb_d) 1215 pu1_ngbr_pels_i4[4] = *pu1_mb_d; 1216 else 1217 pu1_ngbr_pels_i4[4] = 0; 1218 if (s_ngbr_avbl.u1_mb_c) 1219 { 1220 memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4); 1221 } 1222 else if (s_ngbr_avbl.u1_mb_b) 1223 { 1224 memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4); 1225 s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b; 1226 } 1227 1228 i4_partition_cost_least = INT_MAX; 1229 1230 /* predict the intra 4x4 mode for the current partition (for evaluating cost) */ 1231 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b) 1232 { 1233 u4_estimated_intra_4x4_mode = DC_I4x4; 1234 } 1235 else 1236 { 1237 UWORD32 u4_left_intra_4x4_mode = DC_I4x4; 1238 UWORD32 u4_top_intra_4x4_mode = DC_I4x4; 1239 1240 if (u4_pix_x == 0) 1241 { 1242 if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4) 1243 { 1244 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]]; 1245 } 1246 else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8) 1247 { 1248 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1]; 1249 } 1250 } 1251 else 1252 { 1253 u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]]; 1254 } 1255 1256 if (u4_pix_y == 0) 1257 { 1258 if (ps_top_mb_syn_ele->u2_mb_type == I4x4) 1259 { 1260 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]]; 1261 } 1262 else if (ps_top_mb_syn_ele->u2_mb_type == I8x8) 1263 { 1264 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2]; 1265 } 1266 } 1267 else 1268 { 1269 u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]]; 1270 } 1271 1272 u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode); 1273 } 1274 1275 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode; 1276 1277 /*mode evaluation and prediction*/ 1278 ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr, 1279 pu1_ngbr_pels_i4, 1280 pu1_pred_mb, i4_src_strd, 1281 i4_pred_strd, i4_ngbr_avbl, 1282 &u4_best_intra_4x4_mode, 1283 &i4_partition_cost_least, 1284 u4_valid_intra_modes, 1285 u4_lambda, 1286 u4_estimated_intra_4x4_mode); 1287 1288 1289 i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)?u4_cost_one_bit:u4_cost_four_bits); 1290 1291 DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode); 1292 1293 /* macroblock distortion */ 1294 i4_total_distortion += i4_partition_distortion_least; 1295 i4_total_cost += i4_partition_cost_least; 1296 1297 /* mb partition mode */ 1298 ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode; 1299 1300 1301 /********************************************************/ 1302 /* error estimation, */ 1303 /* transform */ 1304 /* quantization */ 1305 /********************************************************/ 1306 ps_codec->pf_resi_trans_quant_4x4(pu1_mb_curr, pu1_pred_mb, 1307 pi2_res_mb, i4_src_strd, 1308 i4_pred_strd, 1309 /* No op stride, this implies a buff of lenght 1x16 */ 1310 ps_qp_params->pu2_scale_mat, 1311 ps_qp_params->pu2_thres_mat, 1312 ps_qp_params->u1_qbits, 1313 ps_qp_params->u4_dead_zone, 1314 pu1_nnz, &i2_dc_dummy); 1315 1316 /********************************************************/ 1317 /* ierror estimation, */ 1318 /* itransform */ 1319 /* iquantization */ 1320 /********************************************************/ 1321 ps_codec->pf_iquant_itrans_recon_4x4(pi2_res_mb, pu1_pred_mb, 1322 pu1_ref_mb_intra_4x4, 1323 i4_pred_strd, i4_pred_strd, 1324 ps_qp_params->pu2_iscale_mat, 1325 ps_qp_params->pu2_weigh_mat, 1326 ps_qp_params->u1_qp_div, 1327 ps_proc->pv_scratch_buff, 0, 1328 NULL); 1329 } 1330 } 1331 1332 /* update the type of the mb if necessary */ 1333 if (i4_total_cost < ps_proc->i4_mb_cost) 1334 { 1335 ps_proc->i4_mb_cost = i4_total_cost; 1336 ps_proc->i4_mb_distortion = i4_total_distortion; 1337 ps_proc->u4_mb_type = I4x4; 1338 } 1339 1340 return ; 1341 } 1342 1343 /** 1344 ****************************************************************************** 1345 * 1346 * @brief 1347 * evaluate best chroma intra 8x8 mode (rate distortion opt off) 1348 * 1349 * @par Description 1350 * This function evaluates all the possible chroma intra 8x8 modes and finds 1351 * the mode that best represents the macroblock (least distortion) and occupies 1352 * fewer bits in the bitstream. 1353 * 1354 * @param[in] ps_proc_ctxt 1355 * pointer to macroblock context (handle) 1356 * 1357 * @remarks 1358 * For chroma best intra pred mode is calculated based only on SAD 1359 * 1360 * @returns none 1361 * 1362 ****************************************************************************** 1363 */ 1364 1365 void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc) 1366 { 1367 /* Codec Context */ 1368 codec_t *ps_codec = ps_proc->ps_codec; 1369 1370 /* SAD(distortion metric) of an 8x8 block */ 1371 WORD32 i4_mb_distortion, i4_chroma_mb_distortion; 1372 1373 /* intra mode */ 1374 UWORD32 u4_best_chroma_intra_8x8_mode = DC_CH_I8x8; 1375 1376 /* neighbor pels for intra prediction */ 1377 UWORD8 *pu1_ngbr_pels_c_i8x8 = ps_proc->au1_ngbr_pels; 1378 1379 /* pointer to curr macro block */ 1380 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma; 1381 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma; 1382 1383 /* pointer to prediction macro block */ 1384 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma; 1385 UWORD8 *pu1_pred_mb_plane = ps_proc->pu1_pred_mb_intra_chroma_plane; 1386 1387 /* strides */ 1388 WORD32 i4_src_strd_c = ps_proc->i4_src_chroma_strd; 1389 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 1390 WORD32 i4_rec_strd_c = ps_proc->i4_rec_strd; 1391 1392 /* neighbors left, top, top left */ 1393 UWORD8 *pu1_mb_a = pu1_ref_mb - 2; 1394 UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd_c; 1395 UWORD8 *pu1_mb_d = pu1_mb_b - 2; 1396 1397 /* neighbor availability */ 1398 const UWORD8 u1_valid_intra_modes[8] = {1, 3, 1, 3, 5, 7, 5, 15}; 1399 WORD32 i4_ngbr_avbl; 1400 1401 /* valid intra modes map */ 1402 UWORD32 u4_valid_intra_modes; 1403 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; 1404 1405 /* temp var */ 1406 UWORD8 i; 1407 UWORD32 u4_constrained_intra_pred = ps_proc->ps_codec->s_cfg.u4_constrained_intra_pred; 1408 UWORD8 u1_mb_a, u1_mb_b, u1_mb_d; 1409 /* locating neighbors that are available for prediction */ 1410 1411 /* gather prediction pels from the neighbors */ 1412 /* left pels */ 1413 u1_mb_a = ((ps_proc->ps_ngbr_avbl->u1_mb_a) 1414 && (u4_constrained_intra_pred ? ps_proc->s_left_mb_syntax_ele.u2_is_intra : 1)); 1415 if (u1_mb_a) 1416 { 1417 for (i = 0; i < 16; i += 2) 1418 { 1419 pu1_ngbr_pels_c_i8x8[16 - 2 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c]; 1420 pu1_ngbr_pels_c_i8x8[16 - 1 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c + 1]; 1421 } 1422 } 1423 else 1424 { 1425 ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_c_i8x8, 0, MB_SIZE); 1426 } 1427 1428 /* top pels */ 1429 u1_mb_b = ((ps_proc->ps_ngbr_avbl->u1_mb_b) 1430 && (u4_constrained_intra_pred ? ps_top_mb_syn_ele->u2_is_intra : 1)); 1431 if (u1_mb_b) 1432 { 1433 ps_codec->pf_mem_cpy_mul8(&pu1_ngbr_pels_c_i8x8[18], pu1_mb_b, 16); 1434 } 1435 else 1436 { 1437 ps_codec->pf_mem_set_mul8((pu1_ngbr_pels_c_i8x8 + 18), 0, MB_SIZE); 1438 } 1439 1440 /* top left pels */ 1441 u1_mb_d = ((ps_proc->ps_ngbr_avbl->u1_mb_d) 1442 && (u4_constrained_intra_pred ? ps_proc->s_top_left_mb_syntax_ele.u2_is_intra : 1)); 1443 if (u1_mb_d) 1444 { 1445 pu1_ngbr_pels_c_i8x8[16] = *pu1_mb_d; 1446 pu1_ngbr_pels_c_i8x8[17] = *(pu1_mb_d + 1); 1447 } 1448 i4_ngbr_avbl = (u1_mb_a) + (u1_mb_b << 2) + (u1_mb_d << 1); 1449 ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_ngbr_avbl; 1450 1451 u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl]; 1452 1453 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST) 1454 u4_valid_intra_modes &= ~(1 << PLANE_CH_I8x8); 1455 1456 i4_chroma_mb_distortion = INT_MAX; 1457 1458 /* perform intra mode chroma 8x8 evaluation */ 1459 /* intra prediction */ 1460 ps_codec->pf_ih264e_evaluate_intra_chroma_modes(pu1_curr_mb, 1461 pu1_ngbr_pels_c_i8x8, 1462 pu1_pred_mb, 1463 i4_src_strd_c, 1464 i4_pred_strd, 1465 i4_ngbr_avbl, 1466 &u4_best_chroma_intra_8x8_mode, 1467 &i4_chroma_mb_distortion, 1468 u4_valid_intra_modes); 1469 1470 if (u4_valid_intra_modes & 8)/* if Chroma PLANE is valid*/ 1471 { 1472 (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr_pels_c_i8x8, pu1_pred_mb_plane, 0, i4_pred_strd, i4_ngbr_avbl); 1473 1474 /* evaluate distortion(sad) */ 1475 ps_codec->pf_compute_sad_16x8(pu1_curr_mb, pu1_pred_mb_plane, i4_src_strd_c, i4_pred_strd, i4_chroma_mb_distortion, &i4_mb_distortion); 1476 1477 /* update the least distortion information if necessary */ 1478 if(i4_mb_distortion < i4_chroma_mb_distortion) 1479 { 1480 i4_chroma_mb_distortion = i4_mb_distortion; 1481 u4_best_chroma_intra_8x8_mode = PLANE_CH_I8x8; 1482 } 1483 } 1484 1485 DEBUG("%d partition cost, %d intra mode\n", i4_chroma_mb_distortion, u4_best_chroma_intra_8x8_mode); 1486 1487 ps_proc->u1_c_i8_mode = u4_best_chroma_intra_8x8_mode; 1488 1489 return ; 1490 } 1491 1492 1493 /** 1494 ****************************************************************************** 1495 * 1496 * @brief 1497 * Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the 1498 * prediction. 1499 * 1500 * @par Description 1501 * This function evaluates first three 16x16 modes and compute corresponding sad 1502 * and return the buffer predicted with best mode. 1503 * 1504 * @param[in] pu1_src 1505 * UWORD8 pointer to the source 1506 * 1507 * @param[in] pu1_ngbr_pels_i16 1508 * UWORD8 pointer to neighbouring pels 1509 * 1510 * @param[out] pu1_dst 1511 * UWORD8 pointer to the destination 1512 * 1513 * @param[in] src_strd 1514 * integer source stride 1515 * 1516 * @param[in] dst_strd 1517 * integer destination stride 1518 * 1519 * @param[in] u4_n_avblty 1520 * availability of neighbouring pixels 1521 * 1522 * @param[in] u4_intra_mode 1523 * Pointer to the variable in which best mode is returned 1524 * 1525 * @param[in] pu4_sadmin 1526 * Pointer to the variable in which minimum sad is returned 1527 * 1528 * @param[in] u4_valid_intra_modes 1529 * Says what all modes are valid 1530 * 1531 * @returns none 1532 * 1533 ****************************************************************************** 1534 */ 1535 void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src, 1536 UWORD8 *pu1_ngbr_pels_i16, 1537 UWORD8 *pu1_dst, 1538 UWORD32 src_strd, 1539 UWORD32 dst_strd, 1540 WORD32 u4_n_avblty, 1541 UWORD32 *u4_intra_mode, 1542 WORD32 *pu4_sadmin, 1543 UWORD32 u4_valid_intra_modes) 1544 { 1545 UWORD8 *pu1_neighbour; 1546 UWORD8 *pu1_src_temp = pu1_src; 1547 UWORD8 left = 0, top = 0; 1548 WORD32 u4_dcval = 0; 1549 WORD32 i, j; 1550 WORD32 i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX, 1551 i4_min_sad = INT_MAX; 1552 UWORD8 val; 1553 1554 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); 1555 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; 1556 1557 /* left available */ 1558 if (left) 1559 { 1560 i4_sad_horz = 0; 1561 1562 for (i = 0; i < 16; i++) 1563 { 1564 val = pu1_ngbr_pels_i16[15 - i]; 1565 1566 u4_dcval += val; 1567 1568 for (j = 0; j < 16; j++) 1569 { 1570 i4_sad_horz += ABS(val - pu1_src_temp[j]); 1571 } 1572 1573 pu1_src_temp += src_strd; 1574 } 1575 u4_dcval += 8; 1576 } 1577 1578 pu1_src_temp = pu1_src; 1579 /* top available */ 1580 if (top) 1581 { 1582 i4_sad_vert = 0; 1583 1584 for (i = 0; i < 16; i++) 1585 { 1586 u4_dcval += pu1_ngbr_pels_i16[17 + i]; 1587 1588 for (j = 0; j < 16; j++) 1589 { 1590 i4_sad_vert += ABS(pu1_ngbr_pels_i16[17 + j] - pu1_src_temp[j]); 1591 } 1592 pu1_src_temp += src_strd; 1593 1594 } 1595 u4_dcval += 8; 1596 } 1597 1598 u4_dcval = (u4_dcval) >> (3 + left + top); 1599 1600 pu1_src_temp = pu1_src; 1601 1602 /* none available */ 1603 u4_dcval += (left == 0) * (top == 0) * 128; 1604 1605 i4_sad_dc = 0; 1606 1607 for (i = 0; i < 16; i++) 1608 { 1609 for (j = 0; j < 16; j++) 1610 { 1611 i4_sad_dc += ABS(u4_dcval - pu1_src_temp[j]); 1612 } 1613 pu1_src_temp += src_strd; 1614 } 1615 1616 if ((u4_valid_intra_modes & 04) == 0)/* If DC is disabled */ 1617 i4_sad_dc = INT_MAX; 1618 1619 if ((u4_valid_intra_modes & 01) == 0)/* If VERT is disabled */ 1620 i4_sad_vert = INT_MAX; 1621 1622 if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled */ 1623 i4_sad_horz = INT_MAX; 1624 1625 i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert); 1626 1627 /* Finding Minimum sad and doing corresponding prediction */ 1628 if (i4_min_sad < *pu4_sadmin) 1629 { 1630 *pu4_sadmin = i4_min_sad; 1631 if (i4_min_sad == i4_sad_vert) 1632 { 1633 *u4_intra_mode = VERT_I16x16; 1634 pu1_neighbour = pu1_ngbr_pels_i16 + 17; 1635 for (j = 0; j < 16; j++) 1636 { 1637 memcpy(pu1_dst, pu1_neighbour, MB_SIZE); 1638 pu1_dst += dst_strd; 1639 } 1640 } 1641 else if (i4_min_sad == i4_sad_horz) 1642 { 1643 *u4_intra_mode = HORZ_I16x16; 1644 for (j = 0; j < 16; j++) 1645 { 1646 val = pu1_ngbr_pels_i16[15 - j]; 1647 memset(pu1_dst, val, MB_SIZE); 1648 pu1_dst += dst_strd; 1649 } 1650 } 1651 else 1652 { 1653 *u4_intra_mode = DC_I16x16; 1654 for (j = 0; j < 16; j++) 1655 { 1656 memset(pu1_dst, u4_dcval, MB_SIZE); 1657 pu1_dst += dst_strd; 1658 } 1659 } 1660 } 1661 return; 1662 } 1663 1664 /** 1665 ****************************************************************************** 1666 * 1667 * @brief 1668 * Evaluate best intra 4x4 mode and perform prediction. 1669 * 1670 * @par Description 1671 * This function evaluates 4x4 modes and compute corresponding sad 1672 * and return the buffer predicted with best mode. 1673 * 1674 * @param[in] pu1_src 1675 * UWORD8 pointer to the source 1676 * 1677 * @param[in] pu1_ngbr_pels 1678 * UWORD8 pointer to neighbouring pels 1679 * 1680 * @param[out] pu1_dst 1681 * UWORD8 pointer to the destination 1682 * 1683 * @param[in] src_strd 1684 * integer source stride 1685 * 1686 * @param[in] dst_strd 1687 * integer destination stride 1688 * 1689 * @param[in] u4_n_avblty 1690 * availability of neighbouring pixels 1691 * 1692 * @param[in] u4_intra_mode 1693 * Pointer to the variable in which best mode is returned 1694 * 1695 * @param[in] pu4_sadmin 1696 * Pointer to the variable in which minimum cost is returned 1697 * 1698 * @param[in] u4_valid_intra_modes 1699 * Says what all modes are valid 1700 * 1701 * @param[in] u4_lambda 1702 * Lamda value for computing cost from SAD 1703 * 1704 * @param[in] u4_predictd_mode 1705 * Predicted mode for cost computation 1706 * 1707 * @returns none 1708 * 1709 ****************************************************************************** 1710 */ 1711 void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src, 1712 UWORD8 *pu1_ngbr_pels, 1713 UWORD8 *pu1_dst, 1714 UWORD32 src_strd, 1715 UWORD32 dst_strd, 1716 WORD32 u4_n_avblty, 1717 UWORD32 *u4_intra_mode, 1718 WORD32 *pu4_sadmin, 1719 UWORD32 u4_valid_intra_modes, 1720 UWORD32 u4_lambda, 1721 UWORD32 u4_predictd_mode) 1722 { 1723 UWORD8 *pu1_src_temp = pu1_src; 1724 UWORD8 *pu1_pred = pu1_ngbr_pels; 1725 UWORD8 left = 0, top = 0; 1726 UWORD8 u1_pred_val = 0; 1727 UWORD8 u1_pred_vals[4] = {0}; 1728 UWORD8 *pu1_pred_val = NULL; 1729 /* To store FILT121 operated values*/ 1730 UWORD8 u1_pred_vals_diag_121[15] = {0}; 1731 /* To store FILT11 operated values*/ 1732 UWORD8 u1_pred_vals_diag_11[15] = {0}; 1733 UWORD8 u1_pred_vals_vert_r[8] = {0}; 1734 UWORD8 u1_pred_vals_horz_d[10] = {0}; 1735 UWORD8 u1_pred_vals_horz_u[10] = {0}; 1736 WORD32 u4_dcval = 0; 1737 WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1738 INT_MAX, INT_MAX, INT_MAX, INT_MAX}; 1739 1740 WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1741 INT_MAX, INT_MAX, INT_MAX, INT_MAX}; 1742 WORD32 i, i4_min_cost = INT_MAX; 1743 1744 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); 1745 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; 1746 1747 /* Computing SAD */ 1748 1749 /* VERT mode valid */ 1750 if (u4_valid_intra_modes & 1) 1751 { 1752 pu1_pred = pu1_ngbr_pels + 5; 1753 i4_sad[VERT_I4x4] = 0; 1754 i4_cost[VERT_I4x4] = 0; 1755 1756 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); 1757 pu1_src_temp += src_strd; 1758 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); 1759 pu1_src_temp += src_strd; 1760 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); 1761 pu1_src_temp += src_strd; 1762 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); 1763 1764 i4_cost[VERT_I4x4] = i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ? 1765 u4_lambda : 4 * u4_lambda); 1766 } 1767 1768 /* HORZ mode valid */ 1769 if (u4_valid_intra_modes & 2) 1770 { 1771 i4_sad[HORZ_I4x4] = 0; 1772 i4_cost[HORZ_I4x4] =0; 1773 pu1_src_temp = pu1_src; 1774 1775 u1_pred_val = pu1_ngbr_pels[3]; 1776 1777 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val) 1778 + ABS(pu1_src_temp[1] - u1_pred_val) 1779 + ABS(pu1_src_temp[2] - u1_pred_val) 1780 + ABS(pu1_src_temp[3] - u1_pred_val); 1781 pu1_src_temp += src_strd; 1782 1783 u1_pred_val = pu1_ngbr_pels[2]; 1784 1785 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val) 1786 + ABS(pu1_src_temp[1] - u1_pred_val) 1787 + ABS(pu1_src_temp[2] - u1_pred_val) 1788 + ABS(pu1_src_temp[3] - u1_pred_val); 1789 pu1_src_temp += src_strd; 1790 1791 u1_pred_val = pu1_ngbr_pels[1]; 1792 1793 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val) 1794 + ABS(pu1_src_temp[1] - u1_pred_val) 1795 + ABS(pu1_src_temp[2] - u1_pred_val) 1796 + ABS(pu1_src_temp[3] - u1_pred_val); 1797 pu1_src_temp += src_strd; 1798 1799 u1_pred_val = pu1_ngbr_pels[0]; 1800 1801 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val) 1802 + ABS(pu1_src_temp[1] - u1_pred_val) 1803 + ABS(pu1_src_temp[2] - u1_pred_val) 1804 + ABS(pu1_src_temp[3] - u1_pred_val); 1805 1806 i4_cost[HORZ_I4x4] = i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ? 1807 u4_lambda : 4 * u4_lambda); 1808 } 1809 1810 /* DC mode valid */ 1811 if (u4_valid_intra_modes & 4) 1812 { 1813 i4_sad[DC_I4x4] = 0; 1814 i4_cost[DC_I4x4] = 0; 1815 pu1_src_temp = pu1_src; 1816 1817 if (left) 1818 u4_dcval = pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2] 1819 + pu1_ngbr_pels[3] + 2; 1820 if (top) 1821 u4_dcval += pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7] 1822 + pu1_ngbr_pels[8] + 2; 1823 1824 u4_dcval = (u4_dcval) ? (u4_dcval >> (1 + left + top)) : 128; 1825 1826 /* none available */ 1827 memset(u1_pred_vals, u4_dcval, 4); 1828 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); 1829 pu1_src_temp += src_strd; 1830 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); 1831 pu1_src_temp += src_strd; 1832 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); 1833 pu1_src_temp += src_strd; 1834 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); 1835 pu1_src_temp += src_strd; 1836 1837 i4_cost[DC_I4x4] = i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ? 1838 u4_lambda : 4 * u4_lambda); 1839 } 1840 1841 /* if modes other than VERT, HORZ and DC are valid */ 1842 if (u4_valid_intra_modes > 7) 1843 { 1844 pu1_pred = pu1_ngbr_pels; 1845 pu1_pred[13] = pu1_pred[14] = pu1_pred[12]; 1846 1847 /* Performing FILT121 and FILT11 operation for all neighbour values*/ 1848 for (i = 0; i < 13; i++) 1849 { 1850 u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]); 1851 u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]); 1852 1853 pu1_pred++; 1854 } 1855 1856 if (u4_valid_intra_modes & 8)/* DIAG_DL */ 1857 { 1858 i4_sad[DIAG_DL_I4x4] = 0; 1859 i4_cost[DIAG_DL_I4x4] = 0; 1860 pu1_src_temp = pu1_src; 1861 pu1_pred_val = u1_pred_vals_diag_121 + 5; 1862 1863 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]); 1864 pu1_src_temp += src_strd; 1865 USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]); 1866 pu1_src_temp += src_strd; 1867 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]); 1868 pu1_src_temp += src_strd; 1869 USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]); 1870 pu1_src_temp += src_strd; 1871 i4_cost[DIAG_DL_I4x4] = i4_sad[DIAG_DL_I4x4] + ((u4_predictd_mode == DIAG_DL_I4x4) ? 1872 u4_lambda : 4 * u4_lambda); 1873 } 1874 1875 if (u4_valid_intra_modes & 16)/* DIAG_DR */ 1876 { 1877 i4_sad[DIAG_DR_I4x4] = 0; 1878 i4_cost[DIAG_DR_I4x4] = 0; 1879 pu1_src_temp = pu1_src; 1880 pu1_pred_val = u1_pred_vals_diag_121 + 3; 1881 1882 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]); 1883 pu1_src_temp += src_strd; 1884 USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]); 1885 pu1_src_temp += src_strd; 1886 USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]); 1887 pu1_src_temp += src_strd; 1888 USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]); 1889 pu1_src_temp += src_strd; 1890 i4_cost[DIAG_DR_I4x4] = i4_sad[DIAG_DR_I4x4] + ((u4_predictd_mode == DIAG_DR_I4x4) ? 1891 u4_lambda : 4 * u4_lambda); 1892 1893 } 1894 1895 if (u4_valid_intra_modes & 32)/* VERT_R mode valid ????*/ 1896 { 1897 i4_sad[VERT_R_I4x4] = 0; 1898 1899 pu1_src_temp = pu1_src; 1900 u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2]; 1901 memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3); 1902 u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1]; 1903 memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3); 1904 1905 pu1_pred_val = u1_pred_vals_diag_11 + 4; 1906 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]); 1907 pu1_pred_val = u1_pred_vals_diag_121 + 3; 1908 pu1_src_temp += src_strd; 1909 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]); 1910 pu1_src_temp += src_strd; 1911 USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]); 1912 pu1_src_temp += src_strd; 1913 USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4), 1914 i4_sad[VERT_R_I4x4]); 1915 1916 i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] + ((u4_predictd_mode == VERT_R_I4x4) ? 1917 u4_lambda : 4 * u4_lambda); 1918 } 1919 1920 if (u4_valid_intra_modes & 64)/* HORZ_D mode valid ????*/ 1921 { 1922 i4_sad[HORZ_D_I4x4] = 0; 1923 1924 pu1_src_temp = pu1_src; 1925 u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3]; 1926 memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3); 1927 u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0]; 1928 u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0]; 1929 u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1]; 1930 u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1]; 1931 u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2]; 1932 u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2]; 1933 1934 pu1_pred_val = u1_pred_vals_horz_d; 1935 USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]); 1936 pu1_src_temp += src_strd; 1937 USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]); 1938 pu1_src_temp += src_strd; 1939 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]); 1940 pu1_src_temp += src_strd; 1941 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]); 1942 1943 i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] + ((u4_predictd_mode == HORZ_D_I4x4) ? 1944 u4_lambda : 4 * u4_lambda); 1945 } 1946 1947 if (u4_valid_intra_modes & 128)/* VERT_L mode valid ????*/ 1948 { 1949 i4_sad[VERT_L_I4x4] = 0; 1950 pu1_src_temp = pu1_src; 1951 pu1_pred_val = u1_pred_vals_diag_11 + 5; 1952 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); 1953 pu1_src_temp += src_strd; 1954 pu1_pred_val = u1_pred_vals_diag_121 + 5; 1955 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); 1956 pu1_src_temp += src_strd; 1957 pu1_pred_val = u1_pred_vals_diag_11 + 6; 1958 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); 1959 pu1_src_temp += src_strd; 1960 pu1_pred_val = u1_pred_vals_diag_121 + 6; 1961 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); 1962 1963 i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] + ((u4_predictd_mode == VERT_L_I4x4) ? 1964 u4_lambda : 4 * u4_lambda); 1965 } 1966 1967 if (u4_valid_intra_modes & 256)/* HORZ_U mode valid ????*/ 1968 { 1969 i4_sad[HORZ_U_I4x4] = 0; 1970 pu1_src_temp = pu1_src; 1971 u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2]; 1972 u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1]; 1973 u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1]; 1974 u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0]; 1975 u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0]; 1976 u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]); 1977 1978 memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4); 1979 1980 pu1_pred_val = u1_pred_vals_horz_u; 1981 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]); 1982 pu1_src_temp += src_strd; 1983 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]); 1984 pu1_src_temp += src_strd; 1985 USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]); 1986 pu1_src_temp += src_strd; 1987 USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]); 1988 1989 i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] + ((u4_predictd_mode == HORZ_U_I4x4) ? 1990 u4_lambda : 4 * u4_lambda); 1991 } 1992 1993 i4_min_cost = MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]), 1994 MIN3(i4_cost[3], i4_cost[4], i4_cost[5]), 1995 MIN3(i4_cost[6], i4_cost[7], i4_cost[8])); 1996 1997 } 1998 else 1999 { 2000 /* Only first three modes valid */ 2001 i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]); 2002 } 2003 2004 *pu4_sadmin = i4_min_cost; 2005 2006 if (i4_min_cost == i4_cost[0]) 2007 { 2008 *u4_intra_mode = VERT_I4x4; 2009 pu1_pred_val = pu1_ngbr_pels + 5; 2010 memcpy(pu1_dst, (pu1_pred_val), 4); 2011 pu1_dst += dst_strd; 2012 memcpy(pu1_dst, (pu1_pred_val), 4); 2013 pu1_dst += dst_strd; 2014 memcpy(pu1_dst, (pu1_pred_val), 4); 2015 pu1_dst += dst_strd; 2016 memcpy(pu1_dst, (pu1_pred_val), 4); 2017 } 2018 else if (i4_min_cost == i4_cost[1]) 2019 { 2020 *u4_intra_mode = HORZ_I4x4; 2021 memset(pu1_dst, pu1_ngbr_pels[3], 4); 2022 pu1_dst += dst_strd; 2023 memset(pu1_dst, pu1_ngbr_pels[2], 4); 2024 pu1_dst += dst_strd; 2025 memset(pu1_dst, pu1_ngbr_pels[1], 4); 2026 pu1_dst += dst_strd; 2027 memset(pu1_dst, pu1_ngbr_pels[0], 4); 2028 } 2029 else if (i4_min_cost == i4_cost[2]) 2030 { 2031 *u4_intra_mode = DC_I4x4; 2032 memset(pu1_dst, u4_dcval, 4); 2033 pu1_dst += dst_strd; 2034 memset(pu1_dst, u4_dcval, 4); 2035 pu1_dst += dst_strd; 2036 memset(pu1_dst, u4_dcval, 4); 2037 pu1_dst += dst_strd; 2038 memset(pu1_dst, u4_dcval, 4); 2039 } 2040 2041 else if (i4_min_cost == i4_cost[3]) 2042 { 2043 *u4_intra_mode = DIAG_DL_I4x4; 2044 pu1_pred_val = u1_pred_vals_diag_121 + 5; 2045 memcpy(pu1_dst, (pu1_pred_val), 4); 2046 pu1_dst += dst_strd; 2047 memcpy(pu1_dst, (pu1_pred_val + 1), 4); 2048 pu1_dst += dst_strd; 2049 memcpy(pu1_dst, (pu1_pred_val + 2), 4); 2050 pu1_dst += dst_strd; 2051 memcpy(pu1_dst, (pu1_pred_val + 3), 4); 2052 } 2053 else if (i4_min_cost == i4_cost[4]) 2054 { 2055 *u4_intra_mode = DIAG_DR_I4x4; 2056 pu1_pred_val = u1_pred_vals_diag_121 + 3; 2057 2058 memcpy(pu1_dst, (pu1_pred_val), 4); 2059 pu1_dst += dst_strd; 2060 memcpy(pu1_dst, (pu1_pred_val - 1), 4); 2061 pu1_dst += dst_strd; 2062 memcpy(pu1_dst, (pu1_pred_val - 2), 4); 2063 pu1_dst += dst_strd; 2064 memcpy(pu1_dst, (pu1_pred_val - 3), 4); 2065 } 2066 2067 else if (i4_min_cost == i4_cost[5]) 2068 { 2069 *u4_intra_mode = VERT_R_I4x4; 2070 pu1_pred_val = u1_pred_vals_diag_11 + 4; 2071 memcpy(pu1_dst, (pu1_pred_val), 4); 2072 pu1_dst += dst_strd; 2073 pu1_pred_val = u1_pred_vals_diag_121 + 3; 2074 memcpy(pu1_dst, (pu1_pred_val), 4); 2075 pu1_dst += dst_strd; 2076 memcpy(pu1_dst, (u1_pred_vals_vert_r), 4); 2077 pu1_dst += dst_strd; 2078 memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4); 2079 } 2080 else if (i4_min_cost == i4_cost[6]) 2081 { 2082 *u4_intra_mode = HORZ_D_I4x4; 2083 pu1_pred_val = u1_pred_vals_horz_d; 2084 memcpy(pu1_dst, (pu1_pred_val + 6), 4); 2085 pu1_dst += dst_strd; 2086 memcpy(pu1_dst, (pu1_pred_val + 4), 4); 2087 pu1_dst += dst_strd; 2088 memcpy(pu1_dst, (pu1_pred_val + 2), 4); 2089 pu1_dst += dst_strd; 2090 memcpy(pu1_dst, (pu1_pred_val), 4); 2091 pu1_dst += dst_strd; 2092 } 2093 else if (i4_min_cost == i4_cost[7]) 2094 { 2095 *u4_intra_mode = VERT_L_I4x4; 2096 pu1_pred_val = u1_pred_vals_diag_11 + 5; 2097 memcpy(pu1_dst, (pu1_pred_val), 4); 2098 pu1_dst += dst_strd; 2099 pu1_pred_val = u1_pred_vals_diag_121 + 5; 2100 memcpy(pu1_dst, (pu1_pred_val), 4); 2101 pu1_dst += dst_strd; 2102 pu1_pred_val = u1_pred_vals_diag_11 + 6; 2103 memcpy(pu1_dst, (pu1_pred_val), 4); 2104 pu1_dst += dst_strd; 2105 pu1_pred_val = u1_pred_vals_diag_121 + 6; 2106 memcpy(pu1_dst, (pu1_pred_val), 4); 2107 } 2108 else if (i4_min_cost == i4_cost[8]) 2109 { 2110 *u4_intra_mode = HORZ_U_I4x4; 2111 pu1_pred_val = u1_pred_vals_horz_u; 2112 memcpy(pu1_dst, (pu1_pred_val), 4); 2113 pu1_dst += dst_strd; 2114 memcpy(pu1_dst, (pu1_pred_val + 2), 4); 2115 pu1_dst += dst_strd; 2116 memcpy(pu1_dst, (pu1_pred_val + 4), 4); 2117 pu1_dst += dst_strd; 2118 memcpy(pu1_dst, (pu1_pred_val + 6), 4); 2119 pu1_dst += dst_strd; 2120 } 2121 2122 return; 2123 } 2124 2125 /** 2126 ****************************************************************************** 2127 * 2128 * @brief: 2129 * Evaluate best intr chroma mode (among VERT, HORZ and DC ) and do the prediction. 2130 * 2131 * @par Description 2132 * This function evaluates first three intra chroma modes and compute corresponding sad 2133 * and return the buffer predicted with best mode. 2134 * 2135 * @param[in] pu1_src 2136 * UWORD8 pointer to the source 2137 * 2138 * @param[in] pu1_ngbr_pels 2139 * UWORD8 pointer to neighbouring pels 2140 * 2141 * @param[out] pu1_dst 2142 * UWORD8 pointer to the destination 2143 * 2144 * @param[in] src_strd 2145 * integer source stride 2146 * 2147 * @param[in] dst_strd 2148 * integer destination stride 2149 * 2150 * @param[in] u4_n_avblty 2151 * availability of neighbouring pixels 2152 * 2153 * @param[in] u4_intra_mode 2154 * Pointer to the variable in which best mode is returned 2155 * 2156 * @param[in] pu4_sadmin 2157 * Pointer to the variable in which minimum sad is returned 2158 * 2159 * @param[in] u4_valid_intra_modes 2160 * Says what all modes are valid 2161 * 2162 * @return none 2163 * 2164 ****************************************************************************** 2165 */ 2166 void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src, 2167 UWORD8 *pu1_ngbr_pels, 2168 UWORD8 *pu1_dst, 2169 UWORD32 src_strd, 2170 UWORD32 dst_strd, 2171 WORD32 u4_n_avblty, 2172 UWORD32 *u4_intra_mode, 2173 WORD32 *pu4_sadmin, 2174 UWORD32 u4_valid_intra_modes) 2175 { 2176 UWORD8 *pu1_neighbour; 2177 UWORD8 *pu1_src_temp = pu1_src; 2178 UWORD8 left = 0, top = 0; 2179 WORD32 u4_dcval_u_l[2] = { 0, 0 }, /*sum left neighbours for 'U' ,two separate sets - sum of first four from top,and sum of four values from bottom */ 2180 u4_dcval_u_t[2] = { 0, 0 }; /*sum top neighbours for 'U'*/ 2181 2182 WORD32 u4_dcval_v_l[2] = { 0, 0 }, /*sum left neighbours for 'V'*/ 2183 u4_dcval_v_t[2] = { 0, 0 }; /*sum top neighbours for 'V'*/ 2184 2185 WORD32 i, j, row, col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, 2186 i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX; 2187 UWORD8 val_u, val_v; 2188 2189 WORD32 u4_dc_val[2][2][2];/* ----------- 2190 | | | Chroma can have four 2191 | 00 | 01 | separate dc value... 2192 ----------- u4_dc_val corresponds to this dc values 2193 | | | with u4_dc_val[2][2][U] and u4_dc_val[2][2][V] 2194 | 10 | 11 | 2195 ----------- */ 2196 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); 2197 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; 2198 2199 /*Evaluating HORZ*/ 2200 if (left)/* Ifleft available*/ 2201 { 2202 i4_sad_horz = 0; 2203 2204 for (i = 0; i < 8; i++) 2205 { 2206 val_v = pu1_ngbr_pels[15 - 2 * i]; 2207 val_u = pu1_ngbr_pels[15 - 2 * i - 1]; 2208 row = i / 4; 2209 u4_dcval_u_l[row] += val_u; 2210 u4_dcval_v_l[row] += val_v; 2211 for (j = 0; j < 8; j++) 2212 { 2213 i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for HORZ mode*/ 2214 i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]); 2215 } 2216 2217 pu1_src_temp += src_strd; 2218 } 2219 u4_dcval_u_l[0] += 2; 2220 u4_dcval_u_l[1] += 2; 2221 u4_dcval_v_l[0] += 2; 2222 u4_dcval_v_l[1] += 2; 2223 } 2224 2225 /*Evaluating VERT**/ 2226 pu1_src_temp = pu1_src; 2227 if (top) /* top available*/ 2228 { 2229 i4_sad_vert = 0; 2230 2231 for (i = 0; i < 8; i++) 2232 { 2233 col = i / 4; 2234 2235 val_u = pu1_ngbr_pels[18 + i * 2]; 2236 val_v = pu1_ngbr_pels[18 + i * 2 + 1]; 2237 u4_dcval_u_t[col] += val_u; 2238 u4_dcval_v_t[col] += val_v; 2239 2240 for (j = 0; j < 16; j++) 2241 { 2242 i4_sad_vert += ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]);/* Finding SAD for VERT mode*/ 2243 } 2244 pu1_src_temp += src_strd; 2245 2246 } 2247 u4_dcval_u_t[0] += 2; 2248 u4_dcval_u_t[1] += 2; 2249 u4_dcval_v_t[0] += 2; 2250 u4_dcval_v_t[1] += 2; 2251 } 2252 2253 /* computing DC value*/ 2254 /* Equation 8-128 in spec*/ 2255 u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top); 2256 u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + u4_dcval_v_t[0]) >> (1 + left + top); 2257 u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top); 2258 u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + u4_dcval_v_t[1]) >> (1 + left + top); 2259 2260 if (top) 2261 { 2262 /* Equation 8-132 in spec*/ 2263 u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top); 2264 u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top); 2265 } 2266 else 2267 { 2268 u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left); 2269 u4_dc_val[0][1][1] = (u4_dcval_v_l[0]) >> (1 + left); 2270 } 2271 2272 if (left) 2273 { 2274 u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left); 2275 u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left); 2276 } 2277 else 2278 { 2279 u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top); 2280 u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top); 2281 } 2282 2283 if (!(left || top)) 2284 { 2285 /*none available*/ 2286 u4_dc_val[0][0][0] = u4_dc_val[0][0][1] = 2287 u4_dc_val[0][1][0] = u4_dc_val[0][1][1] = 2288 u4_dc_val[1][0][0] = u4_dc_val[1][0][1] = 2289 u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128; 2290 } 2291 2292 /* Evaluating DC */ 2293 pu1_src_temp = pu1_src; 2294 i4_sad_dc = 0; 2295 for (i = 0; i < 8; i++) 2296 { 2297 for (j = 0; j < 8; j++) 2298 { 2299 col = j / 4; 2300 row = i / 4; 2301 val_u = u4_dc_val[row][col][0]; 2302 val_v = u4_dc_val[row][col][1]; 2303 2304 i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for DC mode*/ 2305 i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]); 2306 } 2307 pu1_src_temp += src_strd; 2308 } 2309 2310 if ((u4_valid_intra_modes & 01) == 0)/* If DC is disabled*/ 2311 i4_sad_dc = INT_MAX; 2312 if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled*/ 2313 i4_sad_horz = INT_MAX; 2314 if ((u4_valid_intra_modes & 04) == 0)/* If VERT is disabled*/ 2315 i4_sad_vert = INT_MAX; 2316 2317 i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert); 2318 2319 /* Finding Minimum sad and doing corresponding prediction*/ 2320 if (i4_min_sad < *pu4_sadmin) 2321 { 2322 *pu4_sadmin = i4_min_sad; 2323 2324 if (i4_min_sad == i4_sad_dc) 2325 { 2326 *u4_intra_mode = DC_CH_I8x8; 2327 for (i = 0; i < 8; i++) 2328 { 2329 for (j = 0; j < 8; j++) 2330 { 2331 col = j / 4; 2332 row = i / 4; 2333 2334 pu1_dst[2 * j] = u4_dc_val[row][col][0]; 2335 pu1_dst[2 * j + 1] = u4_dc_val[row][col][1]; 2336 } 2337 pu1_dst += dst_strd; 2338 } 2339 } 2340 else if (i4_min_sad == i4_sad_horz) 2341 { 2342 *u4_intra_mode = HORZ_CH_I8x8; 2343 for (j = 0; j < 8; j++) 2344 { 2345 val_v = pu1_ngbr_pels[15 - 2 * j]; 2346 val_u = pu1_ngbr_pels[15 - 2 * j - 1]; 2347 2348 for (i = 0; i < 8; i++) 2349 { 2350 pu1_dst[2 * i] = val_u; 2351 pu1_dst[2 * i + 1] = val_v; 2352 2353 } 2354 pu1_dst += dst_strd; 2355 } 2356 } 2357 else 2358 { 2359 *u4_intra_mode = VERT_CH_I8x8; 2360 pu1_neighbour = pu1_ngbr_pels + 18; 2361 for (j = 0; j < 8; j++) 2362 { 2363 memcpy(pu1_dst, pu1_neighbour, MB_SIZE); 2364 pu1_dst += dst_strd; 2365 } 2366 } 2367 } 2368 2369 return; 2370 } 2371