1 /****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 21 /** 22 ******************************************************************************* 23 * @file 24 * ih264e_intra_modes_eval.c 25 * 26 * @brief 27 * This file contains definitions of routines that perform rate distortion 28 * analysis on a macroblock if they are to be coded as intra. 
29 * 30 * @author 31 * ittiam 32 * 33 * @par List of Functions: 34 * - ih264e_derive_nghbr_avbl_of_mbs() 35 * - ih264e_derive_ngbr_avbl_of_mb_partitions() 36 * - ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff() 37 * - ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff() 38 * - ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff() 39 * - ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton() 40 * - ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff() 41 * - ih264e_evaluate_intra16x16_modes() 42 * - ih264e_evaluate_intra4x4_modes() 43 * - ih264e_evaluate_intra_chroma_modes() 44 * 45 * @remarks 46 * None 47 * 48 ******************************************************************************* 49 */ 50 51 /*****************************************************************************/ 52 /* File Includes */ 53 /*****************************************************************************/ 54 55 /* System include files */ 56 #include <stdio.h> 57 #include <string.h> 58 #include <limits.h> 59 #include <assert.h> 60 61 /* User include files */ 62 #include "ih264e_config.h" 63 #include "ih264_typedefs.h" 64 #include "ih264e_defs.h" 65 #include "iv2.h" 66 #include "ive2.h" 67 #include "ih264_debug.h" 68 #include "ih264_defs.h" 69 #include "ih264_macros.h" 70 #include "ih264_intra_pred_filters.h" 71 #include "ih264_structs.h" 72 #include "ih264_common_tables.h" 73 #include "ih264_trans_quant_itrans_iquant.h" 74 #include "ih264_inter_pred_filters.h" 75 #include "ih264_mem_fns.h" 76 #include "ih264_padding.h" 77 #include "ih264_deblk_edge_filters.h" 78 #include "ih264_cabac_tables.h" 79 #include "ime_distortion_metrics.h" 80 #include "ih264e_error.h" 81 #include "ih264e_bitstream.h" 82 #include "ime_defs.h" 83 #include "ime_structs.h" 84 #include "irc_cntrl_param.h" 85 #include "irc_frame_info_collector.h" 86 #include "ih264e_rate_control.h" 87 #include "ih264e_cabac_structs.h" 88 #include "ih264e_structs.h" 89 #include 
"ih264e_intra_modes_eval.h" 90 #include "ih264e_globals.h" 91 #include "ime_platform_macros.h" 92 93 94 /*****************************************************************************/ 95 /* Function Definitions */ 96 /*****************************************************************************/ 97 98 /** 99 ****************************************************************************** 100 * 101 * @brief 102 * derivation process for macroblock availability 103 * 104 * @par Description 105 * Calculates the availability of the left, top, topright and topleft macroblocks. 106 * 107 * @param[in] ps_proc_ctxt 108 * pointer to proc context (handle) 109 * 110 * @remarks Based on section 6.4.5 in H264 spec 111 * 112 * @return none 113 * 114 ****************************************************************************** 115 */ 116 void ih264e_derive_nghbr_avbl_of_mbs(process_ctxt_t *ps_proc) 117 { 118 UWORD8 *pu1_slice_idx_curr = ps_proc->pu1_slice_idx; 119 UWORD8 *pu1_slice_idx_b; 120 UWORD8 *pu1_slice_idx_a; 121 UWORD8 *pu1_slice_idx_c; 122 UWORD8 *pu1_slice_idx_d; 123 block_neighbors_t *ps_ngbr_avbl; 124 WORD32 i4_mb_x, i4_mb_y; 125 WORD32 i4_wd_mbs; 126 127 i4_mb_x = ps_proc->i4_mb_x; 128 i4_mb_y = ps_proc->i4_mb_y; 129 130 i4_wd_mbs = ps_proc->i4_wd_mbs; 131 132 pu1_slice_idx_curr += (i4_mb_y * i4_wd_mbs) + i4_mb_x; 133 pu1_slice_idx_a = pu1_slice_idx_curr - 1; 134 pu1_slice_idx_b = pu1_slice_idx_curr - i4_wd_mbs; 135 pu1_slice_idx_c = pu1_slice_idx_b + 1; 136 pu1_slice_idx_d = pu1_slice_idx_b - 1; 137 ps_ngbr_avbl = ps_proc->ps_ngbr_avbl; 138 139 /**********************************************************************/ 140 /* The macroblock is marked as available, unless one of the following */ 141 /* conditions is true in which case the macroblock shall be marked as */ 142 /* not available. */ 143 /* 1. mbAddr < 0 */ 144 /* 2 mbAddr > CurrMbAddr */ 145 /* 3. 
the macroblock with address mbAddr belongs to a different slice */ 146 /* than the macroblock with address CurrMbAddr */ 147 /**********************************************************************/ 148 149 /* left macroblock availability */ 150 if (i4_mb_x == 0) 151 { /* macroblocks along first column */ 152 ps_ngbr_avbl->u1_mb_a = 0; 153 } 154 else 155 { /* macroblocks belong to same slice? */ 156 if (*pu1_slice_idx_a != *pu1_slice_idx_curr) 157 ps_ngbr_avbl->u1_mb_a = 0; 158 else 159 ps_ngbr_avbl->u1_mb_a = 1; 160 } 161 162 /* top macroblock availability */ 163 if (i4_mb_y == 0) 164 { /* macroblocks along first row */ 165 ps_ngbr_avbl->u1_mb_b = 0; 166 } 167 else 168 { /* macroblocks belong to same slice? */ 169 if (*pu1_slice_idx_b != *pu1_slice_idx_curr) 170 ps_ngbr_avbl->u1_mb_b = 0; 171 else 172 ps_ngbr_avbl->u1_mb_b = 1; 173 } 174 175 /* top right macroblock availability */ 176 if (i4_mb_x == i4_wd_mbs-1 || i4_mb_y == 0) 177 { /* macroblocks along last column */ 178 ps_ngbr_avbl->u1_mb_c = 0; 179 } 180 else 181 { /* macroblocks belong to same slice? */ 182 if (*pu1_slice_idx_c != *pu1_slice_idx_curr) 183 ps_ngbr_avbl->u1_mb_c = 0; 184 else 185 ps_ngbr_avbl->u1_mb_c = 1; 186 } 187 188 /* top left macroblock availability */ 189 if (i4_mb_x == 0 || i4_mb_y == 0) 190 { /* macroblocks along first column */ 191 ps_ngbr_avbl->u1_mb_d = 0; 192 } 193 else 194 { /* macroblocks belong to same slice? */ 195 if (*pu1_slice_idx_d != *pu1_slice_idx_curr) 196 ps_ngbr_avbl->u1_mb_d = 0; 197 else 198 ps_ngbr_avbl->u1_mb_d = 1; 199 } 200 } 201 202 /** 203 ****************************************************************************** 204 * 205 * @brief 206 * derivation process for subblock/partition availability 207 * 208 * @par Description 209 * Calculates the availability of the left, top, topright and topleft subblock 210 * or partitions. 
211 * 212 * @param[in] ps_proc_ctxt 213 * pointer to macroblock context (handle) 214 * 215 * @param[in] i1_pel_pos_x 216 * column position of the pel wrt the current block 217 * 218 * @param[in] i1_pel_pos_y 219 * row position of the pel in wrt current block 220 * 221 * @remarks Assumptions: before calling this function it is assumed that 222 * the neighbor availability of the current macroblock is already derived. 223 * Based on table 6-3 of H264 specification 224 * 225 * @return availability status (yes or no) 226 * 227 ****************************************************************************** 228 */ 229 UWORD8 ih264e_derive_ngbr_avbl_of_mb_partitions(block_neighbors_t *ps_ngbr_avbl, 230 WORD8 i1_pel_pos_x, 231 WORD8 i1_pel_pos_y) 232 { 233 UWORD8 u1_neighbor_avail=0; 234 235 /**********************************************************************/ 236 /* values of i1_pel_pos_x in the range 0-15 inclusive correspond to */ 237 /* various columns of a macroblock */ 238 /* */ 239 /* values of i1_pel_pos_y in the range 0-15 inclusive correspond to */ 240 /* various rows of a macroblock */ 241 /* */ 242 /* other values of i1_pel_pos_x & i1_pel_pos_y represents elements */ 243 /* outside the bound of an mb ie., represents its neighbors. 
*/ 244 /**********************************************************************/ 245 if (i1_pel_pos_x < 0) 246 { /* column(-1) */ 247 if (i1_pel_pos_y < 0) 248 { /* row(-1) */ 249 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_d; /* current mb topleft availability */ 250 } 251 else if (i1_pel_pos_y >= 0 && i1_pel_pos_y < 16) 252 { /* all rows of a macroblock */ 253 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_a; /* current mb left availability */ 254 } 255 else /* if (i1_pel_pos_y >= 16) */ 256 { /* rows(+16) */ 257 u1_neighbor_avail = 0; /* current mb bottom left availability */ 258 } 259 } 260 else if (i1_pel_pos_x >= 0 && i1_pel_pos_x < 16) 261 { /* all columns of a macroblock */ 262 if (i1_pel_pos_y < 0) 263 { /* row(-1) */ 264 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_b; /* current mb top availability */ 265 } 266 else if (i1_pel_pos_y >= 0 && i1_pel_pos_y < 16) 267 { /* all rows of a macroblock */ 268 u1_neighbor_avail = 1; /* current mb availability */ 269 /* availability of the partition is dependent on the position of the partition inside the mb */ 270 /* although the availability is declared as 1 in all cases these needs to be corrected somewhere else and this is not done in here */ 271 } 272 else /* if (i1_pel_pos_y >= 16) */ 273 { /* rows(+16) */ 274 u1_neighbor_avail = 0; /* current mb bottom availability */ 275 } 276 } 277 else if (i1_pel_pos_x >= 16) 278 { /* column(+16) */ 279 if (i1_pel_pos_y < 0) 280 { /* row(-1) */ 281 u1_neighbor_avail = ps_ngbr_avbl->u1_mb_c; /* current mb top right availability */ 282 } 283 else /* if (i1_pel_pos_y >= 0) */ 284 { /* all other rows */ 285 u1_neighbor_avail = 0; /* current mb right & bottom right availability */ 286 } 287 } 288 289 return u1_neighbor_avail; 290 } 291 292 /** 293 ****************************************************************************** 294 * 295 * @brief 296 * evaluate best intra 16x16 mode (rate distortion opt off) 297 * 298 * @par Description 299 * This function evaluates all the possible intra 16x16 
modes and finds the mode 300 * that best represents the macro-block (least distortion) and occupies fewer 301 * bits in the bit-stream. 302 * 303 * @param[in] ps_proc_ctxt 304 * pointer to process context (handle) 305 * 306 * @remarks 307 * Ideally the cost of encoding a macroblock is calculated as 308 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the 309 * input block and the reconstructed block and rate is the number of bits taken 310 * to place the macroblock in the bit-stream. In this routine the rate does not 311 * exactly point to the total number of bits it takes, rather it points to header 312 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits 313 * and residual bits fall in to texture bits the number of bits taken to encoding 314 * mbtype is considered as rate, we compute cost. Further we will approximate 315 * the distortion as the deviation b/w input and the predicted block as opposed 316 * to input and reconstructed block. 317 * 318 * NOTE: As per the Document JVT-O079, for intra 16x16 macroblock, 319 * the SAD and cost are one and the same. 
320 * 321 * @return none 322 * 323 ****************************************************************************** 324 */ 325 326 void ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc) 327 { 328 /* Codec Context */ 329 codec_t *ps_codec = ps_proc->ps_codec; 330 331 /* SAD(distortion metric) of an 8x8 block */ 332 WORD32 i4_mb_distortion = INT_MAX, i4_mb_distortion_least = INT_MAX; 333 334 /* lambda */ 335 UWORD32 u4_lambda = ps_proc->u4_lambda; 336 337 /* cost = distortion + lambda*rate */ 338 WORD32 i4_mb_cost= INT_MAX, i4_mb_cost_least = INT_MAX; 339 340 /* intra mode */ 341 UWORD32 u4_intra_mode, u4_best_intra_16x16_mode = DC_I16x16; 342 343 /* neighbor pels for intra prediction */ 344 UWORD8 *pu1_ngbr_pels_i16 = ps_proc->au1_ngbr_pels; 345 346 /* neighbor availability */ 347 WORD32 i4_ngbr_avbl; 348 349 /* pointer to src macro block */ 350 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_luma; 351 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_luma; 352 353 /* pointer to prediction macro block */ 354 UWORD8 *pu1_pred_mb_intra_16x16 = ps_proc->pu1_pred_mb_intra_16x16; 355 UWORD8 *pu1_pred_mb_intra_16x16_plane = ps_proc->pu1_pred_mb_intra_16x16_plane; 356 357 /* strides */ 358 WORD32 i4_src_strd = ps_proc->i4_src_strd; 359 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 360 WORD32 i4_rec_strd = ps_proc->i4_rec_strd; 361 362 /* pointer to neighbors left, top, topleft */ 363 UWORD8 *pu1_mb_a = pu1_ref_mb - 1; 364 UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd; 365 UWORD8 *pu1_mb_d = pu1_mb_b - 1; 366 367 /* valid intra modes map */ 368 UWORD32 u4_valid_intra_modes; 369 370 /* lut for valid intra modes */ 371 const UWORD8 u1_valid_intra_modes[8] = {4, 6, 12, 14, 5, 7, 13, 15}; 372 373 /* temp var */ 374 UWORD32 i, u4_enable_fast_sad = 0, offset = 0; 375 376 /* init temp var */ 377 if (ps_proc->i4_slice_type != ISLICE) 378 { 379 /* Offset for MBtype */ 380 offset = (ps_proc->i4_slice_type == PSLICE) ? 
5 : 23; 381 u4_enable_fast_sad = ps_proc->s_me_ctxt.u4_enable_fast_sad; 382 } 383 384 /* locating neighbors that are available for prediction */ 385 /* TODO : update the neighbor availability information basing on constrained intra pred information */ 386 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines 387 * basing on neighbors available and hence evade the computation of neighbor availability totally. */ 388 /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */ 389 i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1); 390 ps_proc->i4_ngbr_avbl_16x16_mb = i4_ngbr_avbl; 391 392 /* gather prediction pels from the neighbors, if particular set is not available 393 * it is set to zero*/ 394 /* left pels */ 395 if (ps_proc->ps_ngbr_avbl->u1_mb_a) 396 { 397 for(i = 0; i < 16; i++) 398 pu1_ngbr_pels_i16[16-1-i] = pu1_mb_a[i * i4_rec_strd]; 399 } 400 else 401 { 402 ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16,0,MB_SIZE); 403 } 404 /* top pels */ 405 if (ps_proc->ps_ngbr_avbl->u1_mb_b) 406 { 407 ps_codec->pf_mem_cpy_mul8(pu1_ngbr_pels_i16+16+1,pu1_mb_b,16); 408 /*for(i = 0; i < 16; i++) 409 pu1_ngbr_pels_i16[16+1+i] = pu1_mb_b[i];*/ 410 } 411 else 412 { 413 ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_i16+16+1,0,MB_SIZE); 414 } 415 /* topleft pels */ 416 if (ps_proc->ps_ngbr_avbl->u1_mb_d) 417 pu1_ngbr_pels_i16[16] = *pu1_mb_d; 418 else 419 pu1_ngbr_pels_i16[16] = 0; 420 421 /* set valid intra modes for evaluation */ 422 // u4_valid_intra_modes = 15; 423 //// ih264e_filter_intra16x16modes(pu1_mb_curr, i4_src_strd, &u4_valid_intra_modes); 424 // if (!ps_proc->ps_ngbr_avbl->u1_mb_a) 425 // u4_valid_intra_modes &= ~(1 << HORZ_I16x16); 426 // if (!ps_proc->ps_ngbr_avbl->u1_mb_b) 427 // u4_valid_intra_modes &= ~(1 << VERT_I16x16); 428 //// if (!ps_proc->ps_ngbr_avbl->u1_mb_a || 
!ps_proc->ps_ngbr_avbl->u1_mb_b || !ps_proc->ps_ngbr_avbl->u1_mb_d) 429 // if (i4_ngbr_avbl != 7) 430 // u4_valid_intra_modes &= ~(1 << PLANE_I16x16); 431 432 u4_valid_intra_modes = u1_valid_intra_modes[i4_ngbr_avbl]; 433 434 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST) 435 u4_valid_intra_modes &= ~(1 << PLANE_I16x16); 436 437 /* evaluate b/w HORZ_I16x16, VERT_I16x16 & DC_I16x16 */ 438 ps_codec->pf_ih264e_evaluate_intra16x16_modes(pu1_curr_mb, pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16, 439 i4_src_strd, i4_pred_strd, 440 i4_ngbr_avbl, &u4_intra_mode, &i4_mb_distortion_least, 441 u4_valid_intra_modes); 442 443 /* cost = distortion + lambda*rate */ 444 i4_mb_cost_least = i4_mb_distortion_least; 445 446 if (( (u4_valid_intra_modes >> 3) & 1) != 0 && (ps_codec->s_cfg.u4_enc_speed_preset != IVE_FASTEST || 447 ps_proc->i4_slice_type == ISLICE)) 448 { 449 /* intra prediction for PLANE mode*/ 450 (ps_codec->apf_intra_pred_16_l)[PLANE_I16x16](pu1_ngbr_pels_i16, pu1_pred_mb_intra_16x16_plane, 0, i4_pred_strd, i4_ngbr_avbl); 451 452 /* evaluate distortion between the actual blk and the estimated blk for the given mode */ 453 ps_codec->apf_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_pred_mb_intra_16x16_plane, i4_src_strd, i4_pred_strd, i4_mb_cost_least, &i4_mb_distortion); 454 455 /* cost = distortion + lambda*rate */ 456 i4_mb_cost = i4_mb_distortion; 457 458 /* update the least cost information if necessary */ 459 if(i4_mb_cost < i4_mb_distortion_least) 460 { 461 u4_intra_mode = PLANE_I16x16; 462 463 i4_mb_cost_least = i4_mb_cost; 464 i4_mb_distortion_least = i4_mb_distortion; 465 } 466 } 467 468 u4_best_intra_16x16_mode = u4_intra_mode; 469 470 DEBUG("%d partition cost, %d intra mode\n", i4_mb_cost_least * 32, u4_best_intra_16x16_mode); 471 472 ps_proc->u1_l_i16_mode = u4_best_intra_16x16_mode; 473 474 /* cost = distortion + lambda*rate */ 475 i4_mb_cost_least = i4_mb_distortion_least + u4_lambda*u1_uev_codelength[offset + u4_best_intra_16x16_mode]; 476 
477 478 /* update the type of the mb if necessary */ 479 if (i4_mb_cost_least < ps_proc->i4_mb_cost) 480 { 481 ps_proc->i4_mb_cost = i4_mb_cost_least; 482 ps_proc->i4_mb_distortion = i4_mb_distortion_least; 483 ps_proc->u4_mb_type = I16x16; 484 } 485 486 return ; 487 } 488 489 490 /** 491 ****************************************************************************** 492 * 493 * @brief 494 * evaluate best intra 8x8 mode (rate distortion opt on) 495 * 496 * @par Description 497 * This function evaluates all the possible intra 8x8 modes and finds the mode 498 * that best represents the macro-block (least distortion) and occupies fewer 499 * bits in the bit-stream. 500 * 501 * @param[in] ps_proc_ctxt 502 * pointer to proc ctxt 503 * 504 * @remarks Ideally the cost of encoding a macroblock is calculated as 505 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the 506 * input block and the reconstructed block and rate is the number of bits taken 507 * to place the macroblock in the bit-stream. In this routine the rate does not 508 * exactly point to the total number of bits it takes, rather it points to header 509 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits 510 * and residual bits fall in to texture bits the number of bits taken to encoding 511 * mbtype is considered as rate, we compute cost. Further we will approximate 512 * the distortion as the deviation b/w input and the predicted block as opposed 513 * to input and reconstructed block. 
514 * 515 * NOTE: TODO: This function needs to be tested 516 * 517 * @return none 518 * 519 ****************************************************************************** 520 */ 521 void ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc) 522 { 523 /* Codec Context */ 524 codec_t *ps_codec = ps_proc->ps_codec; 525 526 /* SAD(distortion metric) of an 4x4 block */ 527 WORD32 i4_partition_distortion, i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0; 528 529 /* lambda */ 530 UWORD32 u4_lambda = ps_proc->u4_lambda; 531 532 /* cost = distortion + lambda*rate */ 533 WORD32 i4_partition_cost, i4_partition_cost_least, i4_total_cost = u4_lambda; 534 535 /* cost due to mbtype */ 536 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda; 537 538 /* intra mode */ 539 UWORD32 u4_intra_mode, u4_best_intra_8x8_mode = DC_I8x8, u4_estimated_intra_8x8_mode; 540 541 /* neighbor pels for intra prediction */ 542 UWORD8 *pu1_ngbr_pels_i8 = ps_proc->au1_ngbr_pels; 543 544 /* pointer to curr partition */ 545 UWORD8 *pu1_mb_curr; 546 547 /* pointer to prediction macro block */ 548 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; 549 550 /* strides */ 551 WORD32 i4_src_strd = ps_proc->i4_src_strd; 552 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 553 554 /* neighbors left, top, top right, top left */ 555 UWORD8 *pu1_mb_a; 556 UWORD8 *pu1_mb_b; 557 UWORD8 *pu1_mb_d; 558 559 /* neighbor availability */ 560 WORD32 i4_ngbr_avbl; 561 block_neighbors_t s_ngbr_avbl; 562 563 /* temp vars */ 564 UWORD32 b8, u4_pix_x, u4_pix_y; 565 566 /* ngbr mb syntax information */ 567 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4); 568 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; 569 570 /* valid intra modes map */ 571 UWORD32 u4_valid_intra_modes; 572 573 for(b8 = 0; b8 < 4; b8++) 574 { 575 u4_pix_x = (b8 & 0x01) << 3; 576 u4_pix_y = (b8 >> 1) << 3; 577 578 pu1_mb_curr = 
ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd); 579 /* when rdopt is off, we use the input as reference for constructing prediction buffer */ 580 /* as opposed to using the recon pels. (open loop intra prediction) */ 581 pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */ 582 pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */ 583 pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */ 584 585 /* locating neighbors that are available for prediction */ 586 /* TODO : update the neighbor availability information basing on constrained intra pred information */ 587 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */ 588 /* basing on neighbors available and hence evade the computation of neighbor availability totally. */ 589 s_ngbr_avbl.u1_mb_a = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x - 1, u4_pix_y); /* xD = -1, yD = 0 */ 590 s_ngbr_avbl.u1_mb_b = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x, u4_pix_y - 1); /* xD = 0, yD = -1 */ 591 s_ngbr_avbl.u1_mb_c = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x + 8, u4_pix_y - 1); /* xD = BLK_8x8_SIZE, yD = -1 */ 592 s_ngbr_avbl.u1_mb_d = ih264e_derive_ngbr_avbl_of_mb_partitions(ps_proc->ps_ngbr_avbl, u4_pix_x - 1, u4_pix_y - 1); /* xD = -1, yD = -1 */ 593 594 /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_c * TOP_RIGHT_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */ 595 i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + (s_ngbr_avbl.u1_mb_c << 3) + 596 (s_ngbr_avbl.u1_mb_a << 4); 597 /* if top partition is available and top right is not available for intra prediction, then */ 598 /* padd top right samples using top sample and make top right also available */ 599 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + 
(s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */ 600 ps_proc->ai4_neighbor_avail_8x8_subblks[b8] = i4_ngbr_avbl; 601 602 603 ih264_intra_pred_luma_8x8_mode_ref_filtering(pu1_mb_a, pu1_mb_b, pu1_mb_d, pu1_ngbr_pels_i8, 604 i4_src_strd, i4_ngbr_avbl); 605 606 i4_partition_cost_least = INT_MAX; 607 /* set valid intra modes for evaluation */ 608 u4_valid_intra_modes = 0x1ff; 609 610 if (!s_ngbr_avbl.u1_mb_b) 611 { 612 u4_valid_intra_modes &= ~(1 << VERT_I4x4); 613 u4_valid_intra_modes &= ~(1 << DIAG_DL_I4x4); 614 u4_valid_intra_modes &= ~(1 << VERT_L_I4x4); 615 } 616 if (!s_ngbr_avbl.u1_mb_a) 617 { 618 u4_valid_intra_modes &= ~(1 << HORZ_I4x4); 619 u4_valid_intra_modes &= ~(1 << HORZ_U_I4x4); 620 } 621 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b || !s_ngbr_avbl.u1_mb_d) 622 { 623 u4_valid_intra_modes &= ~(1 << DIAG_DR_I4x4); 624 u4_valid_intra_modes &= ~(1 << VERT_R_I4x4); 625 u4_valid_intra_modes &= ~(1 << HORZ_D_I4x4); 626 } 627 628 /* estimate the intra 8x8 mode for the current partition (for evaluating cost) */ 629 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b) 630 { 631 u4_estimated_intra_8x8_mode = DC_I8x8; 632 } 633 else 634 { 635 UWORD32 u4_left_intra_8x8_mode = DC_I8x8; 636 UWORD32 u4_top_intra_8x8_mode = DC_I8x8; 637 638 if (u4_pix_x == 0) 639 { 640 if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8) 641 { 642 u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[b8+1]; 643 } 644 else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4) 645 { 646 u4_left_intra_8x8_mode = ps_proc->au1_left_mb_intra_modes[(b8+1)*4+2]; 647 } 648 } 649 else 650 { 651 u4_left_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-1]; 652 } 653 654 if (u4_pix_y == 0) 655 { 656 if (ps_top_mb_syn_ele->u2_mb_type == I8x8) 657 { 658 u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[b8+2]; 659 } 660 else if (ps_top_mb_syn_ele->u2_mb_type == I4x4) 661 { 662 u4_top_intra_8x8_mode = pu1_top_mb_intra_modes[(b8+2)*4+2]; 663 } 664 } 665 else 
666 { 667 u4_top_intra_8x8_mode = ps_proc->au1_intra_luma_mb_8x8_modes[b8-2]; 668 } 669 670 u4_estimated_intra_8x8_mode = MIN(u4_left_intra_8x8_mode, u4_top_intra_8x8_mode); 671 } 672 673 /* perform intra mode 8x8 evaluation */ 674 for (u4_intra_mode = VERT_I8x8; u4_valid_intra_modes != 0; u4_intra_mode++, u4_valid_intra_modes >>= 1) 675 { 676 if ( (u4_valid_intra_modes & 1) == 0) 677 continue; 678 679 /* intra prediction */ 680 (ps_codec->apf_intra_pred_8_l)[u4_intra_mode](pu1_ngbr_pels_i8, pu1_pred_mb, 0, i4_pred_strd, i4_ngbr_avbl); 681 682 /* evaluate distortion between the actual blk and the estimated blk for the given mode */ 683 ime_compute_sad_8x8(pu1_mb_curr, pu1_pred_mb, i4_src_strd, i4_pred_strd, i4_partition_cost_least, &i4_partition_distortion); 684 685 i4_partition_cost = i4_partition_distortion + ((u4_estimated_intra_8x8_mode == u4_intra_mode)?u4_cost_one_bit:u4_cost_four_bits); 686 687 /* update the least cost information if necessary */ 688 if (i4_partition_cost < i4_partition_cost_least) 689 { 690 i4_partition_cost_least = i4_partition_cost; 691 i4_partition_distortion_least = i4_partition_distortion; 692 u4_best_intra_8x8_mode = u4_intra_mode; 693 } 694 } 695 /* macroblock distortion */ 696 i4_total_cost += i4_partition_cost_least; 697 i4_total_distortion += i4_partition_distortion_least; 698 /* mb partition mode */ 699 ps_proc->au1_intra_luma_mb_8x8_modes[b8] = u4_best_intra_8x8_mode; 700 701 } 702 703 /* update the type of the mb if necessary */ 704 if (i4_total_cost < ps_proc->i4_mb_cost) 705 { 706 ps_proc->i4_mb_cost = i4_total_cost; 707 ps_proc->i4_mb_distortion = i4_total_distortion; 708 ps_proc->u4_mb_type = I8x8; 709 } 710 711 return ; 712 } 713 714 715 /** 716 ****************************************************************************** 717 * 718 * @brief 719 * evaluate best intra 4x4 mode (rate distortion opt off) 720 * 721 * @par Description 722 * This function evaluates all the possible intra 4x4 modes and finds the mode 723 * that 
best represents the macro-block (least distortion) and occupies fewer 724 * bits in the bit-stream. 725 * 726 * @param[in] ps_proc_ctxt 727 * pointer to proc ctxt 728 * 729 * @remarks 730 * Ideally the cost of encoding a macroblock is calculated as 731 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the 732 * input block and the reconstructed block and rate is the number of bits taken 733 * to place the macroblock in the bit-stream. In this routine the rate does not 734 * exactly point to the total number of bits it takes, rather it points to header 735 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits 736 * and residual bits fall in to texture bits the number of bits taken to encoding 737 * mbtype is considered as rate, we compute cost. Further we will approximate 738 * the distortion as the deviation b/w input and the predicted block as opposed 739 * to input and reconstructed block. 740 * 741 * NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock, 742 * 24*lambda is added to the SAD before comparison with the best SAD for 743 * inter prediction. This is an empirical value to prevent using too many intra 744 * blocks. 
745 * 746 * @return none 747 * 748 ****************************************************************************** 749 */ 750 void ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc) 751 { 752 /* Codec Context */ 753 codec_t *ps_codec = ps_proc->ps_codec; 754 755 /* SAD(distortion metric) of an 4x4 block */ 756 WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0; 757 758 /* lambda */ 759 UWORD32 u4_lambda = ps_proc->u4_lambda; 760 761 /* cost = distortion + lambda*rate */ 762 WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda; 763 764 /* cost due to mbtype */ 765 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda; 766 767 /* intra mode */ 768 UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode; 769 770 /* neighbor pels for intra prediction */ 771 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels; 772 773 /* pointer to curr partition */ 774 UWORD8 *pu1_mb_curr; 775 776 /* pointer to prediction macro block */ 777 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; 778 779 /* strides */ 780 WORD32 i4_src_strd = ps_proc->i4_src_strd; 781 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 782 783 /* neighbors left, top, top right, top left */ 784 UWORD8 *pu1_mb_a; 785 UWORD8 *pu1_mb_b; 786 UWORD8 *pu1_mb_c; 787 UWORD8 *pu1_mb_d; 788 789 /* neighbor availability */ 790 WORD32 i4_ngbr_avbl; 791 block_neighbors_t s_ngbr_avbl; 792 793 /* temp vars */ 794 UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y; 795 796 /* scan order inside 4x4 block */ 797 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; 798 799 /* ngbr sub mb modes */ 800 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4); 801 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; 802 803 /* valid intra modes map */ 804 UWORD32 u4_valid_intra_modes; 805 UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 
141, 511}; 806 807 i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_c << 3); 808 memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16); 809 810 for (b8 = 0; b8 < 4; b8++) 811 { 812 u4_blk_x = (b8 & 0x01) << 3; 813 u4_blk_y = (b8 >> 1) << 3; 814 for (b4 = 0; b4 < 4; b4++) 815 { 816 u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2); 817 u4_pix_y = u4_blk_y + ((b4 >> 1) << 2); 818 819 pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd); 820 /* when rdopt is off, we use the input as reference for constructing prediction buffer */ 821 /* as opposed to using the recon pels. (open loop intra prediction) */ 822 pu1_mb_a = pu1_mb_curr - 1; /* pointer to left macro block */ 823 pu1_mb_b = pu1_mb_curr - i4_src_strd; /* pointer to top macro block */ 824 pu1_mb_c = pu1_mb_b + 4; /* pointer to top macro block */ 825 pu1_mb_d = pu1_mb_b - 1; /* pointer to top left macro block */ 826 827 /* locating neighbors that are available for prediction */ 828 /* TODO : update the neighbor availability information basing on constrained intra pred information */ 829 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */ 830 /* basing on neighbors available and hence evade the computation of neighbor availability totally. 
*/ 831 832 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4]; 833 s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1); 834 s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1; 835 s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2; 836 s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3; 837 /* set valid intra modes for evaluation */ 838 u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7]; 839 840 /* if top partition is available and top right is not available for intra prediction, then */ 841 /* padd top right samples using top sample and make top right also available */ 842 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */ 843 844 /* gather prediction pels from the neighbors */ 845 if (s_ngbr_avbl.u1_mb_a) 846 { 847 for(i = 0; i < 4; i++) 848 pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_src_strd]; 849 } 850 else 851 { 852 memset(pu1_ngbr_pels_i4, 0, 4); 853 } 854 855 if (s_ngbr_avbl.u1_mb_b) 856 { 857 memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4); 858 } 859 else 860 { 861 memset(pu1_ngbr_pels_i4 + 5, 0, 4); 862 } 863 864 if (s_ngbr_avbl.u1_mb_d) 865 pu1_ngbr_pels_i4[4] = *pu1_mb_d; 866 else 867 pu1_ngbr_pels_i4[4] = 0; 868 869 if (s_ngbr_avbl.u1_mb_c) 870 { 871 memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4); 872 } 873 else if (s_ngbr_avbl.u1_mb_b) 874 { 875 memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4); 876 s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b; 877 } 878 879 i4_partition_cost_least = INT_MAX; 880 881 /* predict the intra 4x4 mode for the current partition (for evaluating cost) */ 882 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b) 883 { 884 u4_estimated_intra_4x4_mode = DC_I4x4; 885 } 886 else 887 { 888 UWORD32 u4_left_intra_4x4_mode = DC_I4x4; 889 UWORD32 u4_top_intra_4x4_mode = DC_I4x4; 890 891 if (u4_pix_x == 0) 892 { 893 if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4) 894 { 895 u4_left_intra_4x4_mode = 
ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]]; 896 } 897 else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8) 898 { 899 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1]; 900 } 901 } 902 else 903 { 904 u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]]; 905 } 906 907 if (u4_pix_y == 0) 908 { 909 if (ps_top_mb_syn_ele->u2_mb_type == I4x4) 910 { 911 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]]; 912 } 913 else if (ps_top_mb_syn_ele->u2_mb_type == I8x8) 914 { 915 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2]; 916 } 917 } 918 else 919 { 920 u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]]; 921 } 922 923 u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode); 924 } 925 926 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_estimated_intra_4x4_mode; 927 928 /* mode evaluation and prediction */ 929 ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr, 930 pu1_ngbr_pels_i4, 931 pu1_pred_mb, i4_src_strd, 932 i4_pred_strd, i4_ngbr_avbl, 933 &u4_best_intra_4x4_mode, 934 &i4_partition_cost_least, 935 u4_valid_intra_modes, 936 u4_lambda, 937 u4_estimated_intra_4x4_mode); 938 939 940 i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode) ? 
u4_cost_one_bit : u4_cost_four_bits); 941 942 DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode); 943 /* macroblock distortion */ 944 i4_total_distortion += i4_partition_distortion_least; 945 i4_total_cost += i4_partition_cost_least; 946 /* mb partition mode */ 947 ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode; 948 } 949 } 950 951 /* update the type of the mb if necessary */ 952 if (i4_total_cost < ps_proc->i4_mb_cost) 953 { 954 ps_proc->i4_mb_cost = i4_total_cost; 955 ps_proc->i4_mb_distortion = i4_total_distortion; 956 ps_proc->u4_mb_type = I4x4; 957 } 958 959 return ; 960 } 961 962 /** 963 ****************************************************************************** 964 * 965 * @brief evaluate best intra 4x4 mode (rate distortion opt on) 966 * 967 * @par Description 968 * This function evaluates all the possible intra 4x4 modes and finds the mode 969 * that best represents the macro-block (least distortion) and occupies fewer 970 * bits in the bit-stream. 971 * 972 * @param[in] ps_proc_ctxt 973 * pointer to proc ctxt 974 * 975 * @remarks 976 * Ideally the cost of encoding a macroblock is calculated as 977 * (distortion + lambda*rate). Where distortion is SAD/SATD,... between the 978 * input block and the reconstructed block and rate is the number of bits taken 979 * to place the macroblock in the bit-stream. In this routine the rate does not 980 * exactly point to the total number of bits it takes, rather it points to header 981 * bits necessary for encoding the macroblock. Assuming the deltaQP, cbp bits 982 * and residual bits fall in to texture bits the number of bits taken to encoding 983 * mbtype is considered as rate, we compute cost. Further we will approximate 984 * the distortion as the deviation b/w input and the predicted block as opposed 985 * to input and reconstructed block. 
986 * 987 * NOTE: As per the Document JVT-O079, for the whole intra 4x4 macroblock, 988 * 24*lambda is added to the SAD before comparison with the best SAD for 989 * inter prediction. This is an empirical value to prevent using too many intra 990 * blocks. 991 * 992 * @return none 993 * 994 ****************************************************************************** 995 */ 996 void ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(process_ctxt_t *ps_proc) 997 { 998 /* Codec Context */ 999 codec_t *ps_codec = ps_proc->ps_codec; 1000 1001 /* SAD(distortion metric) of an 4x4 block */ 1002 WORD32 i4_partition_distortion_least = INT_MAX, i4_total_distortion = 0; 1003 1004 /* lambda */ 1005 UWORD32 u4_lambda = ps_proc->u4_lambda; 1006 1007 /* cost = distortion + lambda*rate */ 1008 WORD32 i4_partition_cost_least, i4_total_cost = (24 + 1) * u4_lambda; 1009 1010 /* cost due to mbtype */ 1011 UWORD32 u4_cost_one_bit = u4_lambda, u4_cost_four_bits = 4 * u4_lambda; 1012 1013 /* intra mode */ 1014 UWORD32 u4_best_intra_4x4_mode = DC_I4x4, u4_estimated_intra_4x4_mode; 1015 1016 /* neighbor pels for intra prediction */ 1017 UWORD8 *pu1_ngbr_pels_i4 = ps_proc->au1_ngbr_pels; 1018 1019 /* pointer to curr partition */ 1020 UWORD8 *pu1_mb_curr; 1021 UWORD8 *pu1_mb_ref_left, *pu1_mb_ref_top; 1022 UWORD8 *pu1_ref_mb_intra_4x4; 1023 1024 /* pointer to residual macro block */ 1025 WORD16 *pi2_res_mb = ps_proc->pi2_res_buf_intra_4x4; 1026 1027 /* pointer to prediction macro block */ 1028 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb; 1029 1030 /* strides */ 1031 WORD32 i4_src_strd = ps_proc->i4_src_strd; 1032 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 1033 WORD32 i4_ref_strd_left, i4_ref_strd_top; 1034 1035 /* neighbors left, top, top right, top left */ 1036 UWORD8 *pu1_mb_a; 1037 UWORD8 *pu1_mb_b; 1038 UWORD8 *pu1_mb_c; 1039 UWORD8 *pu1_mb_d; 1040 1041 /* number of non zero coeffs*/ 1042 UWORD8 *pu1_nnz = (UWORD8 *)ps_proc->au4_nnz_intra_4x4; 1043 1044 /* quantization parameters */ 
1045 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0]; 1046 1047 /* neighbor availability */ 1048 WORD32 i4_ngbr_avbl; 1049 block_neighbors_t s_ngbr_avbl; 1050 1051 /* temp vars */ 1052 UWORD32 i, b8, b4, u4_blk_x, u4_blk_y, u4_pix_x, u4_pix_y; 1053 1054 /* scan order inside 4x4 block */ 1055 const UWORD8 u1_scan_order[16] = {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}; 1056 1057 /* ngbr sub mb modes */ 1058 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (ps_proc->i4_mb_x << 4); 1059 mb_info_t *ps_top_mb_syn_ele = ps_proc->ps_top_row_mb_syntax_ele + ps_proc->i4_mb_x; 1060 1061 /* valid intra modes map */ 1062 UWORD32 u4_valid_intra_modes; 1063 UWORD16 u2_valid_modes[8] = {4, 262, 4, 262, 141, 399, 141, 511}; 1064 1065 /* Dummy variable for 4x4 trans function */ 1066 WORD16 i2_dc_dummy; 1067 1068 /* compute ngbr availability for sub blks */ 1069 i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_c << 3); 1070 memcpy(ps_proc->au1_ngbr_avbl_4x4_subblks, gau1_ih264_4x4_ngbr_avbl[i4_ngbr_avbl], 16); 1071 1072 for(b8 = 0; b8 < 4; b8++) 1073 { 1074 u4_blk_x = (b8 & 0x01) << 3; 1075 u4_blk_y = (b8 >> 1) << 3; 1076 for(b4 = 0; b4 < 4; b4++, pu1_nnz++, pi2_res_mb += MB_SIZE) 1077 { 1078 u4_pix_x = u4_blk_x + ((b4 & 0x01) << 2); 1079 u4_pix_y = u4_blk_y + ((b4 >> 1) << 2); 1080 1081 pu1_ref_mb_intra_4x4 = ps_proc->pu1_ref_mb_intra_4x4 + u4_pix_x + (u4_pix_y * i4_pred_strd); 1082 pu1_mb_curr = ps_proc->pu1_src_buf_luma + u4_pix_x + (u4_pix_y * i4_src_strd); 1083 if (u4_pix_x == 0) 1084 { 1085 i4_ref_strd_left = ps_proc->i4_rec_strd; 1086 pu1_mb_ref_left = ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_left); 1087 } 1088 else 1089 { 1090 i4_ref_strd_left = i4_pred_strd; 1091 pu1_mb_ref_left = pu1_ref_mb_intra_4x4; 1092 } 1093 if (u4_pix_y == 0) 1094 { 1095 i4_ref_strd_top = ps_proc->i4_rec_strd; 1096 pu1_mb_ref_top = 
ps_proc->pu1_rec_buf_luma + u4_pix_x + (u4_pix_y * i4_ref_strd_top); 1097 } 1098 else 1099 { 1100 i4_ref_strd_top = i4_pred_strd; 1101 pu1_mb_ref_top = pu1_ref_mb_intra_4x4; 1102 } 1103 1104 pu1_mb_a = pu1_mb_ref_left - 1; /* pointer to left macro block */ 1105 pu1_mb_b = pu1_mb_ref_top - i4_ref_strd_top; /* pointer to top macro block */ 1106 pu1_mb_c = pu1_mb_b + 4; /* pointer to top right macro block */ 1107 if (u4_pix_y == 0) 1108 pu1_mb_d = pu1_mb_b - 1; 1109 else 1110 pu1_mb_d = pu1_mb_a - i4_ref_strd_left; /* pointer to top left macro block */ 1111 1112 /* locating neighbors that are available for prediction */ 1113 /* TODO : update the neighbor availability information basing on constrained intra pred information */ 1114 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines */ 1115 /* basing on neighbors available and hence evade the computation of neighbor availability totally. */ 1116 1117 i4_ngbr_avbl = ps_proc->au1_ngbr_avbl_4x4_subblks[(b8 << 2) + b4]; 1118 s_ngbr_avbl.u1_mb_a = (i4_ngbr_avbl & 0x1); 1119 s_ngbr_avbl.u1_mb_d = (i4_ngbr_avbl & 0x2) >> 1; 1120 s_ngbr_avbl.u1_mb_b = (i4_ngbr_avbl & 0x4) >> 2; 1121 s_ngbr_avbl.u1_mb_c = (i4_ngbr_avbl & 0x8) >> 3; 1122 /* set valid intra modes for evaluation */ 1123 u4_valid_intra_modes = u2_valid_modes[i4_ngbr_avbl & 0x7]; 1124 1125 /* if top partition is available and top right is not available for intra prediction, then */ 1126 /* padd top right samples using top sample and make top right also available */ 1127 /* i4_ngbr_avbl = (s_ngbr_avbl.u1_mb_a) + (s_ngbr_avbl.u1_mb_d << 1) + (s_ngbr_avbl.u1_mb_b << 2) + ((s_ngbr_avbl.u1_mb_b | s_ngbr_avbl.u1_mb_c) << 3); */ 1128 1129 /* gather prediction pels from the neighbors */ 1130 if (s_ngbr_avbl.u1_mb_a) 1131 { 1132 for(i = 0; i < 4; i++) 1133 pu1_ngbr_pels_i4[4 - 1 -i] = pu1_mb_a[i * i4_ref_strd_left]; 1134 } 1135 else 1136 { 1137 memset(pu1_ngbr_pels_i4,0,4); 1138 } 1139 if(s_ngbr_avbl.u1_mb_b) 1140 { 1141 
memcpy(pu1_ngbr_pels_i4 + 4 + 1, pu1_mb_b, 4); 1142 } 1143 else 1144 { 1145 memset(pu1_ngbr_pels_i4 + 4 + 1, 0, 4); 1146 } 1147 if (s_ngbr_avbl.u1_mb_d) 1148 pu1_ngbr_pels_i4[4] = *pu1_mb_d; 1149 else 1150 pu1_ngbr_pels_i4[4] = 0; 1151 if (s_ngbr_avbl.u1_mb_c) 1152 { 1153 memcpy(pu1_ngbr_pels_i4 + 8 + 1, pu1_mb_c, 4); 1154 } 1155 else if (s_ngbr_avbl.u1_mb_b) 1156 { 1157 memset(pu1_ngbr_pels_i4 + 8 + 1, pu1_ngbr_pels_i4[8], 4); 1158 s_ngbr_avbl.u1_mb_c = s_ngbr_avbl.u1_mb_b; 1159 } 1160 1161 i4_partition_cost_least = INT_MAX; 1162 1163 /* predict the intra 4x4 mode for the current partition (for evaluating cost) */ 1164 if (!s_ngbr_avbl.u1_mb_a || !s_ngbr_avbl.u1_mb_b) 1165 { 1166 u4_estimated_intra_4x4_mode = DC_I4x4; 1167 } 1168 else 1169 { 1170 UWORD32 u4_left_intra_4x4_mode = DC_I4x4; 1171 UWORD32 u4_top_intra_4x4_mode = DC_I4x4; 1172 1173 if (u4_pix_x == 0) 1174 { 1175 if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I4x4) 1176 { 1177 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[u1_scan_order[3 + u4_pix_y]]; 1178 } 1179 else if (ps_proc->s_left_mb_syntax_ele.u2_mb_type == I8x8) 1180 { 1181 u4_left_intra_4x4_mode = ps_proc->au1_left_mb_intra_modes[b8 + 1]; 1182 } 1183 } 1184 else 1185 { 1186 u4_left_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 1]]; 1187 } 1188 1189 if (u4_pix_y == 0) 1190 { 1191 if (ps_top_mb_syn_ele->u2_mb_type == I4x4) 1192 { 1193 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[u1_scan_order[12 + (u4_pix_x >> 2)]]; 1194 } 1195 else if (ps_top_mb_syn_ele->u2_mb_type == I8x8) 1196 { 1197 u4_top_intra_4x4_mode = pu1_top_mb_intra_modes[b8 + 2]; 1198 } 1199 } 1200 else 1201 { 1202 u4_top_intra_4x4_mode = ps_proc->au1_intra_luma_mb_4x4_modes[u1_scan_order[(u4_pix_x >> 2) + u4_pix_y - 4]]; 1203 } 1204 1205 u4_estimated_intra_4x4_mode = MIN(u4_left_intra_4x4_mode, u4_top_intra_4x4_mode); 1206 } 1207 1208 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = 
u4_estimated_intra_4x4_mode; 1209 1210 /*mode evaluation and prediction*/ 1211 ps_codec->pf_ih264e_evaluate_intra_4x4_modes(pu1_mb_curr, 1212 pu1_ngbr_pels_i4, 1213 pu1_pred_mb, i4_src_strd, 1214 i4_pred_strd, i4_ngbr_avbl, 1215 &u4_best_intra_4x4_mode, 1216 &i4_partition_cost_least, 1217 u4_valid_intra_modes, 1218 u4_lambda, 1219 u4_estimated_intra_4x4_mode); 1220 1221 1222 i4_partition_distortion_least = i4_partition_cost_least - ((u4_estimated_intra_4x4_mode == u4_best_intra_4x4_mode)?u4_cost_one_bit:u4_cost_four_bits); 1223 1224 DEBUG("%d partition cost, %d intra mode\n", i4_partition_cost_least, u4_best_intra_4x4_mode); 1225 1226 /* macroblock distortion */ 1227 i4_total_distortion += i4_partition_distortion_least; 1228 i4_total_cost += i4_partition_cost_least; 1229 1230 /* mb partition mode */ 1231 ps_proc->au1_intra_luma_mb_4x4_modes[(b8 << 2) + b4] = u4_best_intra_4x4_mode; 1232 1233 1234 /********************************************************/ 1235 /* error estimation, */ 1236 /* transform */ 1237 /* quantization */ 1238 /********************************************************/ 1239 ps_codec->pf_resi_trans_quant_4x4(pu1_mb_curr, pu1_pred_mb, 1240 pi2_res_mb, i4_src_strd, 1241 i4_pred_strd, 1242 /* No op stride, this implies a buff of lenght 1x16 */ 1243 ps_qp_params->pu2_scale_mat, 1244 ps_qp_params->pu2_thres_mat, 1245 ps_qp_params->u1_qbits, 1246 ps_qp_params->u4_dead_zone, 1247 pu1_nnz, &i2_dc_dummy); 1248 1249 /********************************************************/ 1250 /* ierror estimation, */ 1251 /* itransform */ 1252 /* iquantization */ 1253 /********************************************************/ 1254 ps_codec->pf_iquant_itrans_recon_4x4(pi2_res_mb, pu1_pred_mb, 1255 pu1_ref_mb_intra_4x4, 1256 i4_pred_strd, i4_pred_strd, 1257 ps_qp_params->pu2_iscale_mat, 1258 ps_qp_params->pu2_weigh_mat, 1259 ps_qp_params->u1_qp_div, 1260 ps_proc->pv_scratch_buff, 0, 1261 NULL); 1262 } 1263 } 1264 1265 /* update the type of the mb if necessary */ 1266 if 
(i4_total_cost < ps_proc->i4_mb_cost) 1267 { 1268 ps_proc->i4_mb_cost = i4_total_cost; 1269 ps_proc->i4_mb_distortion = i4_total_distortion; 1270 ps_proc->u4_mb_type = I4x4; 1271 } 1272 1273 return ; 1274 } 1275 1276 /** 1277 ****************************************************************************** 1278 * 1279 * @brief 1280 * evaluate best chroma intra 8x8 mode (rate distortion opt off) 1281 * 1282 * @par Description 1283 * This function evaluates all the possible chroma intra 8x8 modes and finds 1284 * the mode that best represents the macroblock (least distortion) and occupies 1285 * fewer bits in the bitstream. 1286 * 1287 * @param[in] ps_proc_ctxt 1288 * pointer to macroblock context (handle) 1289 * 1290 * @remarks 1291 * For chroma best intra pred mode is calculated based only on SAD 1292 * 1293 * @returns none 1294 * 1295 ****************************************************************************** 1296 */ 1297 1298 void ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(process_ctxt_t *ps_proc) 1299 { 1300 /* Codec Context */ 1301 codec_t *ps_codec = ps_proc->ps_codec; 1302 1303 /* SAD(distortion metric) of an 8x8 block */ 1304 WORD32 i4_mb_distortion, i4_chroma_mb_distortion; 1305 1306 /* intra mode */ 1307 UWORD32 u4_best_chroma_intra_8x8_mode = DC_CH_I8x8; 1308 1309 /* neighbor pels for intra prediction */ 1310 UWORD8 *pu1_ngbr_pels_c_i8x8 = ps_proc->au1_ngbr_pels; 1311 1312 /* pointer to curr macro block */ 1313 UWORD8 *pu1_curr_mb = ps_proc->pu1_src_buf_chroma; 1314 UWORD8 *pu1_ref_mb = ps_proc->pu1_rec_buf_chroma; 1315 1316 /* pointer to prediction macro block */ 1317 UWORD8 *pu1_pred_mb = ps_proc->pu1_pred_mb_intra_chroma; 1318 UWORD8 *pu1_pred_mb_plane = ps_proc->pu1_pred_mb_intra_chroma_plane; 1319 1320 /* strides */ 1321 WORD32 i4_src_strd_c = ps_proc->i4_src_chroma_strd; 1322 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 1323 WORD32 i4_rec_strd_c = ps_proc->i4_rec_strd; 1324 1325 /* neighbors left, top, top left */ 1326 UWORD8 
*pu1_mb_a = pu1_ref_mb - 2; 1327 UWORD8 *pu1_mb_b = pu1_ref_mb - i4_rec_strd_c; 1328 UWORD8 *pu1_mb_d = pu1_mb_b - 2; 1329 1330 /* neighbor availability */ 1331 const UWORD8 u1_valid_intra_modes[8] = {1, 3, 9, 11, 5, 7, 13, 15,}; 1332 WORD32 i4_ngbr_avbl; 1333 1334 /* valid intra modes map */ 1335 UWORD32 u4_valid_intra_modes; 1336 1337 /* temp var */ 1338 UWORD8 i; 1339 1340 /* locating neighbors that are available for prediction */ 1341 /* TODO : update the neighbor availability information basing on constrained intra pred information */ 1342 /* TODO : i4_ngbr_avbl is only being used in DC mode. Can the DC mode be split in to distinct routines 1343 * basing on neighbors available and hence evade the computation of neighbor availability totally. */ 1344 /* i4_ngbr_avbl = blk_a * LEFT_MB_AVAILABLE_MASK + blk_b * TOP_MB_AVAILABLE_MASK + blk_d * TOP_LEFT_MB_AVAILABLE_MASK */ 1345 i4_ngbr_avbl = (ps_proc->ps_ngbr_avbl->u1_mb_a) + (ps_proc->ps_ngbr_avbl->u1_mb_b << 2) + (ps_proc->ps_ngbr_avbl->u1_mb_d << 1); 1346 ps_proc->i4_chroma_neighbor_avail_8x8_mb = i4_ngbr_avbl; 1347 1348 /* gather prediction pels from the neighbors */ 1349 /* left pels */ 1350 if (ps_proc->ps_ngbr_avbl->u1_mb_a) 1351 { 1352 for (i = 0; i < 16; i += 2) 1353 { 1354 pu1_ngbr_pels_c_i8x8[16 - 2 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c]; 1355 pu1_ngbr_pels_c_i8x8[16 - 1 - i] = pu1_mb_a[(i / 2) * i4_rec_strd_c + 1]; 1356 } 1357 } 1358 else 1359 { 1360 ps_codec->pf_mem_set_mul8(pu1_ngbr_pels_c_i8x8, 0, MB_SIZE); 1361 } 1362 1363 /* top pels */ 1364 if (ps_proc->ps_ngbr_avbl->u1_mb_b) 1365 { 1366 ps_codec->pf_mem_cpy_mul8(&pu1_ngbr_pels_c_i8x8[18], pu1_mb_b, 16); 1367 } 1368 else 1369 { 1370 ps_codec->pf_mem_set_mul8((pu1_ngbr_pels_c_i8x8 + 18), 0, MB_SIZE); 1371 } 1372 1373 /* top left pels */ 1374 if (ps_proc->ps_ngbr_avbl->u1_mb_d) 1375 { 1376 pu1_ngbr_pels_c_i8x8[16] = *pu1_mb_d; 1377 pu1_ngbr_pels_c_i8x8[17] = *(pu1_mb_d + 1); 1378 } 1379 1380 u4_valid_intra_modes = 
u1_valid_intra_modes[i4_ngbr_avbl]; 1381 1382 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_FAST) 1383 u4_valid_intra_modes &= ~(1 << PLANE_CH_I8x8); 1384 1385 i4_chroma_mb_distortion = INT_MAX; 1386 1387 /* perform intra mode chroma 8x8 evaluation */ 1388 /* intra prediction */ 1389 ps_codec->pf_ih264e_evaluate_intra_chroma_modes(pu1_curr_mb, 1390 pu1_ngbr_pels_c_i8x8, 1391 pu1_pred_mb, 1392 i4_src_strd_c, 1393 i4_pred_strd, 1394 i4_ngbr_avbl, 1395 &u4_best_chroma_intra_8x8_mode, 1396 &i4_chroma_mb_distortion, 1397 u4_valid_intra_modes); 1398 1399 if (u4_valid_intra_modes & 8)/* if Chroma PLANE is valid*/ 1400 { 1401 (ps_codec->apf_intra_pred_c)[PLANE_CH_I8x8](pu1_ngbr_pels_c_i8x8, pu1_pred_mb_plane, 0, i4_pred_strd, i4_ngbr_avbl); 1402 1403 /* evaluate distortion(sad) */ 1404 ps_codec->pf_compute_sad_16x8(pu1_curr_mb, pu1_pred_mb_plane, i4_src_strd_c, i4_pred_strd, i4_chroma_mb_distortion, &i4_mb_distortion); 1405 1406 /* update the least distortion information if necessary */ 1407 if(i4_mb_distortion < i4_chroma_mb_distortion) 1408 { 1409 i4_chroma_mb_distortion = i4_mb_distortion; 1410 u4_best_chroma_intra_8x8_mode = PLANE_CH_I8x8; 1411 } 1412 } 1413 1414 DEBUG("%d partition cost, %d intra mode\n", i4_chroma_mb_distortion, u4_best_chroma_intra_8x8_mode); 1415 1416 ps_proc->u1_c_i8_mode = u4_best_chroma_intra_8x8_mode; 1417 1418 return ; 1419 } 1420 1421 1422 /** 1423 ****************************************************************************** 1424 * 1425 * @brief 1426 * Evaluate best intra 16x16 mode (among VERT, HORZ and DC) and do the 1427 * prediction. 1428 * 1429 * @par Description 1430 * This function evaluates first three 16x16 modes and compute corresponding sad 1431 * and return the buffer predicted with best mode. 
1432 * 1433 * @param[in] pu1_src 1434 * UWORD8 pointer to the source 1435 * 1436 * @param[in] pu1_ngbr_pels_i16 1437 * UWORD8 pointer to neighbouring pels 1438 * 1439 * @param[out] pu1_dst 1440 * UWORD8 pointer to the destination 1441 * 1442 * @param[in] src_strd 1443 * integer source stride 1444 * 1445 * @param[in] dst_strd 1446 * integer destination stride 1447 * 1448 * @param[in] u4_n_avblty 1449 * availability of neighbouring pixels 1450 * 1451 * @param[in] u4_intra_mode 1452 * Pointer to the variable in which best mode is returned 1453 * 1454 * @param[in] pu4_sadmin 1455 * Pointer to the variable in which minimum sad is returned 1456 * 1457 * @param[in] u4_valid_intra_modes 1458 * Says what all modes are valid 1459 * 1460 * @returns none 1461 * 1462 ****************************************************************************** 1463 */ 1464 void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src, 1465 UWORD8 *pu1_ngbr_pels_i16, 1466 UWORD8 *pu1_dst, 1467 UWORD32 src_strd, 1468 UWORD32 dst_strd, 1469 WORD32 u4_n_avblty, 1470 UWORD32 *u4_intra_mode, 1471 WORD32 *pu4_sadmin, 1472 UWORD32 u4_valid_intra_modes) 1473 { 1474 UWORD8 *pu1_neighbour; 1475 UWORD8 *pu1_src_temp = pu1_src; 1476 UWORD8 left = 0, top = 0; 1477 WORD32 u4_dcval = 0; 1478 WORD32 i, j; 1479 WORD32 i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, i4_sad_dc = INT_MAX, 1480 i4_min_sad = INT_MAX; 1481 UWORD8 val; 1482 1483 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); 1484 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; 1485 1486 /* left available */ 1487 if (left) 1488 { 1489 i4_sad_horz = 0; 1490 1491 for (i = 0; i < 16; i++) 1492 { 1493 val = pu1_ngbr_pels_i16[15 - i]; 1494 1495 u4_dcval += val; 1496 1497 for (j = 0; j < 16; j++) 1498 { 1499 i4_sad_horz += ABS(val - pu1_src_temp[j]); 1500 } 1501 1502 pu1_src_temp += src_strd; 1503 } 1504 u4_dcval += 8; 1505 } 1506 1507 pu1_src_temp = pu1_src; 1508 /* top available */ 1509 if (top) 1510 { 1511 i4_sad_vert = 0; 1512 1513 for (i = 0; i < 16; i++) 1514 
{ 1515 u4_dcval += pu1_ngbr_pels_i16[17 + i]; 1516 1517 for (j = 0; j < 16; j++) 1518 { 1519 i4_sad_vert += ABS(pu1_ngbr_pels_i16[17 + j] - pu1_src_temp[j]); 1520 } 1521 pu1_src_temp += src_strd; 1522 1523 } 1524 u4_dcval += 8; 1525 } 1526 1527 u4_dcval = (u4_dcval) >> (3 + left + top); 1528 1529 pu1_src_temp = pu1_src; 1530 1531 /* none available */ 1532 u4_dcval += (left == 0) * (top == 0) * 128; 1533 1534 i4_sad_dc = 0; 1535 1536 for (i = 0; i < 16; i++) 1537 { 1538 for (j = 0; j < 16; j++) 1539 { 1540 i4_sad_dc += ABS(u4_dcval - pu1_src_temp[j]); 1541 } 1542 pu1_src_temp += src_strd; 1543 } 1544 1545 if ((u4_valid_intra_modes & 04) == 0)/* If DC is disabled */ 1546 i4_sad_dc = INT_MAX; 1547 1548 if ((u4_valid_intra_modes & 01) == 0)/* If VERT is disabled */ 1549 i4_sad_vert = INT_MAX; 1550 1551 if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled */ 1552 i4_sad_horz = INT_MAX; 1553 1554 i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert); 1555 1556 /* Finding Minimum sad and doing corresponding prediction */ 1557 if (i4_min_sad < *pu4_sadmin) 1558 { 1559 *pu4_sadmin = i4_min_sad; 1560 if (i4_min_sad == i4_sad_vert) 1561 { 1562 *u4_intra_mode = VERT_I16x16; 1563 pu1_neighbour = pu1_ngbr_pels_i16 + 17; 1564 for (j = 0; j < 16; j++) 1565 { 1566 memcpy(pu1_dst, pu1_neighbour, MB_SIZE); 1567 pu1_dst += dst_strd; 1568 } 1569 } 1570 else if (i4_min_sad == i4_sad_horz) 1571 { 1572 *u4_intra_mode = HORZ_I16x16; 1573 for (j = 0; j < 16; j++) 1574 { 1575 val = pu1_ngbr_pels_i16[15 - j]; 1576 memset(pu1_dst, val, MB_SIZE); 1577 pu1_dst += dst_strd; 1578 } 1579 } 1580 else 1581 { 1582 *u4_intra_mode = DC_I16x16; 1583 for (j = 0; j < 16; j++) 1584 { 1585 memset(pu1_dst, u4_dcval, MB_SIZE); 1586 pu1_dst += dst_strd; 1587 } 1588 } 1589 } 1590 return; 1591 } 1592 1593 /** 1594 ****************************************************************************** 1595 * 1596 * @brief 1597 * Evaluate best intra 4x4 mode and perform prediction. 
1598 * 1599 * @par Description 1600 * This function evaluates 4x4 modes and compute corresponding sad 1601 * and return the buffer predicted with best mode. 1602 * 1603 * @param[in] pu1_src 1604 * UWORD8 pointer to the source 1605 * 1606 * @param[in] pu1_ngbr_pels 1607 * UWORD8 pointer to neighbouring pels 1608 * 1609 * @param[out] pu1_dst 1610 * UWORD8 pointer to the destination 1611 * 1612 * @param[in] src_strd 1613 * integer source stride 1614 * 1615 * @param[in] dst_strd 1616 * integer destination stride 1617 * 1618 * @param[in] u4_n_avblty 1619 * availability of neighbouring pixels 1620 * 1621 * @param[in] u4_intra_mode 1622 * Pointer to the variable in which best mode is returned 1623 * 1624 * @param[in] pu4_sadmin 1625 * Pointer to the variable in which minimum cost is returned 1626 * 1627 * @param[in] u4_valid_intra_modes 1628 * Says what all modes are valid 1629 * 1630 * @param[in] u4_lambda 1631 * Lamda value for computing cost from SAD 1632 * 1633 * @param[in] u4_predictd_mode 1634 * Predicted mode for cost computation 1635 * 1636 * @returns none 1637 * 1638 ****************************************************************************** 1639 */ 1640 void ih264e_evaluate_intra_4x4_modes(UWORD8 *pu1_src, 1641 UWORD8 *pu1_ngbr_pels, 1642 UWORD8 *pu1_dst, 1643 UWORD32 src_strd, 1644 UWORD32 dst_strd, 1645 WORD32 u4_n_avblty, 1646 UWORD32 *u4_intra_mode, 1647 WORD32 *pu4_sadmin, 1648 UWORD32 u4_valid_intra_modes, 1649 UWORD32 u4_lambda, 1650 UWORD32 u4_predictd_mode) 1651 { 1652 UWORD8 *pu1_src_temp = pu1_src; 1653 UWORD8 *pu1_pred = pu1_ngbr_pels; 1654 UWORD8 left = 0, top = 0; 1655 UWORD8 u1_pred_val = 0; 1656 UWORD8 u1_pred_vals[4] = {0}; 1657 UWORD8 *pu1_pred_val = NULL; 1658 /* To store FILT121 operated values*/ 1659 UWORD8 u1_pred_vals_diag_121[15] = {0}; 1660 /* To store FILT11 operated values*/ 1661 UWORD8 u1_pred_vals_diag_11[15] = {0}; 1662 UWORD8 u1_pred_vals_vert_r[8] = {0}; 1663 UWORD8 u1_pred_vals_horz_d[10] = {0}; 1664 UWORD8 
u1_pred_vals_horz_u[10] = {0}; 1665 WORD32 u4_dcval = 0; 1666 WORD32 i4_sad[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1667 INT_MAX, INT_MAX, INT_MAX, INT_MAX}; 1668 1669 WORD32 i4_cost[MAX_I4x4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1670 INT_MAX, INT_MAX, INT_MAX, INT_MAX}; 1671 WORD32 i, i4_min_cost = INT_MAX; 1672 1673 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); 1674 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; 1675 1676 /* Computing SAD */ 1677 1678 /* VERT mode valid */ 1679 if (u4_valid_intra_modes & 1) 1680 { 1681 pu1_pred = pu1_ngbr_pels + 5; 1682 i4_sad[VERT_I4x4] = 0; 1683 i4_cost[VERT_I4x4] = 0; 1684 1685 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); 1686 pu1_src_temp += src_strd; 1687 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); 1688 pu1_src_temp += src_strd; 1689 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); 1690 pu1_src_temp += src_strd; 1691 USADA8(pu1_src_temp, pu1_pred, i4_sad[VERT_I4x4]); 1692 1693 i4_cost[VERT_I4x4] = i4_sad[VERT_I4x4] + ((u4_predictd_mode == VERT_I4x4) ? 
1694 u4_lambda : 4 * u4_lambda); 1695 } 1696 1697 /* HORZ mode valid */ 1698 if (u4_valid_intra_modes & 2) 1699 { 1700 i4_sad[HORZ_I4x4] = 0; 1701 i4_cost[HORZ_I4x4] =0; 1702 pu1_src_temp = pu1_src; 1703 1704 u1_pred_val = pu1_ngbr_pels[3]; 1705 1706 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val) 1707 + ABS(pu1_src_temp[1] - u1_pred_val) 1708 + ABS(pu1_src_temp[2] - u1_pred_val) 1709 + ABS(pu1_src_temp[3] - u1_pred_val); 1710 pu1_src_temp += src_strd; 1711 1712 u1_pred_val = pu1_ngbr_pels[2]; 1713 1714 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val) 1715 + ABS(pu1_src_temp[1] - u1_pred_val) 1716 + ABS(pu1_src_temp[2] - u1_pred_val) 1717 + ABS(pu1_src_temp[3] - u1_pred_val); 1718 pu1_src_temp += src_strd; 1719 1720 u1_pred_val = pu1_ngbr_pels[1]; 1721 1722 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val) 1723 + ABS(pu1_src_temp[1] - u1_pred_val) 1724 + ABS(pu1_src_temp[2] - u1_pred_val) 1725 + ABS(pu1_src_temp[3] - u1_pred_val); 1726 pu1_src_temp += src_strd; 1727 1728 u1_pred_val = pu1_ngbr_pels[0]; 1729 1730 i4_sad[HORZ_I4x4] += ABS(pu1_src_temp[0] - u1_pred_val) 1731 + ABS(pu1_src_temp[1] - u1_pred_val) 1732 + ABS(pu1_src_temp[2] - u1_pred_val) 1733 + ABS(pu1_src_temp[3] - u1_pred_val); 1734 1735 i4_cost[HORZ_I4x4] = i4_sad[HORZ_I4x4] + ((u4_predictd_mode == HORZ_I4x4) ? 1736 u4_lambda : 4 * u4_lambda); 1737 } 1738 1739 /* DC mode valid */ 1740 if (u4_valid_intra_modes & 4) 1741 { 1742 i4_sad[DC_I4x4] = 0; 1743 i4_cost[DC_I4x4] = 0; 1744 pu1_src_temp = pu1_src; 1745 1746 if (left) 1747 u4_dcval = pu1_ngbr_pels[0] + pu1_ngbr_pels[1] + pu1_ngbr_pels[2] 1748 + pu1_ngbr_pels[3] + 2; 1749 if (top) 1750 u4_dcval += pu1_ngbr_pels[5] + pu1_ngbr_pels[6] + pu1_ngbr_pels[7] 1751 + pu1_ngbr_pels[8] + 2; 1752 1753 u4_dcval = (u4_dcval) ? 
(u4_dcval >> (1 + left + top)) : 128; 1754 1755 /* none available */ 1756 memset(u1_pred_vals, u4_dcval, 4); 1757 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); 1758 pu1_src_temp += src_strd; 1759 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); 1760 pu1_src_temp += src_strd; 1761 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); 1762 pu1_src_temp += src_strd; 1763 USADA8(pu1_src_temp, u1_pred_vals, i4_sad[DC_I4x4]); 1764 pu1_src_temp += src_strd; 1765 1766 i4_cost[DC_I4x4] = i4_sad[DC_I4x4] + ((u4_predictd_mode == DC_I4x4) ? 1767 u4_lambda : 4 * u4_lambda); 1768 } 1769 1770 /* if modes other than VERT, HORZ and DC are valid */ 1771 if (u4_valid_intra_modes > 7) 1772 { 1773 pu1_pred = pu1_ngbr_pels; 1774 pu1_pred[13] = pu1_pred[14] = pu1_pred[12]; 1775 1776 /* Performing FILT121 and FILT11 operation for all neighbour values*/ 1777 for (i = 0; i < 13; i++) 1778 { 1779 u1_pred_vals_diag_121[i] = FILT121(pu1_pred[0], pu1_pred[1], pu1_pred[2]); 1780 u1_pred_vals_diag_11[i] = FILT11(pu1_pred[0], pu1_pred[1]); 1781 1782 pu1_pred++; 1783 } 1784 1785 if (u4_valid_intra_modes & 8)/* DIAG_DL */ 1786 { 1787 i4_sad[DIAG_DL_I4x4] = 0; 1788 i4_cost[DIAG_DL_I4x4] = 0; 1789 pu1_src_temp = pu1_src; 1790 pu1_pred_val = u1_pred_vals_diag_121 + 5; 1791 1792 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DL_I4x4]); 1793 pu1_src_temp += src_strd; 1794 USADA8(pu1_src_temp, (pu1_pred_val + 1), i4_sad[DIAG_DL_I4x4]); 1795 pu1_src_temp += src_strd; 1796 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[DIAG_DL_I4x4]); 1797 pu1_src_temp += src_strd; 1798 USADA8(pu1_src_temp, (pu1_pred_val + 3), i4_sad[DIAG_DL_I4x4]); 1799 pu1_src_temp += src_strd; 1800 i4_cost[DIAG_DL_I4x4] = i4_sad[DIAG_DL_I4x4] + ((u4_predictd_mode == DIAG_DL_I4x4) ? 
1801 u4_lambda : 4 * u4_lambda); 1802 } 1803 1804 if (u4_valid_intra_modes & 16)/* DIAG_DR */ 1805 { 1806 i4_sad[DIAG_DR_I4x4] = 0; 1807 i4_cost[DIAG_DR_I4x4] = 0; 1808 pu1_src_temp = pu1_src; 1809 pu1_pred_val = u1_pred_vals_diag_121 + 3; 1810 1811 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[DIAG_DR_I4x4]); 1812 pu1_src_temp += src_strd; 1813 USADA8(pu1_src_temp, (pu1_pred_val - 1), i4_sad[DIAG_DR_I4x4]); 1814 pu1_src_temp += src_strd; 1815 USADA8(pu1_src_temp, (pu1_pred_val - 2), i4_sad[DIAG_DR_I4x4]); 1816 pu1_src_temp += src_strd; 1817 USADA8(pu1_src_temp, (pu1_pred_val - 3), i4_sad[DIAG_DR_I4x4]); 1818 pu1_src_temp += src_strd; 1819 i4_cost[DIAG_DR_I4x4] = i4_sad[DIAG_DR_I4x4] + ((u4_predictd_mode == DIAG_DR_I4x4) ? 1820 u4_lambda : 4 * u4_lambda); 1821 1822 } 1823 1824 if (u4_valid_intra_modes & 32)/* VERT_R mode valid ????*/ 1825 { 1826 i4_sad[VERT_R_I4x4] = 0; 1827 1828 pu1_src_temp = pu1_src; 1829 u1_pred_vals_vert_r[0] = u1_pred_vals_diag_121[2]; 1830 memcpy((u1_pred_vals_vert_r + 1), (u1_pred_vals_diag_11 + 4), 3); 1831 u1_pred_vals_vert_r[4] = u1_pred_vals_diag_121[1]; 1832 memcpy((u1_pred_vals_vert_r + 5), (u1_pred_vals_diag_121 + 3), 3); 1833 1834 pu1_pred_val = u1_pred_vals_diag_11 + 4; 1835 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]); 1836 pu1_pred_val = u1_pred_vals_diag_121 + 3; 1837 pu1_src_temp += src_strd; 1838 USADA8(pu1_src_temp, pu1_pred_val, i4_sad[VERT_R_I4x4]); 1839 pu1_src_temp += src_strd; 1840 USADA8(pu1_src_temp, (u1_pred_vals_vert_r), i4_sad[VERT_R_I4x4]); 1841 pu1_src_temp += src_strd; 1842 USADA8(pu1_src_temp, (u1_pred_vals_vert_r + 4), 1843 i4_sad[VERT_R_I4x4]); 1844 1845 i4_cost[VERT_R_I4x4] = i4_sad[VERT_R_I4x4] + ((u4_predictd_mode == VERT_R_I4x4) ? 
1846 u4_lambda : 4 * u4_lambda); 1847 } 1848 1849 if (u4_valid_intra_modes & 64)/* HORZ_D mode valid ????*/ 1850 { 1851 i4_sad[HORZ_D_I4x4] = 0; 1852 1853 pu1_src_temp = pu1_src; 1854 u1_pred_vals_horz_d[6] = u1_pred_vals_diag_11[3]; 1855 memcpy((u1_pred_vals_horz_d + 7), (u1_pred_vals_diag_121 + 3), 3); 1856 u1_pred_vals_horz_d[0] = u1_pred_vals_diag_11[0]; 1857 u1_pred_vals_horz_d[1] = u1_pred_vals_diag_121[0]; 1858 u1_pred_vals_horz_d[2] = u1_pred_vals_diag_11[1]; 1859 u1_pred_vals_horz_d[3] = u1_pred_vals_diag_121[1]; 1860 u1_pred_vals_horz_d[4] = u1_pred_vals_diag_11[2]; 1861 u1_pred_vals_horz_d[5] = u1_pred_vals_diag_121[2]; 1862 1863 pu1_pred_val = u1_pred_vals_horz_d; 1864 USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_D_I4x4]); 1865 pu1_src_temp += src_strd; 1866 USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_D_I4x4]); 1867 pu1_src_temp += src_strd; 1868 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_D_I4x4]); 1869 pu1_src_temp += src_strd; 1870 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_D_I4x4]); 1871 1872 i4_cost[HORZ_D_I4x4] = i4_sad[HORZ_D_I4x4] + ((u4_predictd_mode == HORZ_D_I4x4) ? 1873 u4_lambda : 4 * u4_lambda); 1874 } 1875 1876 if (u4_valid_intra_modes & 128)/* VERT_L mode valid ????*/ 1877 { 1878 i4_sad[VERT_L_I4x4] = 0; 1879 pu1_src_temp = pu1_src; 1880 pu1_pred_val = u1_pred_vals_diag_11 + 5; 1881 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); 1882 pu1_src_temp += src_strd; 1883 pu1_pred_val = u1_pred_vals_diag_121 + 5; 1884 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); 1885 pu1_src_temp += src_strd; 1886 pu1_pred_val = u1_pred_vals_diag_11 + 6; 1887 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); 1888 pu1_src_temp += src_strd; 1889 pu1_pred_val = u1_pred_vals_diag_121 + 6; 1890 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[VERT_L_I4x4]); 1891 1892 i4_cost[VERT_L_I4x4] = i4_sad[VERT_L_I4x4] + ((u4_predictd_mode == VERT_L_I4x4) ? 
1893 u4_lambda : 4 * u4_lambda); 1894 } 1895 1896 if (u4_valid_intra_modes & 256)/* HORZ_U mode valid ????*/ 1897 { 1898 i4_sad[HORZ_U_I4x4] = 0; 1899 pu1_src_temp = pu1_src; 1900 u1_pred_vals_horz_u[0] = u1_pred_vals_diag_11[2]; 1901 u1_pred_vals_horz_u[1] = u1_pred_vals_diag_121[1]; 1902 u1_pred_vals_horz_u[2] = u1_pred_vals_diag_11[1]; 1903 u1_pred_vals_horz_u[3] = u1_pred_vals_diag_121[0]; 1904 u1_pred_vals_horz_u[4] = u1_pred_vals_diag_11[0]; 1905 u1_pred_vals_horz_u[5] = FILT121(pu1_ngbr_pels[0], pu1_ngbr_pels[0], pu1_ngbr_pels[1]); 1906 1907 memset((u1_pred_vals_horz_u + 6), pu1_ngbr_pels[0], 4); 1908 1909 pu1_pred_val = u1_pred_vals_horz_u; 1910 USADA8(pu1_src_temp, (pu1_pred_val), i4_sad[HORZ_U_I4x4]); 1911 pu1_src_temp += src_strd; 1912 USADA8(pu1_src_temp, (pu1_pred_val + 2), i4_sad[HORZ_U_I4x4]); 1913 pu1_src_temp += src_strd; 1914 USADA8(pu1_src_temp, (pu1_pred_val + 4), i4_sad[HORZ_U_I4x4]); 1915 pu1_src_temp += src_strd; 1916 USADA8(pu1_src_temp, (pu1_pred_val + 6), i4_sad[HORZ_U_I4x4]); 1917 1918 i4_cost[HORZ_U_I4x4] = i4_sad[HORZ_U_I4x4] + ((u4_predictd_mode == HORZ_U_I4x4) ? 
1919 u4_lambda : 4 * u4_lambda); 1920 } 1921 1922 i4_min_cost = MIN3(MIN3(i4_cost[0], i4_cost[1], i4_cost[2]), 1923 MIN3(i4_cost[3], i4_cost[4], i4_cost[5]), 1924 MIN3(i4_cost[6], i4_cost[7], i4_cost[8])); 1925 1926 } 1927 else 1928 { 1929 /* Only first three modes valid */ 1930 i4_min_cost = MIN3(i4_cost[0], i4_cost[1], i4_cost[2]); 1931 } 1932 1933 *pu4_sadmin = i4_min_cost; 1934 1935 if (i4_min_cost == i4_cost[0]) 1936 { 1937 *u4_intra_mode = VERT_I4x4; 1938 pu1_pred_val = pu1_ngbr_pels + 5; 1939 memcpy(pu1_dst, (pu1_pred_val), 4); 1940 pu1_dst += dst_strd; 1941 memcpy(pu1_dst, (pu1_pred_val), 4); 1942 pu1_dst += dst_strd; 1943 memcpy(pu1_dst, (pu1_pred_val), 4); 1944 pu1_dst += dst_strd; 1945 memcpy(pu1_dst, (pu1_pred_val), 4); 1946 } 1947 else if (i4_min_cost == i4_cost[1]) 1948 { 1949 *u4_intra_mode = HORZ_I4x4; 1950 memset(pu1_dst, pu1_ngbr_pels[3], 4); 1951 pu1_dst += dst_strd; 1952 memset(pu1_dst, pu1_ngbr_pels[2], 4); 1953 pu1_dst += dst_strd; 1954 memset(pu1_dst, pu1_ngbr_pels[1], 4); 1955 pu1_dst += dst_strd; 1956 memset(pu1_dst, pu1_ngbr_pels[0], 4); 1957 } 1958 else if (i4_min_cost == i4_cost[2]) 1959 { 1960 *u4_intra_mode = DC_I4x4; 1961 memset(pu1_dst, u4_dcval, 4); 1962 pu1_dst += dst_strd; 1963 memset(pu1_dst, u4_dcval, 4); 1964 pu1_dst += dst_strd; 1965 memset(pu1_dst, u4_dcval, 4); 1966 pu1_dst += dst_strd; 1967 memset(pu1_dst, u4_dcval, 4); 1968 } 1969 1970 else if (i4_min_cost == i4_cost[3]) 1971 { 1972 *u4_intra_mode = DIAG_DL_I4x4; 1973 pu1_pred_val = u1_pred_vals_diag_121 + 5; 1974 memcpy(pu1_dst, (pu1_pred_val), 4); 1975 pu1_dst += dst_strd; 1976 memcpy(pu1_dst, (pu1_pred_val + 1), 4); 1977 pu1_dst += dst_strd; 1978 memcpy(pu1_dst, (pu1_pred_val + 2), 4); 1979 pu1_dst += dst_strd; 1980 memcpy(pu1_dst, (pu1_pred_val + 3), 4); 1981 } 1982 else if (i4_min_cost == i4_cost[4]) 1983 { 1984 *u4_intra_mode = DIAG_DR_I4x4; 1985 pu1_pred_val = u1_pred_vals_diag_121 + 3; 1986 1987 memcpy(pu1_dst, (pu1_pred_val), 4); 1988 pu1_dst += dst_strd; 1989 
memcpy(pu1_dst, (pu1_pred_val - 1), 4); 1990 pu1_dst += dst_strd; 1991 memcpy(pu1_dst, (pu1_pred_val - 2), 4); 1992 pu1_dst += dst_strd; 1993 memcpy(pu1_dst, (pu1_pred_val - 3), 4); 1994 } 1995 1996 else if (i4_min_cost == i4_cost[5]) 1997 { 1998 *u4_intra_mode = VERT_R_I4x4; 1999 pu1_pred_val = u1_pred_vals_diag_11 + 4; 2000 memcpy(pu1_dst, (pu1_pred_val), 4); 2001 pu1_dst += dst_strd; 2002 pu1_pred_val = u1_pred_vals_diag_121 + 3; 2003 memcpy(pu1_dst, (pu1_pred_val), 4); 2004 pu1_dst += dst_strd; 2005 memcpy(pu1_dst, (u1_pred_vals_vert_r), 4); 2006 pu1_dst += dst_strd; 2007 memcpy(pu1_dst, (u1_pred_vals_vert_r + 4), 4); 2008 } 2009 else if (i4_min_cost == i4_cost[6]) 2010 { 2011 *u4_intra_mode = HORZ_D_I4x4; 2012 pu1_pred_val = u1_pred_vals_horz_d; 2013 memcpy(pu1_dst, (pu1_pred_val + 6), 4); 2014 pu1_dst += dst_strd; 2015 memcpy(pu1_dst, (pu1_pred_val + 4), 4); 2016 pu1_dst += dst_strd; 2017 memcpy(pu1_dst, (pu1_pred_val + 2), 4); 2018 pu1_dst += dst_strd; 2019 memcpy(pu1_dst, (pu1_pred_val), 4); 2020 pu1_dst += dst_strd; 2021 } 2022 else if (i4_min_cost == i4_cost[7]) 2023 { 2024 *u4_intra_mode = VERT_L_I4x4; 2025 pu1_pred_val = u1_pred_vals_diag_11 + 5; 2026 memcpy(pu1_dst, (pu1_pred_val), 4); 2027 pu1_dst += dst_strd; 2028 pu1_pred_val = u1_pred_vals_diag_121 + 5; 2029 memcpy(pu1_dst, (pu1_pred_val), 4); 2030 pu1_dst += dst_strd; 2031 pu1_pred_val = u1_pred_vals_diag_11 + 6; 2032 memcpy(pu1_dst, (pu1_pred_val), 4); 2033 pu1_dst += dst_strd; 2034 pu1_pred_val = u1_pred_vals_diag_121 + 6; 2035 memcpy(pu1_dst, (pu1_pred_val), 4); 2036 } 2037 else if (i4_min_cost == i4_cost[8]) 2038 { 2039 *u4_intra_mode = HORZ_U_I4x4; 2040 pu1_pred_val = u1_pred_vals_horz_u; 2041 memcpy(pu1_dst, (pu1_pred_val), 4); 2042 pu1_dst += dst_strd; 2043 memcpy(pu1_dst, (pu1_pred_val + 2), 4); 2044 pu1_dst += dst_strd; 2045 memcpy(pu1_dst, (pu1_pred_val + 4), 4); 2046 pu1_dst += dst_strd; 2047 memcpy(pu1_dst, (pu1_pred_val + 6), 4); 2048 pu1_dst += dst_strd; 2049 } 2050 2051 return; 2052 
} 2053 2054 /** 2055 ****************************************************************************** 2056 * 2057 * @brief: 2058 * Evaluate best intr chroma mode (among VERT, HORZ and DC ) and do the prediction. 2059 * 2060 * @par Description 2061 * This function evaluates first three intra chroma modes and compute corresponding sad 2062 * and return the buffer predicted with best mode. 2063 * 2064 * @param[in] pu1_src 2065 * UWORD8 pointer to the source 2066 * 2067 * @param[in] pu1_ngbr_pels 2068 * UWORD8 pointer to neighbouring pels 2069 * 2070 * @param[out] pu1_dst 2071 * UWORD8 pointer to the destination 2072 * 2073 * @param[in] src_strd 2074 * integer source stride 2075 * 2076 * @param[in] dst_strd 2077 * integer destination stride 2078 * 2079 * @param[in] u4_n_avblty 2080 * availability of neighbouring pixels 2081 * 2082 * @param[in] u4_intra_mode 2083 * Pointer to the variable in which best mode is returned 2084 * 2085 * @param[in] pu4_sadmin 2086 * Pointer to the variable in which minimum sad is returned 2087 * 2088 * @param[in] u4_valid_intra_modes 2089 * Says what all modes are valid 2090 * 2091 * @return none 2092 * 2093 ****************************************************************************** 2094 */ 2095 void ih264e_evaluate_intra_chroma_modes(UWORD8 *pu1_src, 2096 UWORD8 *pu1_ngbr_pels, 2097 UWORD8 *pu1_dst, 2098 UWORD32 src_strd, 2099 UWORD32 dst_strd, 2100 WORD32 u4_n_avblty, 2101 UWORD32 *u4_intra_mode, 2102 WORD32 *pu4_sadmin, 2103 UWORD32 u4_valid_intra_modes) 2104 { 2105 UWORD8 *pu1_neighbour; 2106 UWORD8 *pu1_src_temp = pu1_src; 2107 UWORD8 left = 0, top = 0; 2108 WORD32 u4_dcval_u_l[2] = { 0, 0 }, /*sum left neighbours for 'U' ,two separate sets - sum of first four from top,and sum of four values from bottom */ 2109 u4_dcval_u_t[2] = { 0, 0 }; /*sum top neighbours for 'U'*/ 2110 2111 WORD32 u4_dcval_v_l[2] = { 0, 0 }, /*sum left neighbours for 'V'*/ 2112 u4_dcval_v_t[2] = { 0, 0 }; /*sum top neighbours for 'V'*/ 2113 2114 WORD32 i, j, row, 
col, i4_sad_vert = INT_MAX, i4_sad_horz = INT_MAX, 2115 i4_sad_dc = INT_MAX, i4_min_sad = INT_MAX; 2116 UWORD8 val_u, val_v; 2117 2118 WORD32 u4_dc_val[2][2][2];/* ----------- 2119 | | | Chroma can have four 2120 | 00 | 01 | separate dc value... 2121 ----------- u4_dc_val corresponds to this dc values 2122 | | | with u4_dc_val[2][2][U] and u4_dc_val[2][2][V] 2123 | 10 | 11 | 2124 ----------- */ 2125 left = (u4_n_avblty & LEFT_MB_AVAILABLE_MASK); 2126 top = (u4_n_avblty & TOP_MB_AVAILABLE_MASK) >> 2; 2127 2128 /*Evaluating HORZ*/ 2129 if (left)/* Ifleft available*/ 2130 { 2131 i4_sad_horz = 0; 2132 2133 for (i = 0; i < 8; i++) 2134 { 2135 val_v = pu1_ngbr_pels[15 - 2 * i]; 2136 val_u = pu1_ngbr_pels[15 - 2 * i - 1]; 2137 row = i / 4; 2138 u4_dcval_u_l[row] += val_u; 2139 u4_dcval_v_l[row] += val_v; 2140 for (j = 0; j < 8; j++) 2141 { 2142 i4_sad_horz += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for HORZ mode*/ 2143 i4_sad_horz += ABS(val_v - pu1_src_temp[2 * j + 1]); 2144 } 2145 2146 pu1_src_temp += src_strd; 2147 } 2148 u4_dcval_u_l[0] += 2; 2149 u4_dcval_u_l[1] += 2; 2150 u4_dcval_v_l[0] += 2; 2151 u4_dcval_v_l[1] += 2; 2152 } 2153 2154 /*Evaluating VERT**/ 2155 pu1_src_temp = pu1_src; 2156 if (top) /* top available*/ 2157 { 2158 i4_sad_vert = 0; 2159 2160 for (i = 0; i < 8; i++) 2161 { 2162 col = i / 4; 2163 2164 val_u = pu1_ngbr_pels[18 + i * 2]; 2165 val_v = pu1_ngbr_pels[18 + i * 2 + 1]; 2166 u4_dcval_u_t[col] += val_u; 2167 u4_dcval_v_t[col] += val_v; 2168 2169 for (j = 0; j < 16; j++) 2170 { 2171 i4_sad_vert += ABS(pu1_ngbr_pels[18 + j] - pu1_src_temp[j]);/* Finding SAD for VERT mode*/ 2172 } 2173 pu1_src_temp += src_strd; 2174 2175 } 2176 u4_dcval_u_t[0] += 2; 2177 u4_dcval_u_t[1] += 2; 2178 u4_dcval_v_t[0] += 2; 2179 u4_dcval_v_t[1] += 2; 2180 } 2181 2182 /* computing DC value*/ 2183 /* Equation 8-128 in spec*/ 2184 u4_dc_val[0][0][0] = (u4_dcval_u_l[0] + u4_dcval_u_t[0]) >> (1 + left + top); 2185 u4_dc_val[0][0][1] = (u4_dcval_v_l[0] + 
u4_dcval_v_t[0]) >> (1 + left + top); 2186 u4_dc_val[1][1][0] = (u4_dcval_u_l[1] + u4_dcval_u_t[1]) >> (1 + left + top); 2187 u4_dc_val[1][1][1] = (u4_dcval_v_l[1] + u4_dcval_v_t[1]) >> (1 + left + top); 2188 2189 if (top) 2190 { 2191 /* Equation 8-132 in spec*/ 2192 u4_dc_val[0][1][0] = (u4_dcval_u_t[1]) >> (1 + top); 2193 u4_dc_val[0][1][1] = (u4_dcval_v_t[1]) >> (1 + top); 2194 } 2195 else 2196 { 2197 u4_dc_val[0][1][0] = (u4_dcval_u_l[0]) >> (1 + left); 2198 u4_dc_val[0][1][1] = (u4_dcval_v_l[0]) >> (1 + left); 2199 } 2200 2201 if (left) 2202 { 2203 u4_dc_val[1][0][0] = (u4_dcval_u_l[1]) >> (1 + left); 2204 u4_dc_val[1][0][1] = (u4_dcval_v_l[1]) >> (1 + left); 2205 } 2206 else 2207 { 2208 u4_dc_val[1][0][0] = (u4_dcval_u_t[0]) >> (1 + top); 2209 u4_dc_val[1][0][1] = (u4_dcval_v_t[0]) >> (1 + top); 2210 } 2211 2212 if (!(left || top)) 2213 { 2214 /*none available*/ 2215 u4_dc_val[0][0][0] = u4_dc_val[0][0][1] = 2216 u4_dc_val[0][1][0] = u4_dc_val[0][1][1] = 2217 u4_dc_val[1][0][0] = u4_dc_val[1][0][1] = 2218 u4_dc_val[1][1][0] = u4_dc_val[1][1][1] = 128; 2219 } 2220 2221 /* Evaluating DC */ 2222 pu1_src_temp = pu1_src; 2223 i4_sad_dc = 0; 2224 for (i = 0; i < 8; i++) 2225 { 2226 for (j = 0; j < 8; j++) 2227 { 2228 col = j / 4; 2229 row = i / 4; 2230 val_u = u4_dc_val[row][col][0]; 2231 val_v = u4_dc_val[row][col][1]; 2232 2233 i4_sad_dc += ABS(val_u - pu1_src_temp[2 * j]);/* Finding SAD for DC mode*/ 2234 i4_sad_dc += ABS(val_v - pu1_src_temp[2 * j + 1]); 2235 } 2236 pu1_src_temp += src_strd; 2237 } 2238 2239 if ((u4_valid_intra_modes & 01) == 0)/* If DC is disabled*/ 2240 i4_sad_dc = INT_MAX; 2241 if ((u4_valid_intra_modes & 02) == 0)/* If HORZ is disabled*/ 2242 i4_sad_horz = INT_MAX; 2243 if ((u4_valid_intra_modes & 04) == 0)/* If VERT is disabled*/ 2244 i4_sad_vert = INT_MAX; 2245 2246 i4_min_sad = MIN3(i4_sad_horz, i4_sad_dc, i4_sad_vert); 2247 2248 /* Finding Minimum sad and doing corresponding prediction*/ 2249 if (i4_min_sad < *pu4_sadmin) 2250 { 2251 
*pu4_sadmin = i4_min_sad; 2252 2253 if (i4_min_sad == i4_sad_dc) 2254 { 2255 *u4_intra_mode = DC_CH_I8x8; 2256 for (i = 0; i < 8; i++) 2257 { 2258 for (j = 0; j < 8; j++) 2259 { 2260 col = j / 4; 2261 row = i / 4; 2262 2263 pu1_dst[2 * j] = u4_dc_val[row][col][0]; 2264 pu1_dst[2 * j + 1] = u4_dc_val[row][col][1]; 2265 } 2266 pu1_dst += dst_strd; 2267 } 2268 } 2269 else if (i4_min_sad == i4_sad_horz) 2270 { 2271 *u4_intra_mode = HORZ_CH_I8x8; 2272 for (j = 0; j < 8; j++) 2273 { 2274 val_v = pu1_ngbr_pels[15 - 2 * j]; 2275 val_u = pu1_ngbr_pels[15 - 2 * j - 1]; 2276 2277 for (i = 0; i < 8; i++) 2278 { 2279 pu1_dst[2 * i] = val_u; 2280 pu1_dst[2 * i + 1] = val_v; 2281 2282 } 2283 pu1_dst += dst_strd; 2284 } 2285 } 2286 else 2287 { 2288 *u4_intra_mode = VERT_CH_I8x8; 2289 pu1_neighbour = pu1_ngbr_pels + 18; 2290 for (j = 0; j < 8; j++) 2291 { 2292 memcpy(pu1_dst, pu1_neighbour, MB_SIZE); 2293 pu1_dst += dst_strd; 2294 } 2295 } 2296 } 2297 2298 return; 2299 } 2300