1 /* ------------------------------------------------------------------ 2 * Copyright (C) 1998-2009 PacketVideo 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 * express or implied. 14 * See the License for the specific language governing permissions 15 * and limitations under the License. 16 * ------------------------------------------------------------------- 17 */ 18 #include "avcenc_lib.h" 19 20 #define MIN_GOP 1 /* minimum size of GOP, 1/23/01, need to be tested */ 21 22 #define DEFAULT_REF_IDX 0 /* always from the first frame in the reflist */ 23 24 #define ALL_CAND_EQUAL 10 /* any number greater than 5 will work */ 25 26 27 /* from TMN 3.2 */ 28 #define PREF_NULL_VEC 129 /* zero vector bias */ 29 #define PREF_16_VEC 129 /* 1MV bias versus 4MVs*/ 30 #define PREF_INTRA 3024//512 /* bias for INTRA coding */ 31 32 const static int tab_exclude[9][9] = // [last_loc][curr_loc] 33 { 34 {0, 0, 0, 0, 0, 0, 0, 0, 0}, 35 {0, 0, 0, 0, 1, 1, 1, 0, 0}, 36 {0, 0, 0, 0, 1, 1, 1, 1, 1}, 37 {0, 0, 0, 0, 0, 0, 1, 1, 1}, 38 {0, 1, 1, 0, 0, 0, 1, 1, 1}, 39 {0, 1, 1, 0, 0, 0, 0, 0, 1}, 40 {0, 1, 1, 1, 1, 0, 0, 0, 1}, 41 {0, 0, 1, 1, 1, 0, 0, 0, 0}, 42 {0, 0, 1, 1, 1, 1, 1, 0, 0} 43 }; //to decide whether to continue or compute 44 45 const static int refine_next[8][2] = /* [curr_k][increment] */ 46 { 47 {0, 0}, {2, 0}, {1, 1}, {0, 2}, { -1, 1}, { -2, 0}, { -1, -1}, {0, -2} 48 }; 49 50 #ifdef _SAD_STAT 51 uint32 num_MB = 0; 52 uint32 num_cand = 0; 53 #endif 54 55 /************************************************************************/ 56 #define TH_INTER_2 100 /* temporary for now */ 57 58 
//#define FIXED_INTERPRED_MODE AVC_P16 59 #define FIXED_REF_IDX 0 60 #define FIXED_MVX 0 61 #define FIXED_MVY 0 62 63 // only use when AVC_P8 or AVC_P8ref0 64 #define FIXED_SUBMB_MODE AVC_4x4 65 /*************************************************************************/ 66 67 /* Initialize arrays necessary for motion search */ 68 AVCEnc_Status InitMotionSearchModule(AVCHandle *avcHandle) 69 { 70 AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject; 71 AVCRateControl *rateCtrl = encvid->rateCtrl; 72 int search_range = rateCtrl->mvRange; 73 int number_of_subpel_positions = 4 * (2 * search_range + 3); 74 int max_mv_bits, max_mvd; 75 int temp_bits = 0; 76 uint8 *mvbits; 77 int bits, imax, imin, i; 78 uint8* subpel_pred = (uint8*) encvid->subpel_pred; // all 16 sub-pel positions 79 80 81 while (number_of_subpel_positions > 0) 82 { 83 temp_bits++; 84 number_of_subpel_positions >>= 1; 85 } 86 87 max_mv_bits = 3 + 2 * temp_bits; 88 max_mvd = (1 << (max_mv_bits >> 1)) - 1; 89 90 encvid->mvbits_array = (uint8*) avcHandle->CBAVC_Malloc(encvid->avcHandle->userData, 91 sizeof(uint8) * (2 * max_mvd + 1), DEFAULT_ATTR); 92 93 if (encvid->mvbits_array == NULL) 94 { 95 return AVCENC_MEMORY_FAIL; 96 } 97 98 mvbits = encvid->mvbits = encvid->mvbits_array + max_mvd; 99 100 mvbits[0] = 1; 101 for (bits = 3; bits <= max_mv_bits; bits += 2) 102 { 103 imax = 1 << (bits >> 1); 104 imin = imax >> 1; 105 106 for (i = imin; i < imax; i++) mvbits[-i] = mvbits[i] = bits; 107 } 108 109 /* initialize half-pel search */ 110 encvid->hpel_cand[0] = subpel_pred + REF_CENTER; 111 encvid->hpel_cand[1] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1 ; 112 encvid->hpel_cand[2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1; 113 encvid->hpel_cand[3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25; 114 encvid->hpel_cand[4] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25; 115 encvid->hpel_cand[5] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25; 116 encvid->hpel_cand[6] = 
subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; 117 encvid->hpel_cand[7] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; 118 encvid->hpel_cand[8] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; 119 120 /* For quarter-pel interpolation around best half-pel result */ 121 122 encvid->bilin_base[0][0] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; 123 encvid->bilin_base[0][1] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1; 124 encvid->bilin_base[0][2] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; 125 encvid->bilin_base[0][3] = subpel_pred + REF_CENTER; 126 127 128 encvid->bilin_base[1][0] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE; 129 encvid->bilin_base[1][1] = subpel_pred + REF_CENTER - 24; 130 encvid->bilin_base[1][2] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; 131 encvid->bilin_base[1][3] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1; 132 133 encvid->bilin_base[2][0] = subpel_pred + REF_CENTER - 24; 134 encvid->bilin_base[2][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1; 135 encvid->bilin_base[2][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1; 136 encvid->bilin_base[2][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1; 137 138 encvid->bilin_base[3][0] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 1; 139 encvid->bilin_base[3][1] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 1; 140 encvid->bilin_base[3][2] = subpel_pred + REF_CENTER; 141 encvid->bilin_base[3][3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25; 142 143 encvid->bilin_base[4][0] = subpel_pred + REF_CENTER; 144 encvid->bilin_base[4][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25; 145 encvid->bilin_base[4][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25; 146 encvid->bilin_base[4][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 25; 147 148 encvid->bilin_base[5][0] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; 149 encvid->bilin_base[5][1] = subpel_pred + REF_CENTER; 150 encvid->bilin_base[5][2] = subpel_pred + V2Q_H2Q * 
SUBPEL_PRED_BLK_SIZE + 24; 151 encvid->bilin_base[5][3] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 25; 152 153 encvid->bilin_base[6][0] = subpel_pred + REF_CENTER - 1; 154 encvid->bilin_base[6][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; 155 encvid->bilin_base[6][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE + 24; 156 encvid->bilin_base[6][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; 157 158 encvid->bilin_base[7][0] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE; 159 encvid->bilin_base[7][1] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; 160 encvid->bilin_base[7][2] = subpel_pred + REF_CENTER - 1; 161 encvid->bilin_base[7][3] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE + 24; 162 163 encvid->bilin_base[8][0] = subpel_pred + REF_CENTER - 25; 164 encvid->bilin_base[8][1] = subpel_pred + V0Q_H2Q * SUBPEL_PRED_BLK_SIZE; 165 encvid->bilin_base[8][2] = subpel_pred + V2Q_H0Q * SUBPEL_PRED_BLK_SIZE; 166 encvid->bilin_base[8][3] = subpel_pred + V2Q_H2Q * SUBPEL_PRED_BLK_SIZE; 167 168 169 return AVCENC_SUCCESS; 170 } 171 172 /* Clean-up memory */ 173 void CleanMotionSearchModule(AVCHandle *avcHandle) 174 { 175 AVCEncObject *encvid = (AVCEncObject*) avcHandle->AVCObject; 176 177 if (encvid->mvbits_array) 178 { 179 avcHandle->CBAVC_Free(avcHandle->userData, encvid->mvbits_array); 180 encvid->mvbits = NULL; 181 } 182 183 return ; 184 } 185 186 187 bool IntraDecisionABE(int *min_cost, uint8 *cur, int pitch, bool ave) 188 { 189 int j; 190 uint8 *out; 191 int temp, SBE; 192 OsclFloat ABE; 193 bool intra = true; 194 195 SBE = 0; 196 /* top neighbor */ 197 out = cur - pitch; 198 for (j = 0; j < 16; j++) 199 { 200 temp = out[j] - cur[j]; 201 SBE += ((temp >= 0) ? temp : -temp); 202 } 203 204 /* left neighbor */ 205 out = cur - 1; 206 out -= pitch; 207 cur -= pitch; 208 for (j = 0; j < 16; j++) 209 { 210 temp = *(out += pitch) - *(cur += pitch); 211 SBE += ((temp >= 0) ? 
temp : -temp); 212 } 213 214 /* compare mincost/384 and SBE/64 */ 215 ABE = SBE / 32.0; //ABE = SBE/64.0; // 216 if (ABE >= *min_cost / 256.0) //if( ABE*0.8 >= min_cost/384.0) // 217 { 218 intra = false; // no possibility of intra, just use inter 219 } 220 else 221 { 222 if (ave == true) 223 { 224 *min_cost = (*min_cost + (int)(SBE * 8)) >> 1; // possibility of intra, averaging the cost 225 } 226 else 227 { 228 *min_cost = (int)(SBE * 8); 229 } 230 } 231 232 return intra; 233 } 234 235 /******* main function for macroblock prediction for the entire frame ***/ 236 /* if turns out to be IDR frame, set video->nal_unit_type to AVC_NALTYPE_IDR */ 237 void AVCMotionEstimation(AVCEncObject *encvid) 238 { 239 AVCCommonObj *video = encvid->common; 240 int slice_type = video->slice_type; 241 AVCFrameIO *currInput = encvid->currInput; 242 AVCPictureData *refPic = video->RefPicList0[0]; 243 int i, j, k; 244 int mbwidth = video->PicWidthInMbs; 245 int mbheight = video->PicHeightInMbs; 246 int totalMB = video->PicSizeInMbs; 247 int pitch = currInput->pitch; 248 AVCMacroblock *currMB, *mblock = video->mblock; 249 AVCMV *mot_mb_16x16, *mot16x16 = encvid->mot16x16; 250 // AVCMV *mot_mb_16x8, *mot_mb_8x16, *mot_mb_8x8, etc; 251 AVCRateControl *rateCtrl = encvid->rateCtrl; 252 uint8 *intraSearch = encvid->intraSearch; 253 uint FS_en = encvid->fullsearch_enable; 254 255 int NumIntraSearch, start_i, numLoop, incr_i; 256 int mbnum, offset; 257 uint8 *cur, *best_cand[5]; 258 int totalSAD = 0; /* average SAD for rate control */ 259 int type_pred; 260 int abe_cost; 261 262 #ifdef HTFM 263 /***** HYPOTHESIS TESTING ********/ /* 2/28/01 */ 264 int collect = 0; 265 HTFM_Stat htfm_stat; 266 double newvar[16]; 267 double exp_lamda[15]; 268 /*********************************/ 269 #endif 270 int hp_guess = 0; 271 uint32 mv_uint32; 272 273 offset = 0; 274 275 if (slice_type == AVC_I_SLICE) 276 { 277 /* cannot do I16 prediction here because it needs full decoding. 
*/ 278 for (i = 0; i < totalMB; i++) 279 { 280 encvid->min_cost[i] = 0x7FFFFFFF; /* max value for int */ 281 } 282 283 memset(intraSearch, 1, sizeof(uint8)*totalMB); 284 285 encvid->firstIntraRefreshMBIndx = 0; /* reset this */ 286 287 return ; 288 } 289 else // P_SLICE 290 { 291 for (i = 0; i < totalMB; i++) 292 { 293 mblock[i].mb_intra = 0; 294 } 295 memset(intraSearch, 1, sizeof(uint8)*totalMB); 296 } 297 298 if (refPic->padded == 0) 299 { 300 AVCPaddingEdge(refPic); 301 refPic->padded = 1; 302 } 303 /* Random INTRA update */ 304 if (rateCtrl->intraMBRate) 305 { 306 AVCRasterIntraUpdate(encvid, mblock, totalMB, rateCtrl->intraMBRate); 307 } 308 309 encvid->sad_extra_info = NULL; 310 #ifdef HTFM 311 /***** HYPOTHESIS TESTING ********/ 312 InitHTFM(video, &htfm_stat, newvar, &collect); 313 /*********************************/ 314 #endif 315 316 if ((rateCtrl->scdEnable == 1) 317 && ((rateCtrl->frame_rate < 5.0) || (video->sliceHdr->frame_num > MIN_GOP))) 318 /* do not try to detect a new scene if low frame rate and too close to previous I-frame */ 319 { 320 incr_i = 2; 321 numLoop = 2; 322 start_i = 1; 323 type_pred = 0; /* for initial candidate selection */ 324 } 325 else 326 { 327 incr_i = 1; 328 numLoop = 1; 329 start_i = 0; 330 type_pred = 2; 331 } 332 333 /* First pass, loop thru half the macroblock */ 334 /* determine scene change */ 335 /* Second pass, for the rest of macroblocks */ 336 NumIntraSearch = 0; // to be intra searched in the encoding loop. 337 while (numLoop--) 338 { 339 for (j = 0; j < mbheight; j++) 340 { 341 if (incr_i > 1) 342 start_i = (start_i == 0 ? 
1 : 0) ; /* toggle 0 and 1 */ 343 344 offset = pitch * (j << 4) + (start_i << 4); 345 346 mbnum = j * mbwidth + start_i; 347 348 for (i = start_i; i < mbwidth; i += incr_i) 349 { 350 video->mbNum = mbnum; 351 video->currMB = currMB = mblock + mbnum; 352 mot_mb_16x16 = mot16x16 + mbnum; 353 354 cur = currInput->YCbCr[0] + offset; 355 356 if (currMB->mb_intra == 0) /* for INTER mode */ 357 { 358 #if defined(HTFM) 359 HTFMPrepareCurMB_AVC(encvid, &htfm_stat, cur, pitch); 360 #else 361 AVCPrepareCurMB(encvid, cur, pitch); 362 #endif 363 /************************************************************/ 364 /******** full-pel 1MV search **********************/ 365 366 AVCMBMotionSearch(encvid, cur, best_cand, i << 4, j << 4, type_pred, 367 FS_en, &hp_guess); 368 369 abe_cost = encvid->min_cost[mbnum] = mot_mb_16x16->sad; 370 371 /* set mbMode and MVs */ 372 currMB->mbMode = AVC_P16; 373 currMB->MBPartPredMode[0][0] = AVC_Pred_L0; 374 mv_uint32 = ((mot_mb_16x16->y) << 16) | ((mot_mb_16x16->x) & 0xffff); 375 for (k = 0; k < 32; k += 2) 376 { 377 currMB->mvL0[k>>1] = mv_uint32; 378 } 379 380 /* make a decision whether it should be tested for intra or not */ 381 if (i != mbwidth - 1 && j != mbheight - 1 && i != 0 && j != 0) 382 { 383 if (false == IntraDecisionABE(&abe_cost, cur, pitch, true)) 384 { 385 intraSearch[mbnum] = 0; 386 } 387 else 388 { 389 NumIntraSearch++; 390 rateCtrl->MADofMB[mbnum] = abe_cost; 391 } 392 } 393 else // boundary MBs, always do intra search 394 { 395 NumIntraSearch++; 396 } 397 398 totalSAD += (int) rateCtrl->MADofMB[mbnum];//mot_mb_16x16->sad; 399 } 400 else /* INTRA update, use for prediction */ 401 { 402 mot_mb_16x16[0].x = mot_mb_16x16[0].y = 0; 403 404 /* reset all other MVs to zero */ 405 /* mot_mb_16x8, mot_mb_8x16, mot_mb_8x8, etc. 
*/ 406 abe_cost = encvid->min_cost[mbnum] = 0x7FFFFFFF; /* max value for int */ 407 408 if (i != mbwidth - 1 && j != mbheight - 1 && i != 0 && j != 0) 409 { 410 IntraDecisionABE(&abe_cost, cur, pitch, false); 411 412 rateCtrl->MADofMB[mbnum] = abe_cost; 413 totalSAD += abe_cost; 414 } 415 416 NumIntraSearch++ ; 417 /* cannot do I16 prediction here because it needs full decoding. */ 418 // intraSearch[mbnum] = 1; 419 420 } 421 422 mbnum += incr_i; 423 offset += (incr_i << 4); 424 425 } /* for i */ 426 } /* for j */ 427 428 /* since we cannot do intra/inter decision here, the SCD has to be 429 based on other criteria such as motion vectors coherency or the SAD */ 430 if (incr_i > 1 && numLoop) /* scene change on and first loop */ 431 { 432 //if(NumIntraSearch > ((totalMB>>3)<<1) + (totalMB>>3)) /* 75% of 50%MBs */ 433 if (NumIntraSearch*99 > (48*totalMB)) /* 20% of 50%MBs */ 434 /* need to do more investigation about this threshold since the NumIntraSearch 435 only show potential intra MBs, not the actual one */ 436 { 437 /* we can choose to just encode I_SLICE without IDR */ 438 //video->nal_unit_type = AVC_NALTYPE_IDR; 439 video->nal_unit_type = AVC_NALTYPE_SLICE; 440 video->sliceHdr->slice_type = AVC_I_ALL_SLICE; 441 video->slice_type = AVC_I_SLICE; 442 memset(intraSearch, 1, sizeof(uint8)*totalMB); 443 i = totalMB; 444 while (i--) 445 { 446 mblock[i].mb_intra = 1; 447 encvid->min_cost[i] = 0x7FFFFFFF; /* max value for int */ 448 } 449 450 rateCtrl->totalSAD = totalSAD * 2; /* SAD */ 451 452 return ; 453 } 454 } 455 /******** no scene change, continue motion search **********************/ 456 start_i = 0; 457 type_pred++; /* second pass */ 458 } 459 460 rateCtrl->totalSAD = totalSAD; /* SAD */ 461 462 #ifdef HTFM 463 /***** HYPOTHESIS TESTING ********/ 464 if (collect) 465 { 466 collect = 0; 467 UpdateHTFM(encvid, newvar, exp_lamda, &htfm_stat); 468 } 469 /*********************************/ 470 #endif 471 472 return ; 473 } 474 475 
/*===================================================================== 476 Function: PaddingEdge 477 Date: 09/16/2000 478 Purpose: Pad edge of a Vop 479 =====================================================================*/ 480 481 void AVCPaddingEdge(AVCPictureData *refPic) 482 { 483 uint8 *src, *dst; 484 int i; 485 int pitch, width, height; 486 uint32 temp1, temp2; 487 488 width = refPic->width; 489 height = refPic->height; 490 pitch = refPic->pitch; 491 492 /* pad top */ 493 src = refPic->Sl; 494 495 temp1 = *src; /* top-left corner */ 496 temp2 = src[width-1]; /* top-right corner */ 497 temp1 |= (temp1 << 8); 498 temp1 |= (temp1 << 16); 499 temp2 |= (temp2 << 8); 500 temp2 |= (temp2 << 16); 501 502 dst = src - (pitch << 4); 503 504 *((uint32*)(dst - 16)) = temp1; 505 *((uint32*)(dst - 12)) = temp1; 506 *((uint32*)(dst - 8)) = temp1; 507 *((uint32*)(dst - 4)) = temp1; 508 509 memcpy(dst, src, width); 510 511 *((uint32*)(dst += width)) = temp2; 512 *((uint32*)(dst + 4)) = temp2; 513 *((uint32*)(dst + 8)) = temp2; 514 *((uint32*)(dst + 12)) = temp2; 515 516 dst = dst - width - 16; 517 518 i = 15; 519 while (i--) 520 { 521 memcpy(dst + pitch, dst, pitch); 522 dst += pitch; 523 } 524 525 /* pad sides */ 526 dst += (pitch + 16); 527 src = dst; 528 i = height; 529 while (i--) 530 { 531 temp1 = *src; 532 temp2 = src[width-1]; 533 temp1 |= (temp1 << 8); 534 temp1 |= (temp1 << 16); 535 temp2 |= (temp2 << 8); 536 temp2 |= (temp2 << 16); 537 538 *((uint32*)(dst - 16)) = temp1; 539 *((uint32*)(dst - 12)) = temp1; 540 *((uint32*)(dst - 8)) = temp1; 541 *((uint32*)(dst - 4)) = temp1; 542 543 *((uint32*)(dst += width)) = temp2; 544 *((uint32*)(dst + 4)) = temp2; 545 *((uint32*)(dst + 8)) = temp2; 546 *((uint32*)(dst + 12)) = temp2; 547 548 src += pitch; 549 dst = src; 550 } 551 552 /* pad bottom */ 553 dst -= 16; 554 i = 16; 555 while (i--) 556 { 557 memcpy(dst, dst - pitch, pitch); 558 dst += pitch; 559 } 560 561 562 return ; 563 } 564 565 
/*=========================================================================== 566 Function: AVCRasterIntraUpdate 567 Date: 2/26/01 568 Purpose: To raster-scan assign INTRA-update . 569 N macroblocks are updated (also was programmable). 570 ===========================================================================*/ 571 void AVCRasterIntraUpdate(AVCEncObject *encvid, AVCMacroblock *mblock, int totalMB, int numRefresh) 572 { 573 int indx, i; 574 575 indx = encvid->firstIntraRefreshMBIndx; 576 for (i = 0; i < numRefresh && indx < totalMB; i++) 577 { 578 (mblock + indx)->mb_intra = 1; 579 encvid->intraSearch[indx++] = 1; 580 } 581 582 /* if read the end of frame, reset and loop around */ 583 if (indx >= totalMB - 1) 584 { 585 indx = 0; 586 while (i < numRefresh && indx < totalMB) 587 { 588 (mblock + indx)->mb_intra = 1; 589 encvid->intraSearch[indx++] = 1; 590 i++; 591 } 592 } 593 594 encvid->firstIntraRefreshMBIndx = indx; /* update with a new value */ 595 596 return ; 597 } 598 599 600 #ifdef HTFM 601 void InitHTFM(VideoEncData *encvid, HTFM_Stat *htfm_stat, double *newvar, int *collect) 602 { 603 AVCCommonObj *video = encvid->common; 604 int i; 605 int lx = video->currPic->width; // padding 606 int lx2 = lx << 1; 607 int lx3 = lx2 + lx; 608 int rx = video->currPic->pitch; 609 int rx2 = rx << 1; 610 int rx3 = rx2 + rx; 611 612 int *offset, *offset2; 613 614 /* 4/11/01, collect data every 30 frames, doesn't have to be base layer */ 615 if (((int)video->sliceHdr->frame_num) % 30 == 1) 616 { 617 618 *collect = 1; 619 620 htfm_stat->countbreak = 0; 621 htfm_stat->abs_dif_mad_avg = 0; 622 623 for (i = 0; i < 16; i++) 624 { 625 newvar[i] = 0.0; 626 } 627 // encvid->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM_Collect; 628 encvid->functionPointer->SAD_Macroblock = &SAD_MB_HTFM_Collect; 629 encvid->functionPointer->SAD_MB_HalfPel[0] = NULL; 630 encvid->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFM_Collectxh; 631 encvid->functionPointer->SAD_MB_HalfPel[2] = 
&SAD_MB_HP_HTFM_Collectyh; 632 encvid->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFM_Collectxhyh; 633 encvid->sad_extra_info = (void*)(htfm_stat); 634 offset = htfm_stat->offsetArray; 635 offset2 = htfm_stat->offsetRef; 636 } 637 else 638 { 639 // encvid->functionPointer->SAD_MB_PADDING = &SAD_MB_PADDING_HTFM; 640 encvid->functionPointer->SAD_Macroblock = &SAD_MB_HTFM; 641 encvid->functionPointer->SAD_MB_HalfPel[0] = NULL; 642 encvid->functionPointer->SAD_MB_HalfPel[1] = &SAD_MB_HP_HTFMxh; 643 encvid->functionPointer->SAD_MB_HalfPel[2] = &SAD_MB_HP_HTFMyh; 644 encvid->functionPointer->SAD_MB_HalfPel[3] = &SAD_MB_HP_HTFMxhyh; 645 encvid->sad_extra_info = (void*)(encvid->nrmlz_th); 646 offset = encvid->nrmlz_th + 16; 647 offset2 = encvid->nrmlz_th + 32; 648 } 649 650 offset[0] = 0; 651 offset[1] = lx2 + 2; 652 offset[2] = 2; 653 offset[3] = lx2; 654 offset[4] = lx + 1; 655 offset[5] = lx3 + 3; 656 offset[6] = lx + 3; 657 offset[7] = lx3 + 1; 658 offset[8] = lx; 659 offset[9] = lx3 + 2; 660 offset[10] = lx3 ; 661 offset[11] = lx + 2 ; 662 offset[12] = 1; 663 offset[13] = lx2 + 3; 664 offset[14] = lx2 + 1; 665 offset[15] = 3; 666 667 offset2[0] = 0; 668 offset2[1] = rx2 + 2; 669 offset2[2] = 2; 670 offset2[3] = rx2; 671 offset2[4] = rx + 1; 672 offset2[5] = rx3 + 3; 673 offset2[6] = rx + 3; 674 offset2[7] = rx3 + 1; 675 offset2[8] = rx; 676 offset2[9] = rx3 + 2; 677 offset2[10] = rx3 ; 678 offset2[11] = rx + 2 ; 679 offset2[12] = 1; 680 offset2[13] = rx2 + 3; 681 offset2[14] = rx2 + 1; 682 offset2[15] = 3; 683 684 return ; 685 } 686 687 void UpdateHTFM(AVCEncObject *encvid, double *newvar, double *exp_lamda, HTFM_Stat *htfm_stat) 688 { 689 if (htfm_stat->countbreak == 0) 690 htfm_stat->countbreak = 1; 691 692 newvar[0] = (double)(htfm_stat->abs_dif_mad_avg) / (htfm_stat->countbreak * 16.); 693 694 if (newvar[0] < 0.001) 695 { 696 newvar[0] = 0.001; /* to prevent floating overflow */ 697 } 698 exp_lamda[0] = 1 / (newvar[0] * 1.4142136); 699 exp_lamda[1] = 
exp_lamda[0] * 1.5825; 700 exp_lamda[2] = exp_lamda[0] * 2.1750; 701 exp_lamda[3] = exp_lamda[0] * 3.5065; 702 exp_lamda[4] = exp_lamda[0] * 3.1436; 703 exp_lamda[5] = exp_lamda[0] * 3.5315; 704 exp_lamda[6] = exp_lamda[0] * 3.7449; 705 exp_lamda[7] = exp_lamda[0] * 4.5854; 706 exp_lamda[8] = exp_lamda[0] * 4.6191; 707 exp_lamda[9] = exp_lamda[0] * 5.4041; 708 exp_lamda[10] = exp_lamda[0] * 6.5974; 709 exp_lamda[11] = exp_lamda[0] * 10.5341; 710 exp_lamda[12] = exp_lamda[0] * 10.0719; 711 exp_lamda[13] = exp_lamda[0] * 12.0516; 712 exp_lamda[14] = exp_lamda[0] * 15.4552; 713 714 CalcThreshold(HTFM_Pf, exp_lamda, encvid->nrmlz_th); 715 return ; 716 } 717 718 719 void CalcThreshold(double pf, double exp_lamda[], int nrmlz_th[]) 720 { 721 int i; 722 double temp[15]; 723 // printf("\nLamda: "); 724 725 /* parametric PREMODELling */ 726 for (i = 0; i < 15; i++) 727 { 728 // printf("%g ",exp_lamda[i]); 729 if (pf < 0.5) 730 temp[i] = 1 / exp_lamda[i] * M4VENC_LOG(2 * pf); 731 else 732 temp[i] = -1 / exp_lamda[i] * M4VENC_LOG(2 * (1 - pf)); 733 } 734 735 nrmlz_th[15] = 0; 736 for (i = 0; i < 15; i++) /* scale upto no.pixels */ 737 nrmlz_th[i] = (int)(temp[i] * ((i + 1) << 4) + 0.5); 738 739 return ; 740 } 741 742 void HTFMPrepareCurMB_AVC(AVCEncObject *encvid, HTFM_Stat *htfm_stat, uint8 *cur, int pitch) 743 { 744 AVCCommonObj *video = encvid->common; 745 uint32 *htfmMB = (uint32*)(encvid->currYMB); 746 uint8 *ptr, byte; 747 int *offset; 748 int i; 749 uint32 word; 750 751 if (((int)video->sliceHdr->frame_num) % 30 == 1) 752 { 753 offset = htfm_stat->offsetArray; 754 } 755 else 756 { 757 offset = encvid->nrmlz_th + 16; 758 } 759 760 for (i = 0; i < 16; i++) 761 { 762 ptr = cur + offset[i]; 763 word = ptr[0]; 764 byte = ptr[4]; 765 word |= (byte << 8); 766 byte = ptr[8]; 767 word |= (byte << 16); 768 byte = ptr[12]; 769 word |= (byte << 24); 770 *htfmMB++ = word; 771 772 word = *(ptr += (pitch << 2)); 773 byte = ptr[4]; 774 word |= (byte << 8); 775 byte = ptr[8]; 776 word 
|= (byte << 16); 777 byte = ptr[12]; 778 word |= (byte << 24); 779 *htfmMB++ = word; 780 781 word = *(ptr += (pitch << 2)); 782 byte = ptr[4]; 783 word |= (byte << 8); 784 byte = ptr[8]; 785 word |= (byte << 16); 786 byte = ptr[12]; 787 word |= (byte << 24); 788 *htfmMB++ = word; 789 790 word = *(ptr += (pitch << 2)); 791 byte = ptr[4]; 792 word |= (byte << 8); 793 byte = ptr[8]; 794 word |= (byte << 16); 795 byte = ptr[12]; 796 word |= (byte << 24); 797 *htfmMB++ = word; 798 } 799 800 return ; 801 } 802 803 804 #endif // HTFM 805 806 void AVCPrepareCurMB(AVCEncObject *encvid, uint8 *cur, int pitch) 807 { 808 void* tmp = (void*)(encvid->currYMB); 809 uint32 *currYMB = (uint32*) tmp; 810 int i; 811 812 cur -= pitch; 813 814 for (i = 0; i < 16; i++) 815 { 816 *currYMB++ = *((uint32*)(cur += pitch)); 817 *currYMB++ = *((uint32*)(cur + 4)); 818 *currYMB++ = *((uint32*)(cur + 8)); 819 *currYMB++ = *((uint32*)(cur + 12)); 820 } 821 822 return ; 823 } 824 825 #ifdef FIXED_INTERPRED_MODE 826 827 /* due to the complexity of the predicted motion vector, we may not decide to skip 828 a macroblock here just yet. */ 829 /* We will find the best motion vector and the best intra prediction mode for each block. 
*/ 830 /* output are 831 currMB->NumMbPart, currMB->MbPartWidth, currMB->MbPartHeight, 832 currMB->NumSubMbPart[], currMB->SubMbPartWidth[], currMB->SubMbPartHeight, 833 currMB->MBPartPredMode[][] (L0 or L1 or BiPred) 834 currMB->RefIdx[], currMB->ref_idx_L0[], 835 currMB->mvL0[], currMB->mvL1[] 836 */ 837 838 AVCEnc_Status AVCMBMotionSearch(AVCEncObject *encvid, AVCMacroblock *currMB, int mbNum, 839 int num_pass) 840 { 841 AVCCommonObj *video = encvid->common; 842 int mbPartIdx, subMbPartIdx; 843 int16 *mv; 844 int i; 845 int SubMbPartHeight, SubMbPartWidth, NumSubMbPart; 846 847 /* assign value to currMB->MBPartPredMode[][x],subMbMode[],NumSubMbPart[],SubMbPartWidth[],SubMbPartHeight[] */ 848 849 currMB->mbMode = FIXED_INTERPRED_MODE; 850 currMB->mb_intra = 0; 851 852 if (currMB->mbMode == AVC_P16) 853 { 854 currMB->NumMbPart = 1; 855 currMB->MbPartWidth = 16; 856 currMB->MbPartHeight = 16; 857 currMB->SubMbPartHeight[0] = 16; 858 currMB->SubMbPartWidth[0] = 16; 859 currMB->NumSubMbPart[0] = 1; 860 } 861 else if (currMB->mbMode == AVC_P16x8) 862 { 863 currMB->NumMbPart = 2; 864 currMB->MbPartWidth = 16; 865 currMB->MbPartHeight = 8; 866 for (i = 0; i < 2; i++) 867 { 868 currMB->SubMbPartWidth[i] = 16; 869 currMB->SubMbPartHeight[i] = 8; 870 currMB->NumSubMbPart[i] = 1; 871 } 872 } 873 else if (currMB->mbMode == AVC_P8x16) 874 { 875 currMB->NumMbPart = 2; 876 currMB->MbPartWidth = 8; 877 currMB->MbPartHeight = 16; 878 for (i = 0; i < 2; i++) 879 { 880 currMB->SubMbPartWidth[i] = 8; 881 currMB->SubMbPartHeight[i] = 16; 882 currMB->NumSubMbPart[i] = 1; 883 } 884 } 885 else if (currMB->mbMode == AVC_P8 || currMB->mbMode == AVC_P8ref0) 886 { 887 currMB->NumMbPart = 4; 888 currMB->MbPartWidth = 8; 889 currMB->MbPartHeight = 8; 890 if (FIXED_SUBMB_MODE == AVC_8x8) 891 { 892 SubMbPartHeight = 8; 893 SubMbPartWidth = 8; 894 NumSubMbPart = 1; 895 } 896 else if (FIXED_SUBMB_MODE == AVC_8x4) 897 { 898 SubMbPartHeight = 4; 899 SubMbPartWidth = 8; 900 NumSubMbPart = 2; 901 } 
902 else if (FIXED_SUBMB_MODE == AVC_4x8) 903 { 904 SubMbPartHeight = 8; 905 SubMbPartWidth = 4; 906 NumSubMbPart = 2; 907 } 908 else if (FIXED_SUBMB_MODE == AVC_4x4) 909 { 910 SubMbPartHeight = 4; 911 SubMbPartWidth = 4; 912 NumSubMbPart = 4; 913 } 914 915 for (i = 0; i < 4; i++) 916 { 917 currMB->subMbMode[i] = FIXED_SUBMB_MODE; 918 currMB->SubMbPartHeight[i] = SubMbPartHeight; 919 currMB->SubMbPartWidth[i] = SubMbPartWidth; 920 currMB->NumSubMbPart[i] = NumSubMbPart; 921 } 922 } 923 else /* it's probably intra mode */ 924 { 925 return AVCENC_SUCCESS; 926 } 927 928 for (mbPartIdx = 0; mbPartIdx < 4; mbPartIdx++) 929 { 930 currMB->MBPartPredMode[mbPartIdx][0] = AVC_Pred_L0; 931 currMB->ref_idx_L0[mbPartIdx] = FIXED_REF_IDX; 932 currMB->RefIdx[mbPartIdx] = video->RefPicList0[FIXED_REF_IDX]->RefIdx; 933 934 for (subMbPartIdx = 0; subMbPartIdx < 4; subMbPartIdx++) 935 { 936 mv = (int16*)(currMB->mvL0 + (mbPartIdx << 2) + subMbPartIdx); 937 938 *mv++ = FIXED_MVX; 939 *mv = FIXED_MVY; 940 } 941 } 942 943 encvid->min_cost = 0; 944 945 return AVCENC_SUCCESS; 946 } 947 948 #else /* perform the search */ 949 950 /* This option #1 search is very similar to PV's MPEG4 motion search algorithm. 951 The search is done in hierarchical manner from 16x16 MB down to smaller and smaller 952 partition. At each level, a decision can be made to stop the search if the expected 953 prediction gain is not worth the computation. The decision can also be made at the finest 954 level for more fullsearch-like behavior with the price of heavier computation. 
*/ 955 void AVCMBMotionSearch(AVCEncObject *encvid, uint8 *cur, uint8 *best_cand[], 956 int i0, int j0, int type_pred, int FS_en, int *hp_guess) 957 { 958 AVCCommonObj *video = encvid->common; 959 AVCPictureData *currPic = video->currPic; 960 AVCSeqParamSet *currSPS = video->currSeqParams; 961 AVCRateControl *rateCtrl = encvid->rateCtrl; 962 AVCMacroblock *currMB = video->currMB; 963 uint8 *ref, *cand, *ncand; 964 void *extra_info = encvid->sad_extra_info; 965 int mbnum = video->mbNum; 966 int width = currPic->width; /* 6/12/01, must be multiple of 16 */ 967 int height = currPic->height; 968 AVCMV *mot16x16 = encvid->mot16x16; 969 int (*SAD_Macroblock)(uint8*, uint8*, int, void*) = encvid->functionPointer->SAD_Macroblock; 970 971 int range = rateCtrl->mvRange; 972 973 int lx = currPic->pitch; /* padding */ 974 int i, j, imin, jmin, ilow, ihigh, jlow, jhigh; 975 int d, dmin, dn[9]; 976 int k; 977 int mvx[5], mvy[5]; 978 int num_can, center_again; 979 int last_loc, new_loc = 0; 980 int step, max_step = range >> 1; 981 int next; 982 983 int cmvx, cmvy; /* estimated predicted MV */ 984 int lev_idx; 985 int lambda_motion = encvid->lambda_motion; 986 uint8 *mvbits = encvid->mvbits; 987 int mvshift = 2; 988 int mvcost; 989 990 int min_sad = 65535; 991 992 ref = video->RefPicList0[DEFAULT_REF_IDX]->Sl; /* origin of actual frame */ 993 994 /* have to initialize these params, necessary for interprediction part */ 995 currMB->NumMbPart = 1; 996 currMB->SubMbPartHeight[0] = 16; 997 currMB->SubMbPartWidth[0] = 16; 998 currMB->NumSubMbPart[0] = 1; 999 currMB->ref_idx_L0[0] = currMB->ref_idx_L0[1] = 1000 currMB->ref_idx_L0[2] = currMB->ref_idx_L0[3] = DEFAULT_REF_IDX; 1001 currMB->ref_idx_L1[0] = currMB->ref_idx_L1[1] = 1002 currMB->ref_idx_L1[2] = currMB->ref_idx_L1[3] = DEFAULT_REF_IDX; 1003 currMB->RefIdx[0] = currMB->RefIdx[1] = 1004 currMB->RefIdx[2] = currMB->RefIdx[3] = video->RefPicList0[DEFAULT_REF_IDX]->RefIdx; 1005 1006 cur = encvid->currYMB; /* use smaller memory 
space for current MB */ 1007 1008 /* find limit of the search (adjusting search range)*/ 1009 lev_idx = mapLev2Idx[currSPS->level_idc]; 1010 1011 /* we can make this part dynamic based on previous statistics */ 1012 ilow = i0 - range; 1013 if (i0 - ilow > 2047) /* clip to conform with the standard */ 1014 { 1015 ilow = i0 - 2047; 1016 } 1017 if (ilow < -13) // change it from -15 to -13 because of 6-tap filter needs extra 2 lines. 1018 { 1019 ilow = -13; 1020 } 1021 1022 ihigh = i0 + range - 1; 1023 if (ihigh - i0 > 2047) /* clip to conform with the standard */ 1024 { 1025 ihigh = i0 + 2047; 1026 } 1027 if (ihigh > width - 3) 1028 { 1029 ihigh = width - 3; // change from width-1 to width-3 for the same reason as above 1030 } 1031 1032 jlow = j0 - range; 1033 if (j0 - jlow > MaxVmvR[lev_idx] - 1) /* clip to conform with the standard */ 1034 { 1035 jlow = j0 - MaxVmvR[lev_idx] + 1; 1036 } 1037 if (jlow < -13) // same reason as above 1038 { 1039 jlow = -13; 1040 } 1041 1042 jhigh = j0 + range - 1; 1043 if (jhigh - j0 > MaxVmvR[lev_idx] - 1) /* clip to conform with the standard */ 1044 { 1045 jhigh = j0 + MaxVmvR[lev_idx] - 1; 1046 } 1047 if (jhigh > height - 3) // same reason as above 1048 { 1049 jhigh = height - 3; 1050 } 1051 1052 /* find initial motion vector & predicted MV*/ 1053 AVCCandidateSelection(mvx, mvy, &num_can, i0 >> 4, j0 >> 4, encvid, type_pred, &cmvx, &cmvy); 1054 1055 imin = i0; 1056 jmin = j0; /* needed for fullsearch */ 1057 ncand = ref + i0 + j0 * lx; 1058 1059 /* for first row of MB, fullsearch can be used */ 1060 if (FS_en) 1061 { 1062 *hp_guess = 0; /* no guess for fast half-pel */ 1063 1064 dmin = AVCFullSearch(encvid, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh, cmvx, cmvy); 1065 1066 ncand = ref + imin + jmin * lx; 1067 } 1068 else 1069 { /* fullsearch the top row to only upto (0,3) MB */ 1070 /* upto 30% complexity saving with the same complexity */ 1071 if (video->PrevRefFrameNum == 0 && j0 == 0 && i0 <= 64 && type_pred != 1) 1072 { 
1073 *hp_guess = 0; /* no guess for fast half-pel */ 1074 dmin = AVCFullSearch(encvid, ref, cur, &imin, &jmin, ilow, ihigh, jlow, jhigh, cmvx, cmvy); 1075 ncand = ref + imin + jmin * lx; 1076 } 1077 else 1078 { 1079 /************** initialize candidate **************************/ 1080 1081 dmin = 65535; 1082 1083 /* check if all are equal */ 1084 if (num_can == ALL_CAND_EQUAL) 1085 { 1086 i = i0 + mvx[0]; 1087 j = j0 + mvy[0]; 1088 1089 if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh) 1090 { 1091 cand = ref + i + j * lx; 1092 1093 d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info); 1094 mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy); 1095 d += mvcost; 1096 1097 if (d < dmin) 1098 { 1099 dmin = d; 1100 imin = i; 1101 jmin = j; 1102 ncand = cand; 1103 min_sad = d - mvcost; // for rate control 1104 } 1105 } 1106 } 1107 else 1108 { 1109 /************** evaluate unique candidates **********************/ 1110 for (k = 0; k < num_can; k++) 1111 { 1112 i = i0 + mvx[k]; 1113 j = j0 + mvy[k]; 1114 1115 if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh) 1116 { 1117 cand = ref + i + j * lx; 1118 d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info); 1119 mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy); 1120 d += mvcost; 1121 1122 if (d < dmin) 1123 { 1124 dmin = d; 1125 imin = i; 1126 jmin = j; 1127 ncand = cand; 1128 min_sad = d - mvcost; // for rate control 1129 } 1130 } 1131 } 1132 } 1133 1134 /******************* local refinement ***************************/ 1135 center_again = 0; 1136 last_loc = new_loc = 0; 1137 // ncand = ref + jmin*lx + imin; /* center of the search */ 1138 step = 0; 1139 dn[0] = dmin; 1140 while (!center_again && step <= max_step) 1141 { 1142 1143 AVCMoveNeighborSAD(dn, last_loc); 1144 1145 center_again = 1; 1146 i = imin; 1147 j = jmin - 1; 1148 cand = ref + i + j * lx; 1149 1150 /* starting from [0,-1] */ 1151 /* spiral check one step at a time*/ 1152 for (k = 2; k <= 8; k 
+= 2) 1153 { 1154 if (!tab_exclude[last_loc][k]) /* exclude last step computation */ 1155 { /* not already computed */ 1156 if (i >= ilow && i <= ihigh && j >= jlow && j <= jhigh) 1157 { 1158 d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, extra_info); 1159 mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy); 1160 d += mvcost; 1161 1162 dn[k] = d; /* keep it for half pel use */ 1163 1164 if (d < dmin) 1165 { 1166 ncand = cand; 1167 dmin = d; 1168 imin = i; 1169 jmin = j; 1170 center_again = 0; 1171 new_loc = k; 1172 min_sad = d - mvcost; // for rate control 1173 } 1174 } 1175 } 1176 if (k == 8) /* end side search*/ 1177 { 1178 if (!center_again) 1179 { 1180 k = -1; /* start diagonal search */ 1181 cand -= lx; 1182 j--; 1183 } 1184 } 1185 else 1186 { 1187 next = refine_next[k][0]; 1188 i += next; 1189 cand += next; 1190 next = refine_next[k][1]; 1191 j += next; 1192 cand += lx * next; 1193 } 1194 } 1195 last_loc = new_loc; 1196 step ++; 1197 } 1198 if (!center_again) 1199 AVCMoveNeighborSAD(dn, last_loc); 1200 1201 *hp_guess = AVCFindMin(dn); 1202 1203 encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0; 1204 } 1205 } 1206 1207 mot16x16[mbnum].sad = dmin; 1208 mot16x16[mbnum].x = (imin - i0) << 2; 1209 mot16x16[mbnum].y = (jmin - j0) << 2; 1210 best_cand[0] = ncand; 1211 1212 if (rateCtrl->subPelEnable) // always enable half-pel search 1213 { 1214 /* find half-pel resolution motion vector */ 1215 min_sad = AVCFindHalfPelMB(encvid, cur, mot16x16 + mbnum, best_cand[0], i0, j0, *hp_guess, cmvx, cmvy); 1216 1217 encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0; 1218 1219 1220 if (encvid->best_qpel_pos == -1) 1221 { 1222 ncand = encvid->hpel_cand[encvid->best_hpel_pos]; 1223 } 1224 else 1225 { 1226 ncand = encvid->qpel_cand[encvid->best_qpel_pos]; 1227 } 1228 } 1229 else 1230 { 1231 encvid->rateCtrl->MADofMB[mbnum] = min_sad / 256.0; 1232 } 1233 1234 /** do motion comp here for now */ 1235 ref = currPic->Sl + i0 + j0 * lx; 1236 /* copy from the best 
result to current Picture */ 1237 for (j = 0; j < 16; j++) 1238 { 1239 for (i = 0; i < 16; i++) 1240 { 1241 *ref++ = *ncand++; 1242 } 1243 ref += (lx - 16); 1244 ncand += 8; 1245 } 1246 1247 return ; 1248 } 1249 1250 #endif 1251 1252 /*=============================================================================== 1253 Function: AVCFullSearch 1254 Date: 09/16/2000 1255 Purpose: Perform full-search motion estimation over the range of search 1256 region in a spiral-outward manner. 1257 Input/Output: VideoEncData, current Vol, previou Vop, pointer to the left corner of 1258 current VOP, current coord (also output), boundaries. 1259 ===============================================================================*/ 1260 int AVCFullSearch(AVCEncObject *encvid, uint8 *prev, uint8 *cur, 1261 int *imin, int *jmin, int ilow, int ihigh, int jlow, int jhigh, 1262 int cmvx, int cmvy) 1263 { 1264 int range = encvid->rateCtrl->mvRange; 1265 AVCPictureData *currPic = encvid->common->currPic; 1266 uint8 *cand; 1267 int i, j, k, l; 1268 int d, dmin; 1269 int i0 = *imin; /* current position */ 1270 int j0 = *jmin; 1271 int (*SAD_Macroblock)(uint8*, uint8*, int, void*) = encvid->functionPointer->SAD_Macroblock; 1272 void *extra_info = encvid->sad_extra_info; 1273 int lx = currPic->pitch; /* with padding */ 1274 1275 int offset = i0 + j0 * lx; 1276 1277 int lambda_motion = encvid->lambda_motion; 1278 uint8 *mvbits = encvid->mvbits; 1279 int mvshift = 2; 1280 int mvcost; 1281 int min_sad; 1282 1283 cand = prev + offset; 1284 1285 dmin = (*SAD_Macroblock)(cand, cur, (65535 << 16) | lx, (void*)extra_info); 1286 mvcost = MV_COST(lambda_motion, mvshift, 0, 0, cmvx, cmvy); 1287 min_sad = dmin; 1288 dmin += mvcost; 1289 1290 /* perform spiral search */ 1291 for (k = 1; k <= range; k++) 1292 { 1293 1294 i = i0 - k; 1295 j = j0 - k; 1296 1297 cand = prev + i + j * lx; 1298 1299 for (l = 0; l < 8*k; l++) 1300 { 1301 /* no need for boundary checking again */ 1302 if (i >= ilow && i <= ihigh && j 
>= jlow && j <= jhigh) 1303 { 1304 d = (*SAD_Macroblock)(cand, cur, (dmin << 16) | lx, (void*)extra_info); 1305 mvcost = MV_COST(lambda_motion, mvshift, i - i0, j - j0, cmvx, cmvy); 1306 d += mvcost; 1307 1308 if (d < dmin) 1309 { 1310 dmin = d; 1311 *imin = i; 1312 *jmin = j; 1313 min_sad = d - mvcost; 1314 } 1315 } 1316 1317 if (l < (k << 1)) 1318 { 1319 i++; 1320 cand++; 1321 } 1322 else if (l < (k << 2)) 1323 { 1324 j++; 1325 cand += lx; 1326 } 1327 else if (l < ((k << 2) + (k << 1))) 1328 { 1329 i--; 1330 cand--; 1331 } 1332 else 1333 { 1334 j--; 1335 cand -= lx; 1336 } 1337 } 1338 } 1339 1340 encvid->rateCtrl->MADofMB[encvid->common->mbNum] = (min_sad / 256.0); // for rate control 1341 1342 return dmin; 1343 } 1344 1345 /*=============================================================================== 1346 Function: AVCCandidateSelection 1347 Date: 09/16/2000 1348 Purpose: Fill up the list of candidate using spatio-temporal correlation 1349 among neighboring blocks. 1350 Input/Output: type_pred = 0: first pass, 1: second pass, or no SCD 1351 Modified: , 09/23/01, get rid of redundant candidates before passing back. 1352 , 09/11/07, added return for modified predicted MV, this will be 1353 needed for both fast search and fullsearch. 
1354 ===============================================================================*/ 1355 1356 void AVCCandidateSelection(int *mvx, int *mvy, int *num_can, int imb, int jmb, 1357 AVCEncObject *encvid, int type_pred, int *cmvx, int *cmvy) 1358 { 1359 AVCCommonObj *video = encvid->common; 1360 AVCMV *mot16x16 = encvid->mot16x16; 1361 AVCMV *pmot; 1362 int mbnum = video->mbNum; 1363 int mbwidth = video->PicWidthInMbs; 1364 int mbheight = video->PicHeightInMbs; 1365 int i, j, same, num1; 1366 1367 /* this part is for predicted MV */ 1368 int pmvA_x = 0, pmvA_y = 0, pmvB_x = 0, pmvB_y = 0, pmvC_x = 0, pmvC_y = 0; 1369 int availA = 0, availB = 0, availC = 0; 1370 1371 *num_can = 0; 1372 1373 if (video->PrevRefFrameNum != 0) // previous frame is an IDR frame 1374 { 1375 /* Spatio-Temporal Candidate (five candidates) */ 1376 if (type_pred == 0) /* first pass */ 1377 { 1378 pmot = &mot16x16[mbnum]; /* same coordinate previous frame */ 1379 mvx[(*num_can)] = (pmot->x) >> 2; 1380 mvy[(*num_can)++] = (pmot->y) >> 2; 1381 if (imb >= (mbwidth >> 1) && imb > 0) /*left neighbor previous frame */ 1382 { 1383 pmot = &mot16x16[mbnum-1]; 1384 mvx[(*num_can)] = (pmot->x) >> 2; 1385 mvy[(*num_can)++] = (pmot->y) >> 2; 1386 } 1387 else if (imb + 1 < mbwidth) /*right neighbor previous frame */ 1388 { 1389 pmot = &mot16x16[mbnum+1]; 1390 mvx[(*num_can)] = (pmot->x) >> 2; 1391 mvy[(*num_can)++] = (pmot->y) >> 2; 1392 } 1393 1394 if (jmb < mbheight - 1) /*bottom neighbor previous frame */ 1395 { 1396 pmot = &mot16x16[mbnum+mbwidth]; 1397 mvx[(*num_can)] = (pmot->x) >> 2; 1398 mvy[(*num_can)++] = (pmot->y) >> 2; 1399 } 1400 else if (jmb > 0) /*upper neighbor previous frame */ 1401 { 1402 pmot = &mot16x16[mbnum-mbwidth]; 1403 mvx[(*num_can)] = (pmot->x) >> 2; 1404 mvy[(*num_can)++] = (pmot->y) >> 2; 1405 } 1406 1407 if (imb > 0 && jmb > 0) /* upper-left neighbor current frame*/ 1408 { 1409 pmot = &mot16x16[mbnum-mbwidth-1]; 1410 mvx[(*num_can)] = (pmot->x) >> 2; 1411 mvy[(*num_can)++] = 
(pmot->y) >> 2; 1412 } 1413 if (jmb > 0 && imb < mbheight - 1) /* upper right neighbor current frame*/ 1414 { 1415 pmot = &mot16x16[mbnum-mbwidth+1]; 1416 mvx[(*num_can)] = (pmot->x) >> 2; 1417 mvy[(*num_can)++] = (pmot->y) >> 2; 1418 } 1419 } 1420 else /* second pass */ 1421 /* original ST1 algorithm */ 1422 { 1423 pmot = &mot16x16[mbnum]; /* same coordinate previous frame */ 1424 mvx[(*num_can)] = (pmot->x) >> 2; 1425 mvy[(*num_can)++] = (pmot->y) >> 2; 1426 1427 if (imb > 0) /*left neighbor current frame */ 1428 { 1429 pmot = &mot16x16[mbnum-1]; 1430 mvx[(*num_can)] = (pmot->x) >> 2; 1431 mvy[(*num_can)++] = (pmot->y) >> 2; 1432 } 1433 if (jmb > 0) /*upper neighbor current frame */ 1434 { 1435 pmot = &mot16x16[mbnum-mbwidth]; 1436 mvx[(*num_can)] = (pmot->x) >> 2; 1437 mvy[(*num_can)++] = (pmot->y) >> 2; 1438 } 1439 if (imb < mbwidth - 1) /*right neighbor previous frame */ 1440 { 1441 pmot = &mot16x16[mbnum+1]; 1442 mvx[(*num_can)] = (pmot->x) >> 2; 1443 mvy[(*num_can)++] = (pmot->y) >> 2; 1444 } 1445 if (jmb < mbheight - 1) /*bottom neighbor previous frame */ 1446 { 1447 pmot = &mot16x16[mbnum+mbwidth]; 1448 mvx[(*num_can)] = (pmot->x) >> 2; 1449 mvy[(*num_can)++] = (pmot->y) >> 2; 1450 } 1451 } 1452 1453 /* get predicted MV */ 1454 if (imb > 0) /* get MV from left (A) neighbor either on current or previous frame */ 1455 { 1456 availA = 1; 1457 pmot = &mot16x16[mbnum-1]; 1458 pmvA_x = pmot->x; 1459 pmvA_y = pmot->y; 1460 } 1461 1462 if (jmb > 0) /* get MV from top (B) neighbor either on current or previous frame */ 1463 { 1464 availB = 1; 1465 pmot = &mot16x16[mbnum-mbwidth]; 1466 pmvB_x = pmot->x; 1467 pmvB_y = pmot->y; 1468 1469 availC = 1; 1470 1471 if (imb < mbwidth - 1) /* get MV from top-right (C) neighbor of current frame */ 1472 { 1473 pmot = &mot16x16[mbnum-mbwidth+1]; 1474 } 1475 else /* get MV from top-left (D) neighbor of current frame */ 1476 { 1477 pmot = &mot16x16[mbnum-mbwidth-1]; 1478 } 1479 pmvC_x = pmot->x; 1480 pmvC_y = pmot->y; 1481 } 1482 
1483 } 1484 else /* only Spatial Candidate (four candidates)*/ 1485 { 1486 if (type_pred == 0) /*first pass*/ 1487 { 1488 if (imb > 1) /* neighbor two blocks away to the left */ 1489 { 1490 pmot = &mot16x16[mbnum-2]; 1491 mvx[(*num_can)] = (pmot->x) >> 2; 1492 mvy[(*num_can)++] = (pmot->y) >> 2; 1493 } 1494 if (imb > 0 && jmb > 0) /* upper-left neighbor */ 1495 { 1496 pmot = &mot16x16[mbnum-mbwidth-1]; 1497 mvx[(*num_can)] = (pmot->x) >> 2; 1498 mvy[(*num_can)++] = (pmot->y) >> 2; 1499 } 1500 if (jmb > 0 && imb < mbheight - 1) /* upper right neighbor */ 1501 { 1502 pmot = &mot16x16[mbnum-mbwidth+1]; 1503 mvx[(*num_can)] = (pmot->x) >> 2; 1504 mvy[(*num_can)++] = (pmot->y) >> 2; 1505 } 1506 1507 /* get predicted MV */ 1508 if (imb > 1) /* get MV from 2nd left (A) neighbor either of current frame */ 1509 { 1510 availA = 1; 1511 pmot = &mot16x16[mbnum-2]; 1512 pmvA_x = pmot->x; 1513 pmvA_y = pmot->y; 1514 } 1515 1516 if (jmb > 0 && imb > 0) /* get MV from top-left (B) neighbor of current frame */ 1517 { 1518 availB = 1; 1519 pmot = &mot16x16[mbnum-mbwidth-1]; 1520 pmvB_x = pmot->x; 1521 pmvB_y = pmot->y; 1522 } 1523 1524 if (jmb > 0 && imb < mbwidth - 1) 1525 { 1526 availC = 1; 1527 pmot = &mot16x16[mbnum-mbwidth+1]; 1528 pmvC_x = pmot->x; 1529 pmvC_y = pmot->y; 1530 } 1531 } 1532 //#ifdef SCENE_CHANGE_DETECTION 1533 /* second pass (ST2 algorithm)*/ 1534 else 1535 { 1536 if (type_pred == 1) /* 4/7/01 */ 1537 { 1538 if (imb > 0) /*left neighbor current frame */ 1539 { 1540 pmot = &mot16x16[mbnum-1]; 1541 mvx[(*num_can)] = (pmot->x) >> 2; 1542 mvy[(*num_can)++] = (pmot->y) >> 2; 1543 } 1544 if (jmb > 0) /*upper neighbor current frame */ 1545 { 1546 pmot = &mot16x16[mbnum-mbwidth]; 1547 mvx[(*num_can)] = (pmot->x) >> 2; 1548 mvy[(*num_can)++] = (pmot->y) >> 2; 1549 } 1550 if (imb < mbwidth - 1) /*right neighbor current frame */ 1551 { 1552 pmot = &mot16x16[mbnum+1]; 1553 mvx[(*num_can)] = (pmot->x) >> 2; 1554 mvy[(*num_can)++] = (pmot->y) >> 2; 1555 } 1556 if (jmb < 
mbheight - 1) /*bottom neighbor current frame */ 1557 { 1558 pmot = &mot16x16[mbnum+mbwidth]; 1559 mvx[(*num_can)] = (pmot->x) >> 2; 1560 mvy[(*num_can)++] = (pmot->y) >> 2; 1561 } 1562 } 1563 //#else 1564 else /* original ST1 algorithm */ 1565 { 1566 if (imb > 0) /*left neighbor current frame */ 1567 { 1568 pmot = &mot16x16[mbnum-1]; 1569 mvx[(*num_can)] = (pmot->x) >> 2; 1570 mvy[(*num_can)++] = (pmot->y) >> 2; 1571 1572 if (jmb > 0) /*upper-left neighbor current frame */ 1573 { 1574 pmot = &mot16x16[mbnum-mbwidth-1]; 1575 mvx[(*num_can)] = (pmot->x) >> 2; 1576 mvy[(*num_can)++] = (pmot->y) >> 2; 1577 } 1578 1579 } 1580 if (jmb > 0) /*upper neighbor current frame */ 1581 { 1582 pmot = &mot16x16[mbnum-mbwidth]; 1583 mvx[(*num_can)] = (pmot->x) >> 2; 1584 mvy[(*num_can)++] = (pmot->y) >> 2; 1585 1586 if (imb < mbheight - 1) /*upper-right neighbor current frame */ 1587 { 1588 pmot = &mot16x16[mbnum-mbwidth+1]; 1589 mvx[(*num_can)] = (pmot->x) >> 2; 1590 mvy[(*num_can)++] = (pmot->y) >> 2; 1591 } 1592 } 1593 } 1594 1595 /* get predicted MV */ 1596 if (imb > 0) /* get MV from left (A) neighbor either on current or previous frame */ 1597 { 1598 availA = 1; 1599 pmot = &mot16x16[mbnum-1]; 1600 pmvA_x = pmot->x; 1601 pmvA_y = pmot->y; 1602 } 1603 1604 if (jmb > 0) /* get MV from top (B) neighbor either on current or previous frame */ 1605 { 1606 availB = 1; 1607 pmot = &mot16x16[mbnum-mbwidth]; 1608 pmvB_x = pmot->x; 1609 pmvB_y = pmot->y; 1610 1611 availC = 1; 1612 1613 if (imb < mbwidth - 1) /* get MV from top-right (C) neighbor of current frame */ 1614 { 1615 pmot = &mot16x16[mbnum-mbwidth+1]; 1616 } 1617 else /* get MV from top-left (D) neighbor of current frame */ 1618 { 1619 pmot = &mot16x16[mbnum-mbwidth-1]; 1620 } 1621 pmvC_x = pmot->x; 1622 pmvC_y = pmot->y; 1623 } 1624 } 1625 //#endif 1626 } 1627 1628 /* 3/23/01, remove redundant candidate (possible k-mean) */ 1629 num1 = *num_can; 1630 *num_can = 1; 1631 for (i = 1; i < num1; i++) 1632 { 1633 same = 0; 1634 j 
= 0; 1635 while (!same && j < *num_can) 1636 { 1637 #if (CANDIDATE_DISTANCE==0) 1638 if (mvx[i] == mvx[j] && mvy[i] == mvy[j]) 1639 #else 1640 // modified k-mean, 3/24/01, shouldn't be greater than 3 1641 if (AVC_ABS(mvx[i] - mvx[j]) + AVC_ABS(mvy[i] - mvy[j]) < CANDIDATE_DISTANCE) 1642 #endif 1643 same = 1; 1644 j++; 1645 } 1646 if (!same) 1647 { 1648 mvx[*num_can] = mvx[i]; 1649 mvy[*num_can] = mvy[i]; 1650 (*num_can)++; 1651 } 1652 } 1653 1654 if (num1 == 5 && *num_can == 1) 1655 *num_can = ALL_CAND_EQUAL; /* all are equal */ 1656 1657 /* calculate predicted MV */ 1658 1659 if (availA && !(availB || availC)) 1660 { 1661 *cmvx = pmvA_x; 1662 *cmvy = pmvA_y; 1663 } 1664 else 1665 { 1666 *cmvx = AVC_MEDIAN(pmvA_x, pmvB_x, pmvC_x); 1667 *cmvy = AVC_MEDIAN(pmvA_y, pmvB_y, pmvC_y); 1668 } 1669 1670 return ; 1671 } 1672 1673 1674 /************************************************************* 1675 Function: AVCMoveNeighborSAD 1676 Date: 3/27/01 1677 Purpose: Move neighboring SAD around when center has shifted 1678 *************************************************************/ 1679 1680 void AVCMoveNeighborSAD(int dn[], int new_loc) 1681 { 1682 int tmp[9]; 1683 tmp[0] = dn[0]; 1684 tmp[1] = dn[1]; 1685 tmp[2] = dn[2]; 1686 tmp[3] = dn[3]; 1687 tmp[4] = dn[4]; 1688 tmp[5] = dn[5]; 1689 tmp[6] = dn[6]; 1690 tmp[7] = dn[7]; 1691 tmp[8] = dn[8]; 1692 dn[0] = dn[1] = dn[2] = dn[3] = dn[4] = dn[5] = dn[6] = dn[7] = dn[8] = 65536; 1693 1694 switch (new_loc) 1695 { 1696 case 0: 1697 break; 1698 case 1: 1699 dn[4] = tmp[2]; 1700 dn[5] = tmp[0]; 1701 dn[6] = tmp[8]; 1702 break; 1703 case 2: 1704 dn[4] = tmp[3]; 1705 dn[5] = tmp[4]; 1706 dn[6] = tmp[0]; 1707 dn[7] = tmp[8]; 1708 dn[8] = tmp[1]; 1709 break; 1710 case 3: 1711 dn[6] = tmp[4]; 1712 dn[7] = tmp[0]; 1713 dn[8] = tmp[2]; 1714 break; 1715 case 4: 1716 dn[1] = tmp[2]; 1717 dn[2] = tmp[3]; 1718 dn[6] = tmp[5]; 1719 dn[7] = tmp[6]; 1720 dn[8] = tmp[0]; 1721 break; 1722 case 5: 1723 dn[1] = tmp[0]; 1724 dn[2] = tmp[4]; 1725 
/*************************************************************
    Function:   AVCMoveNeighborSAD
    Date:       3/27/01
    Purpose:    Re-index the nine stored costs (dn[0] = center,
                dn[1..8] = its neighbors) after the search center
                has moved to neighbor new_loc (0 = not moved).
                Costs of positions that are also neighbors of the
                new center are carried over; every other slot is
                reset to 65536. dn[0] receives the cost that was
                stored for new_loc.
*************************************************************/

void AVCMoveNeighborSAD(int dn[], int new_loc)
{
    /* carry_from[new_loc][slot] is the old index whose value lands in
       'slot' after the move, or -1 when the slot must be reset. */
    static const signed char carry_from[9][9] =
    {
        { 0, -1, -1, -1, -1, -1, -1, -1, -1},
        { 1, -1, -1, -1,  2,  0,  8, -1, -1},
        { 2, -1, -1, -1,  3,  4,  0,  8,  1},
        { 3, -1, -1, -1, -1, -1,  4,  0,  2},
        { 4,  2,  3, -1, -1, -1,  5,  6,  0},
        { 5,  0,  4, -1, -1, -1, -1, -1,  6},
        { 6,  8,  0,  4,  5, -1, -1, -1,  7},
        { 7, -1,  8,  0,  6, -1, -1, -1, -1},
        { 8, -1,  1,  2,  0,  6,  7, -1, -1}
    };
    int saved[9];
    int slot, src;

    /* snapshot first: the remap reads old values while overwriting dn[] */
    for (slot = 0; slot < 9; slot++)
    {
        saved[slot] = dn[slot];
    }

    for (slot = 0; slot < 9; slot++)
    {
        src = carry_from[new_loc][slot];
        if (src < 0)
        {
            dn[slot] = 65536;   /* not known relative to the new center */
        }
        else
        {
            dn[slot] = saved[src];
        }
    }

    return ;
}

/* 3/28/01, return the index (1..8) of the smallest neighbor cost in dn[9];
   dn[0] is not examined and the lowest index wins on ties */

int AVCFindMin(int dn[])
{
    int best = 1;
    int idx;

    for (idx = 2; idx <= 8; idx++)
    {
        if (dn[idx] < dn[best])
        {
            best = idx;
        }
    }

    return best;
}