1 /* ------------------------------------------------------------------ 2 * Copyright (C) 1998-2009 PacketVideo 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 * express or implied. 14 * See the License for the specific language governing permissions 15 * and limitations under the License. 16 * ------------------------------------------------------------------- 17 */ 18 /* contains 19 int AVCHalfPel1_SAD_MB(uint8 *ref,uint8 *blk,int dmin,int width,int ih,int jh) 20 int AVCHalfPel2_SAD_MB(uint8 *ref,uint8 *blk,int dmin,int width) 21 int AVCHalfPel1_SAD_Blk(uint8 *ref,uint8 *blk,int dmin,int width,int ih,int jh) 22 int AVCHalfPel2_SAD_Blk(uint8 *ref,uint8 *blk,int dmin,int width) 23 24 int AVCSAD_MB_HalfPel_C(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info) 25 int AVCSAD_MB_HP_HTFM_Collect(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info) 26 int AVCSAD_MB_HP_HTFM(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info) 27 int AVCSAD_Blk_HalfPel_C(uint8 *ref,uint8 *blk,int dmin,int width,int rx,int xh,int yh,void *extra_info) 28 */ 29 30 #include "avcenc_lib.h" 31 #include "sad_halfpel_inline.h" 32 33 #ifdef _SAD_STAT 34 uint32 num_sad_HP_MB = 0; 35 uint32 num_sad_HP_Blk = 0; 36 uint32 num_sad_HP_MB_call = 0; 37 uint32 num_sad_HP_Blk_call = 0; 38 #define NUM_SAD_HP_MB_CALL() num_sad_HP_MB_call++ 39 #define NUM_SAD_HP_MB() num_sad_HP_MB++ 40 #define NUM_SAD_HP_BLK_CALL() num_sad_HP_Blk_call++ 41 #define NUM_SAD_HP_BLK() num_sad_HP_Blk++ 42 #else 43 #define NUM_SAD_HP_MB_CALL() 44 #define NUM_SAD_HP_MB() 45 #define NUM_SAD_HP_BLK_CALL() 46 #define NUM_SAD_HP_BLK() 47 #endif 48 49 50 51 /*=============================================================== 52 Function: SAD_MB_HalfPel 53 Date: 09/17/2000 54 Purpose: Compute the SAD on the half-pel resolution 55 Input/Output: hmem is assumed to be a pointer to the starting 56 point of the search in the 33x33 matrix search region 57 Changes: 58 11/7/00: implemented MMX 59 ===============================================================*/ 60 /*================================================================== 61 Function: AVCSAD_MB_HalfPel_C 62 Date: 04/30/2001 63 Purpose: Compute SAD 16x16 between blk and ref in halfpel 64 resolution, 65 Changes: 66 ==================================================================*/ 67 /* One component is half-pel */ 68 int AVCSAD_MB_HalfPel_Cxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) 69 { 70 (void)(extra_info); 71 72 int i, j; 73 int sad = 0; 74 uint8 *kk, *p1, *p2, *p3, *p4; 75 // int sumref=0; 76 int temp; 77 int rx = dmin_rx & 0xFFFF; 78 79 NUM_SAD_HP_MB_CALL(); 80 81 p1 = ref; 82 p2 = ref + 1; 83 p3 = ref + rx; 84 p4 = ref + rx + 1; 85 kk = blk; 86 87 for (i = 0; i < 16; i++) 88 { 89 for (j = 0; j < 16; j++) 90 { 91 92 temp = ((p1[j] + p2[j] + p3[j] + p4[j] + 2) >> 2) - *kk++; 93 sad += AVC_ABS(temp); 94 } 95 96 NUM_SAD_HP_MB(); 97 98 if (sad > (int)((uint32)dmin_rx >> 16)) 99 return sad; 100 101 p1 += rx; 102 p3 += rx; 103 p2 += rx; 104 p4 += rx; 105 } 106 return sad; 107 } 108 109 int AVCSAD_MB_HalfPel_Cyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) 110 { 111 (void)(extra_info); 112 113 int i, j; 114 int sad = 0; 115 uint8 *kk, *p1, *p2; 116 // int sumref=0; 117 int temp; 118 int rx = dmin_rx & 0xFFFF; 119 120 NUM_SAD_HP_MB_CALL(); 121 122 p1 = ref; 123 p2 = ref + rx; /* either left/right or top/bottom pixel */ 124 kk = blk; 125 126 for (i = 0; i < 16; i++) 127 { 128 for (j = 0; j < 16; j++) 129 { 130 131 temp = ((p1[j] + p2[j] + 1) >> 1) - *kk++; 132 sad += AVC_ABS(temp); 133 } 134 135 NUM_SAD_HP_MB(); 136 137 if (sad > (int)((uint32)dmin_rx >> 16)) 138 return sad; 139 p1 += rx; 140 p2 += rx; 141 } 142 return sad; 143 } 144 145 int AVCSAD_MB_HalfPel_Cxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) 146 { 147 (void)(extra_info); 148 149 int i, j; 150 int sad = 0; 151 uint8 *kk, *p1; 152 int temp; 153 int rx = dmin_rx & 0xFFFF; 154 155 NUM_SAD_HP_MB_CALL(); 156 157 p1 = ref; 158 kk = blk; 159 160 for (i = 0; i < 16; i++) 161 { 162 for (j = 0; j < 16; j++) 163 { 164 165 temp = ((p1[j] + p1[j+1] + 1) >> 1) - *kk++; 166 sad += AVC_ABS(temp); 167 } 168 169 NUM_SAD_HP_MB(); 170 171 if (sad > (int)((uint32)dmin_rx >> 16)) 172 return sad; 173 p1 += rx; 174 } 175 return sad; 176 } 177 178 #ifdef HTFM /* HTFM with uniform subsampling implementation, 2/28/01 */ 179 180 //Checheck here 181 int AVCAVCSAD_MB_HP_HTFM_Collectxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) 182 { 183 int i, j; 184 int sad = 0; 185 uint8 *p1, *p2; 186 int rx = dmin_rx & 0xFFFF; 187 int refwx4 = rx << 2; 188 int saddata[16]; /* used when collecting flag (global) is on */ 189 int difmad, tmp, tmp2; 190 int madstar; 191 HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info; 192 int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg); 193 UInt *countbreak = &(htfm_stat->countbreak); 194 int *offsetRef = htfm_stat->offsetRef; 195 uint32 cur_word; 196 197 madstar = (uint32)dmin_rx >> 20; 198 199 NUM_SAD_HP_MB_CALL(); 200 201 blk -= 4; 202 203 for (i = 0; i < 16; i++) /* 16 stages */ 204 { 205 p1 = ref + offsetRef[i]; 206 p2 = p1 + rx; 207 208 j = 4;/* 4 lines */ 209 do 210 { 211 cur_word = *((uint32*)(blk += 4)); 212 tmp = p1[12] + p2[12]; 213 tmp2 = p1[13] + p2[13]; 214 tmp += tmp2; 215 tmp2 = (cur_word >> 24) & 0xFF; 216 tmp += 2; 217 sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; 218 tmp = p1[8] + p2[8]; 219 tmp2 = p1[9] + p2[9]; 220 tmp += tmp2; 221 tmp2 = (cur_word >> 16) & 0xFF; 222 tmp += 2; 223 sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; 224 tmp = p1[4] + p2[4]; 225 tmp2 = p1[5] + p2[5]; 226 tmp += tmp2; 227 tmp2 = (cur_word >> 8) & 0xFF; 228 tmp += 2; 229 sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; 230 tmp2 = p1[1] + p2[1]; 231 tmp = p1[0] + p2[0]; 232 p1 += refwx4; 233 p2 += refwx4; 234 tmp += tmp2; 235 tmp2 = (cur_word & 0xFF); 236 tmp += 2; 237 sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; 238 } 239 while (--j); 240 241 NUM_SAD_HP_MB(); 242 243 saddata[i] = sad; 244 245 if (i > 0) 246 { 247 if (sad > ((uint32)dmin_rx >> 16)) 248 { 249 difmad = saddata[0] - ((saddata[1] + 1) >> 1); 250 (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad); 251 (*countbreak)++; 252 return sad; 253 } 254 } 255 } 256 difmad = saddata[0] - ((saddata[1] + 1) >> 1); 257 (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad); 258 (*countbreak)++; 259 260 return sad; 261 } 262 263 int AVCAVCSAD_MB_HP_HTFM_Collectyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) 264 { 265 int i, j; 266 int sad = 0; 267 uint8 *p1, *p2; 268 int rx = dmin_rx & 0xFFFF; 269 int refwx4 = rx << 2; 270 int saddata[16]; /* used when collecting flag (global) is on */ 271 int difmad, tmp, tmp2; 272 int madstar; 273 HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info; 274 int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg); 275 UInt *countbreak = &(htfm_stat->countbreak); 276 int *offsetRef = htfm_stat->offsetRef; 277 uint32 cur_word; 278 279 madstar = (uint32)dmin_rx >> 20; 280 281 NUM_SAD_HP_MB_CALL(); 282 283 blk -= 4; 284 285 for (i = 0; i < 16; i++) /* 16 stages */ 286 { 287 p1 = ref + offsetRef[i]; 288 p2 = p1 + rx; 289 j = 4; 290 do 291 { 292 cur_word = *((uint32*)(blk += 4)); 293 tmp = p1[12]; 294 tmp2 = p2[12]; 295 tmp++; 296 tmp2 += tmp; 297 tmp = (cur_word >> 24) & 0xFF; 298 sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; 299 tmp = p1[8]; 300 tmp2 = p2[8]; 301 tmp++; 302 tmp2 += tmp; 303 tmp = (cur_word >> 16) & 0xFF; 304 sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; 305 tmp = p1[4]; 306 tmp2 = p2[4]; 307 tmp++; 308 tmp2 += tmp; 309 tmp = (cur_word >> 8) & 0xFF; 310 sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; 311 tmp = p1[0]; 312 p1 += refwx4; 313 tmp2 = p2[0]; 314 p2 += refwx4; 315 tmp++; 316 tmp2 += tmp; 317 tmp = (cur_word & 0xFF); 318 sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; 319 } 320 while (--j); 321 322 NUM_SAD_HP_MB(); 323 324 saddata[i] = sad; 325 326 if (i > 0) 327 { 328 if (sad > ((uint32)dmin_rx >> 16)) 329 { 330 difmad = saddata[0] - ((saddata[1] + 1) >> 1); 331 (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad); 332 (*countbreak)++; 333 return sad; 334 } 335 } 336 } 337 difmad = saddata[0] - ((saddata[1] + 1) >> 1); 338 (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad); 339 (*countbreak)++; 340 341 return sad; 342 } 343 344 int AVCAVCSAD_MB_HP_HTFM_Collectxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) 345 { 346 int i, j; 347 int sad = 0; 348 uint8 *p1; 349 int rx = dmin_rx & 0xFFFF; 350 int refwx4 = rx << 2; 351 int saddata[16]; /* used when collecting flag (global) is on */ 352 int difmad, tmp, tmp2; 353 int madstar; 354 HTFM_Stat *htfm_stat = (HTFM_Stat*) extra_info; 355 int *abs_dif_mad_avg = &(htfm_stat->abs_dif_mad_avg); 356 UInt *countbreak = &(htfm_stat->countbreak); 357 int *offsetRef = htfm_stat->offsetRef; 358 uint32 cur_word; 359 360 madstar = (uint32)dmin_rx >> 20; 361 362 NUM_SAD_HP_MB_CALL(); 363 364 blk -= 4; 365 366 for (i = 0; i < 16; i++) /* 16 stages */ 367 { 368 p1 = ref + offsetRef[i]; 369 370 j = 4; /* 4 lines */ 371 do 372 { 373 cur_word = *((uint32*)(blk += 4)); 374 tmp = p1[12]; 375 tmp2 = p1[13]; 376 tmp++; 377 tmp2 += tmp; 378 tmp = (cur_word >> 24) & 0xFF; 379 sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; 380 tmp = p1[8]; 381 tmp2 = p1[9]; 382 tmp++; 383 tmp2 += tmp; 384 tmp = (cur_word >> 16) & 0xFF; 385 sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; 386 tmp = p1[4]; 387 tmp2 = p1[5]; 388 tmp++; 389 tmp2 += tmp; 390 tmp = (cur_word >> 8) & 0xFF; 391 sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; 392 tmp = p1[0]; 393 tmp2 = p1[1]; 394 p1 += refwx4; 395 tmp++; 396 tmp2 += tmp; 397 tmp = (cur_word & 0xFF); 398 sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; 399 } 400 while (--j); 401 402 NUM_SAD_HP_MB(); 403 404 saddata[i] = sad; 405 406 if (i > 0) 407 { 408 if (sad > ((uint32)dmin_rx >> 16)) 409 { 410 difmad = saddata[0] - ((saddata[1] + 1) >> 1); 411 (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad); 412 (*countbreak)++; 413 return sad; 414 } 415 } 416 } 417 difmad = saddata[0] - ((saddata[1] + 1) >> 1); 418 (*abs_dif_mad_avg) += ((difmad > 0) ? difmad : -difmad); 419 (*countbreak)++; 420 421 return sad; 422 } 423 424 int AVCSAD_MB_HP_HTFMxhyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) 425 { 426 int i, j; 427 int sad = 0, tmp, tmp2; 428 uint8 *p1, *p2; 429 int rx = dmin_rx & 0xFFFF; 430 int refwx4 = rx << 2; 431 int sadstar = 0, madstar; 432 int *nrmlz_th = (int*) extra_info; 433 int *offsetRef = nrmlz_th + 32; 434 uint32 cur_word; 435 436 madstar = (uint32)dmin_rx >> 20; 437 438 NUM_SAD_HP_MB_CALL(); 439 440 blk -= 4; 441 442 for (i = 0; i < 16; i++) /* 16 stages */ 443 { 444 p1 = ref + offsetRef[i]; 445 p2 = p1 + rx; 446 447 j = 4; /* 4 lines */ 448 do 449 { 450 cur_word = *((uint32*)(blk += 4)); 451 tmp = p1[12] + p2[12]; 452 tmp2 = p1[13] + p2[13]; 453 tmp += tmp2; 454 tmp2 = (cur_word >> 24) & 0xFF; 455 tmp += 2; 456 sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; 457 tmp = p1[8] + p2[8]; 458 tmp2 = p1[9] + p2[9]; 459 tmp += tmp2; 460 tmp2 = (cur_word >> 16) & 0xFF; 461 tmp += 2; 462 sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; 463 tmp = p1[4] + p2[4]; 464 tmp2 = p1[5] + p2[5]; 465 tmp += tmp2; 466 tmp2 = (cur_word >> 8) & 0xFF; 467 tmp += 2; 468 sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; 469 tmp2 = p1[1] + p2[1]; 470 tmp = p1[0] + p2[0]; 471 p1 += refwx4; 472 p2 += refwx4; 473 tmp += tmp2; 474 tmp2 = (cur_word & 0xFF); 475 tmp += 2; 476 sad = INTERP2_SUB_SAD(sad, tmp, tmp2);; 477 } 478 while (--j); 479 480 NUM_SAD_HP_MB(); 481 482 sadstar += madstar; 483 if (sad > sadstar - nrmlz_th[i] || sad > ((uint32)dmin_rx >> 16)) 484 { 485 return 65536; 486 } 487 } 488 489 return sad; 490 } 491 492 int AVCSAD_MB_HP_HTFMyh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) 493 { 494 int i, j; 495 int sad = 0, tmp, tmp2; 496 uint8 *p1, *p2; 497 int rx = dmin_rx & 0xFFFF; 498 int refwx4 = rx << 2; 499 int sadstar = 0, madstar; 500 int *nrmlz_th = (int*) extra_info; 501 int *offsetRef = nrmlz_th + 32; 502 uint32 cur_word; 503 504 madstar = (uint32)dmin_rx >> 20; 505 506 NUM_SAD_HP_MB_CALL(); 507 508 blk -= 4; 509 510 for (i = 0; i < 16; i++) /* 16 stages */ 511 { 512 p1 = ref + offsetRef[i]; 513 p2 = p1 + rx; 514 j = 4; 515 do 516 { 517 cur_word = *((uint32*)(blk += 4)); 518 tmp = p1[12]; 519 tmp2 = p2[12]; 520 tmp++; 521 tmp2 += tmp; 522 tmp = (cur_word >> 24) & 0xFF; 523 sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; 524 tmp = p1[8]; 525 tmp2 = p2[8]; 526 tmp++; 527 tmp2 += tmp; 528 tmp = (cur_word >> 16) & 0xFF; 529 sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; 530 tmp = p1[4]; 531 tmp2 = p2[4]; 532 tmp++; 533 tmp2 += tmp; 534 tmp = (cur_word >> 8) & 0xFF; 535 sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; 536 tmp = p1[0]; 537 p1 += refwx4; 538 tmp2 = p2[0]; 539 p2 += refwx4; 540 tmp++; 541 tmp2 += tmp; 542 tmp = (cur_word & 0xFF); 543 sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; 544 } 545 while (--j); 546 547 NUM_SAD_HP_MB(); 548 sadstar += madstar; 549 if (sad > sadstar - nrmlz_th[i] || sad > ((uint32)dmin_rx >> 16)) 550 { 551 return 65536; 552 } 553 } 554 555 return sad; 556 } 557 558 int AVCSAD_MB_HP_HTFMxh(uint8 *ref, uint8 *blk, int dmin_rx, void *extra_info) 559 { 560 int i, j; 561 int sad = 0, tmp, tmp2; 562 uint8 *p1; 563 int rx = dmin_rx & 0xFFFF; 564 int refwx4 = rx << 2; 565 int sadstar = 0, madstar; 566 int *nrmlz_th = (int*) extra_info; 567 int *offsetRef = nrmlz_th + 32; 568 uint32 cur_word; 569 570 madstar = (uint32)dmin_rx >> 20; 571 572 NUM_SAD_HP_MB_CALL(); 573 574 blk -= 4; 575 576 for (i = 0; i < 16; i++) /* 16 stages */ 577 { 578 p1 = ref + offsetRef[i]; 579 580 j = 4;/* 4 lines */ 581 do 582 { 583 cur_word = *((uint32*)(blk += 4)); 584 tmp = p1[12]; 585 tmp2 = p1[13]; 586 tmp++; 587 tmp2 += tmp; 588 tmp = (cur_word >> 24) & 0xFF; 589 sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; 590 tmp = p1[8]; 591 tmp2 = p1[9]; 592 tmp++; 593 tmp2 += tmp; 594 tmp = (cur_word >> 16) & 0xFF; 595 sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; 596 tmp = p1[4]; 597 tmp2 = p1[5]; 598 tmp++; 599 tmp2 += tmp; 600 tmp = (cur_word >> 8) & 0xFF; 601 sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; 602 tmp = p1[0]; 603 tmp2 = p1[1]; 604 p1 += refwx4; 605 tmp++; 606 tmp2 += tmp; 607 tmp = (cur_word & 0xFF); 608 sad = INTERP1_SUB_SAD(sad, tmp, tmp2);; 609 } 610 while (--j); 611 612 NUM_SAD_HP_MB(); 613 614 sadstar += madstar; 615 if (sad > sadstar - nrmlz_th[i] || sad > ((uint32)dmin_rx >> 16)) 616 { 617 return 65536; 618 } 619 } 620 621 return sad; 622 } 623 624 #endif /* HTFM */ 625 626 627 628 629 630