1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /*------------------------------------------------------------------------------ 18 19 Table of contents 20 21 1. Include headers 22 2. External compiler flags 23 3. Module defines 24 4. Local function prototypes 25 5. Functions 26 27 ------------------------------------------------------------------------------*/ 28 29 /*------------------------------------------------------------------------------ 30 1. Include headers 31 ------------------------------------------------------------------------------*/ 32 33 #include "basetype.h" 34 #include "h264bsd_reconstruct.h" 35 #include "h264bsd_macroblock_layer.h" 36 #include "h264bsd_image.h" 37 #include "h264bsd_util.h" 38 39 #ifdef H264DEC_OMXDL 40 #include "omxtypes.h" 41 #include "omxVC.h" 42 #include "armVC.h" 43 #endif /* H264DEC_OMXDL */ 44 45 #define UNUSED(x) (void)(x) 46 47 /*------------------------------------------------------------------------------ 48 2. External compiler flags 49 -------------------------------------------------------------------------------- 50 51 -------------------------------------------------------------------------------- 52 3. 
Module defines
------------------------------------------------------------------------------*/

/* Switch off the following Lint messages for this file:
 * Info 701: Shift left of signed quantity (int)
 * Info 702: Shift right of signed quantity (int)
 */
/*lint -e701 -e702 */

/* Luma fractional-sample positions
 *
 *  G a b c H
 *  d e f g
 *  h i j k m
 *  n p q r
 *  M   s   N
 *
 *  G, H, M and N are integer sample positions
 *  a-s are fractional samples that need to be interpolated.
 */
#ifndef H264DEC_OMXDL
/* Lookup table giving every fractional-sample position of the diagram above
 * a distinct index 0..15. The letter row below names the position that each
 * entry stands for, in the order the entries are written.
 * NOTE(review): presumably indexed as [horizontal fraction][vertical
 * fraction], since the first row (G d h n) walks down the leftmost column of
 * the diagram -- confirm at the use site, which is outside this part of the
 * file. */
static const u32 lumaFracPos[4][4] = {
  /* G  d  h  n    a  e  i  p    b  f  j   q     c   g   k   r */
    {0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}, {12, 13, 14, 15}};
#endif /* H264DEC_OMXDL */

/* clipping table, defined in h264bsd_intra_prediction.c */
extern const u8 h264bsdClip[];

/*------------------------------------------------------------------------------
    4. Local function prototypes
------------------------------------------------------------------------------*/

#ifndef H264DEC_OMXDL

/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateChromaHor

        Functional description:
          This function performs chroma interpolation in horizontal direction.
          Overfilling is done only if needed.
Reference image (pRef) is 94 read at correct position and the predicted part is written to 95 macroblock's chrominance (predPartChroma) 96 Inputs: 97 pRef pointer to reference frame Cb top-left corner 98 x0 integer x-coordinate for prediction 99 y0 integer y-coordinate for prediction 100 width width of the reference frame chrominance in pixels 101 height height of the reference frame chrominance in pixels 102 xFrac horizontal fraction for prediction in 1/8 pixels 103 chromaPartWidth width of the predicted part in pixels 104 chromaPartHeight height of the predicted part in pixels 105 Outputs: 106 predPartChroma pointer where predicted part is written 107 108 ------------------------------------------------------------------------------*/ 109 #ifndef H264DEC_ARM11 110 void h264bsdInterpolateChromaHor( 111 u8 *pRef, 112 u8 *predPartChroma, 113 i32 x0, 114 i32 y0, 115 u32 width, 116 u32 height, 117 u32 xFrac, 118 u32 chromaPartWidth, 119 u32 chromaPartHeight) 120 { 121 122 /* Variables */ 123 124 u32 x, y, tmp1, tmp2, tmp3, tmp4, c, val; 125 u8 *ptrA, *cbr; 126 u32 comp; 127 u8 block[9*8*2]; 128 129 /* Code */ 130 131 ASSERT(predPartChroma); 132 ASSERT(chromaPartWidth); 133 ASSERT(chromaPartHeight); 134 ASSERT(xFrac < 8); 135 ASSERT(pRef); 136 137 if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) || 138 (y0 < 0) || ((u32)y0+chromaPartHeight > height)) 139 { 140 h264bsdFillBlock(pRef, block, x0, y0, width, height, 141 chromaPartWidth + 1, chromaPartHeight, chromaPartWidth + 1); 142 pRef += width * height; 143 h264bsdFillBlock(pRef, block + (chromaPartWidth+1)*chromaPartHeight, 144 x0, y0, width, height, chromaPartWidth + 1, 145 chromaPartHeight, chromaPartWidth + 1); 146 147 pRef = block; 148 x0 = 0; 149 y0 = 0; 150 width = chromaPartWidth+1; 151 height = chromaPartHeight; 152 } 153 154 val = 8 - xFrac; 155 156 for (comp = 0; comp <= 1; comp++) 157 { 158 159 ptrA = pRef + (comp * height + (u32)y0) * width + x0; 160 cbr = predPartChroma + comp * 8 * 8; 161 162 /* 2x2 
pels per iteration 163 * bilinear horizontal interpolation */ 164 for (y = (chromaPartHeight >> 1); y; y--) 165 { 166 for (x = (chromaPartWidth >> 1); x; x--) 167 { 168 tmp1 = ptrA[width]; 169 tmp2 = *ptrA++; 170 tmp3 = ptrA[width]; 171 tmp4 = *ptrA++; 172 c = ((val * tmp1 + xFrac * tmp3) << 3) + 32; 173 c >>= 6; 174 cbr[8] = (u8)c; 175 c = ((val * tmp2 + xFrac * tmp4) << 3) + 32; 176 c >>= 6; 177 *cbr++ = (u8)c; 178 tmp1 = ptrA[width]; 179 tmp2 = *ptrA; 180 c = ((val * tmp3 + xFrac * tmp1) << 3) + 32; 181 c >>= 6; 182 cbr[8] = (u8)c; 183 c = ((val * tmp4 + xFrac * tmp2) << 3) + 32; 184 c >>= 6; 185 *cbr++ = (u8)c; 186 } 187 cbr += 2*8 - chromaPartWidth; 188 ptrA += 2*width - chromaPartWidth; 189 } 190 } 191 192 } 193 194 /*------------------------------------------------------------------------------ 195 196 Function: h264bsdInterpolateChromaVer 197 198 Functional description: 199 This function performs chroma interpolation in vertical direction. 200 Overfilling is done only if needed. 
Reference image (pRef) is 201 read at correct position and the predicted part is written to 202 macroblock's chrominance (predPartChroma) 203 204 ------------------------------------------------------------------------------*/ 205 206 void h264bsdInterpolateChromaVer( 207 u8 *pRef, 208 u8 *predPartChroma, 209 i32 x0, 210 i32 y0, 211 u32 width, 212 u32 height, 213 u32 yFrac, 214 u32 chromaPartWidth, 215 u32 chromaPartHeight) 216 { 217 218 /* Variables */ 219 220 u32 x, y, tmp1, tmp2, tmp3, c, val; 221 u8 *ptrA, *cbr; 222 u32 comp; 223 u8 block[9*8*2]; 224 225 /* Code */ 226 227 ASSERT(predPartChroma); 228 ASSERT(chromaPartWidth); 229 ASSERT(chromaPartHeight); 230 ASSERT(yFrac < 8); 231 ASSERT(pRef); 232 233 if ((x0 < 0) || ((u32)x0+chromaPartWidth > width) || 234 (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height)) 235 { 236 h264bsdFillBlock(pRef, block, x0, y0, width, height, chromaPartWidth, 237 chromaPartHeight + 1, chromaPartWidth); 238 pRef += width * height; 239 h264bsdFillBlock(pRef, block + chromaPartWidth*(chromaPartHeight+1), 240 x0, y0, width, height, chromaPartWidth, 241 chromaPartHeight + 1, chromaPartWidth); 242 243 pRef = block; 244 x0 = 0; 245 y0 = 0; 246 width = chromaPartWidth; 247 height = chromaPartHeight+1; 248 } 249 250 val = 8 - yFrac; 251 252 for (comp = 0; comp <= 1; comp++) 253 { 254 255 ptrA = pRef + (comp * height + (u32)y0) * width + x0; 256 cbr = predPartChroma + comp * 8 * 8; 257 258 /* 2x2 pels per iteration 259 * bilinear vertical interpolation */ 260 for (y = (chromaPartHeight >> 1); y; y--) 261 { 262 for (x = (chromaPartWidth >> 1); x; x--) 263 { 264 tmp3 = ptrA[width*2]; 265 tmp2 = ptrA[width]; 266 tmp1 = *ptrA++; 267 c = ((val * tmp2 + yFrac * tmp3) << 3) + 32; 268 c >>= 6; 269 cbr[8] = (u8)c; 270 c = ((val * tmp1 + yFrac * tmp2) << 3) + 32; 271 c >>= 6; 272 *cbr++ = (u8)c; 273 tmp3 = ptrA[width*2]; 274 tmp2 = ptrA[width]; 275 tmp1 = *ptrA++; 276 c = ((val * tmp2 + yFrac * tmp3) << 3) + 32; 277 c >>= 6; 278 cbr[8] = (u8)c; 279 c = 
((val * tmp1 + yFrac * tmp2) << 3) + 32; 280 c >>= 6; 281 *cbr++ = (u8)c; 282 } 283 cbr += 2*8 - chromaPartWidth; 284 ptrA += 2*width - chromaPartWidth; 285 } 286 } 287 288 } 289 #endif 290 /*------------------------------------------------------------------------------ 291 292 Function: h264bsdInterpolateChromaHorVer 293 294 Functional description: 295 This function performs chroma interpolation in horizontal and 296 vertical direction. Overfilling is done only if needed. Reference 297 image (ref) is read at correct position and the predicted part 298 is written to macroblock's chrominance (predPartChroma) 299 300 ------------------------------------------------------------------------------*/ 301 302 void h264bsdInterpolateChromaHorVer( 303 u8 *ref, 304 u8 *predPartChroma, 305 i32 x0, 306 i32 y0, 307 u32 width, 308 u32 height, 309 u32 xFrac, 310 u32 yFrac, 311 u32 chromaPartWidth, 312 u32 chromaPartHeight) 313 { 314 u8 block[9*9*2]; 315 u32 x, y, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, valX, valY, plus32 = 32; 316 u32 comp; 317 u8 *ptrA, *cbr; 318 319 /* Code */ 320 321 ASSERT(predPartChroma); 322 ASSERT(chromaPartWidth); 323 ASSERT(chromaPartHeight); 324 ASSERT(xFrac < 8); 325 ASSERT(yFrac < 8); 326 ASSERT(ref); 327 328 if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) || 329 (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height)) 330 { 331 h264bsdFillBlock(ref, block, x0, y0, width, height, 332 chromaPartWidth + 1, chromaPartHeight + 1, chromaPartWidth + 1); 333 ref += width * height; 334 h264bsdFillBlock(ref, block + (chromaPartWidth+1)*(chromaPartHeight+1), 335 x0, y0, width, height, chromaPartWidth + 1, 336 chromaPartHeight + 1, chromaPartWidth + 1); 337 338 ref = block; 339 x0 = 0; 340 y0 = 0; 341 width = chromaPartWidth+1; 342 height = chromaPartHeight+1; 343 } 344 345 valX = 8 - xFrac; 346 valY = 8 - yFrac; 347 348 for (comp = 0; comp <= 1; comp++) 349 { 350 351 ptrA = ref + (comp * height + (u32)y0) * width + x0; 352 cbr = predPartChroma + comp * 8 * 8; 353 354 
/* 2x2 pels per iteration 355 * bilinear vertical and horizontal interpolation */ 356 for (y = (chromaPartHeight >> 1); y; y--) 357 { 358 tmp1 = *ptrA; 359 tmp3 = ptrA[width]; 360 tmp5 = ptrA[width*2]; 361 tmp1 *= valY; 362 tmp1 += tmp3 * yFrac; 363 tmp3 *= valY; 364 tmp3 += tmp5 * yFrac; 365 for (x = (chromaPartWidth >> 1); x; x--) 366 { 367 tmp2 = *++ptrA; 368 tmp4 = ptrA[width]; 369 tmp6 = ptrA[width*2]; 370 tmp2 *= valY; 371 tmp2 += tmp4 * yFrac; 372 tmp4 *= valY; 373 tmp4 += tmp6 * yFrac; 374 tmp1 = tmp1 * valX + plus32; 375 tmp3 = tmp3 * valX + plus32; 376 tmp1 += tmp2 * xFrac; 377 tmp1 >>= 6; 378 tmp3 += tmp4 * xFrac; 379 tmp3 >>= 6; 380 cbr[8] = (u8)tmp3; 381 *cbr++ = (u8)tmp1; 382 383 tmp1 = *++ptrA; 384 tmp3 = ptrA[width]; 385 tmp5 = ptrA[width*2]; 386 tmp1 *= valY; 387 tmp1 += tmp3 * yFrac; 388 tmp3 *= valY; 389 tmp3 += tmp5 * yFrac; 390 tmp2 = tmp2 * valX + plus32; 391 tmp4 = tmp4 * valX + plus32; 392 tmp2 += tmp1 * xFrac; 393 tmp2 >>= 6; 394 tmp4 += tmp3 * xFrac; 395 tmp4 >>= 6; 396 cbr[8] = (u8)tmp4; 397 *cbr++ = (u8)tmp2; 398 } 399 cbr += 2*8 - chromaPartWidth; 400 ptrA += 2*width - chromaPartWidth; 401 } 402 } 403 404 } 405 406 /*------------------------------------------------------------------------------ 407 408 Function: PredictChroma 409 410 Functional description: 411 Top level chroma prediction function that calls the appropriate 412 interpolation function. The output is written to macroblock array. 
413 414 ------------------------------------------------------------------------------*/ 415 416 static void PredictChroma( 417 u8 *mbPartChroma, 418 u32 xAL, 419 u32 yAL, 420 u32 partWidth, 421 u32 partHeight, 422 mv_t *mv, 423 image_t *refPic) 424 { 425 426 /* Variables */ 427 428 u32 xFrac, yFrac, width, height, chromaPartWidth, chromaPartHeight; 429 i32 xInt, yInt; 430 u8 *ref; 431 432 /* Code */ 433 434 ASSERT(mv); 435 ASSERT(refPic); 436 ASSERT(refPic->data); 437 ASSERT(refPic->width); 438 ASSERT(refPic->height); 439 440 width = 8 * refPic->width; 441 height = 8 * refPic->height; 442 443 xInt = (xAL >> 1) + (mv->hor >> 3); 444 yInt = (yAL >> 1) + (mv->ver >> 3); 445 xFrac = mv->hor & 0x7; 446 yFrac = mv->ver & 0x7; 447 448 chromaPartWidth = partWidth >> 1; 449 chromaPartHeight = partHeight >> 1; 450 ref = refPic->data + 256 * refPic->width * refPic->height; 451 452 if (xFrac && yFrac) 453 { 454 h264bsdInterpolateChromaHorVer(ref, mbPartChroma, xInt, yInt, width, 455 height, xFrac, yFrac, chromaPartWidth, chromaPartHeight); 456 } 457 else if (xFrac) 458 { 459 h264bsdInterpolateChromaHor(ref, mbPartChroma, xInt, yInt, width, 460 height, xFrac, chromaPartWidth, chromaPartHeight); 461 } 462 else if (yFrac) 463 { 464 h264bsdInterpolateChromaVer(ref, mbPartChroma, xInt, yInt, width, 465 height, yFrac, chromaPartWidth, chromaPartHeight); 466 } 467 else 468 { 469 h264bsdFillBlock(ref, mbPartChroma, xInt, yInt, width, height, 470 chromaPartWidth, chromaPartHeight, 8); 471 ref += width * height; 472 h264bsdFillBlock(ref, mbPartChroma + 8*8, xInt, yInt, width, height, 473 chromaPartWidth, chromaPartHeight, 8); 474 } 475 476 } 477 478 479 /*------------------------------------------------------------------------------ 480 481 Function: h264bsdInterpolateVerHalf 482 483 Functional description: 484 Function to perform vertical interpolation of pixel position 'h' 485 for a block. Overfilling is done only if needed. 
Reference
          image (ref) is read at correct position and the predicted part
          is written to macroblock array (mb)

        Inputs:
          ref         reference samples; replaced by a local copy when the
                      needed region is not completely inside the picture
          x0, y0      top-left corner of the partWidth x (partHeight+5)
                      region read by the filter
          width       row stride of ref in samples
          height      number of rows in ref
          partWidth   number of output columns
          partHeight  number of output rows; rows are produced in groups
                      of four (partHeight >> 2 groups)
        Outputs:
          mb          predicted samples, written with a row stride of 16

------------------------------------------------------------------------------*/
#ifndef H264DEC_ARM11
void h264bsdInterpolateVerHalf(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight)
{
    /* scratch area for the overfilled region, declared as u32 but used as a
     * byte buffer of partWidth x (partHeight+5) samples */
    u32 p1[21*21/4+1];
    u32 i, j;
    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    u8 *ptrC, *ptrV;
    /* clipping table biased by 512 entries so that the filter result,
     * which may be negative, can be used directly as the index */
    const u8 *clp = h264bsdClip + 512;

    /* Code */

    ASSERT(ref);
    ASSERT(mb);

    /* five rows in addition to the part itself are read; use a local copy
     * when that region is not completely inside the picture */
    if ((x0 < 0) || ((u32)x0+partWidth > width) ||
        (y0 < 0) || ((u32)y0+partHeight+5 > height))
    {
        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
                partWidth, partHeight+5, partWidth);

        x0 = 0;
        y0 = 0;
        ref = (u8*)p1;
        width = partWidth;
    }

    ref += (u32)y0 * width + (u32)x0;

    /* ptrC is at row 1 and ptrV at row 6 of the region; together they reach
     * rows 0..8, the nine rows needed for four vertically adjacent outputs */
    ptrC = ref + width;
    ptrV = ptrC + 5*width;

    /* 4 pixels per iteration, interpolate using 5 vertical samples */
    for (i = (partHeight >> 2); i; i--)
    {
        /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */
        /* i.e. taps (1, -5, 20, 20, -5, 1) over six consecutive rows; the
         * tmp variables are reused between the four outputs, so the
         * statement order below matters */
        for (j = partWidth; j; j--)
        {
            /* rows 4, 5, 7, 8 and 6 of the current column */
            tmp4 = ptrV[-(i32)width*2];
            tmp5 = ptrV[-(i32)width];
            tmp1 = ptrV[width];
            tmp2 = ptrV[width*2];
            tmp6 = *ptrV++;

            /* output row 3 (rows 3..8) */
            tmp7 = tmp4 + tmp1;
            tmp2 -= (tmp7 << 2);
            tmp2 -= tmp7;
            tmp2 += 16;
            tmp7 = tmp5 + tmp6;
            tmp3 = ptrC[width*2];
            tmp2 += (tmp7 << 4);
            tmp2 += (tmp7 << 2);
            tmp2 += tmp3;
            tmp2 = clp[tmp2>>5];
            tmp1 += 16;
            mb[48] = (u8)tmp2;

            /* output row 2 (rows 2..7) */
            tmp7 = tmp3 + tmp6;
            tmp1 -= (tmp7 << 2);
            tmp1 -= tmp7;
            tmp7 = tmp4 + tmp5;
            tmp2 = ptrC[width];
            tmp1 += (tmp7 << 4);
            tmp1 += (tmp7 << 2);
            tmp1 += tmp2;
            tmp1 = clp[tmp1>>5];
            tmp6 += 16;
            mb[32] = (u8)tmp1;

            /* output row 1 (rows 1..6) */
            tmp7 = tmp2 + tmp5;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp7 = tmp4 + tmp3;
            tmp1 = *ptrC;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp6 += tmp1;
            tmp6 = clp[tmp6>>5];
            tmp5 += 16;
            mb[16] = (u8)tmp6;

            /* output row 0 (rows 0..5) */
            tmp1 += tmp4;
            tmp5 -= (tmp1 << 2);
            tmp5 -= tmp1;
            tmp3 += tmp2;
            tmp6 = ptrC[-(i32)width];
            tmp5 += (tmp3 << 4);
            tmp5 += (tmp3 << 2);
            tmp5 += tmp6;
            tmp5 = clp[tmp5>>5];
            *mb++ = (u8)tmp5;
            ptrC++;
        }
        /* move four rows down in the source and in the output */
        ptrC += 4*width - partWidth;
        ptrV += 4*width - partWidth;
        mb += 4*16 - partWidth;
    }

}

/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateVerQuarter

        Functional description:
          Function to perform vertical interpolation of pixel position 'd'
          or 'n' for a block. Overfilling is done only if needed. Reference
          image (ref) is read at correct position and the predicted part
          is written to macroblock array (mb)

          Each output is the rounded average, (a + b + 1) >> 1, of the
          clipped vertical half-sample value and an integer sample of the
          same column: the sample above the half-sample position when
          verOffset is 0, the sample below it when verOffset is 1.
          Rows are produced in groups of four (partHeight >> 2 groups) and
          written to mb with a row stride of 16.

------------------------------------------------------------------------------*/

void h264bsdInterpolateVerQuarter(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight,
  u32 verOffset)    /* 0 for pixel d, 1 for pixel n */
{
    /* scratch area for the overfilled region, declared as u32 but used as a
     * byte buffer of partWidth x (partHeight+5) samples */
    u32 p1[21*21/4+1];
    u32 i, j;
    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    u8 *ptrC, *ptrV, *ptrInt;
    /* clipping table biased by 512 entries so that the filter result,
     * which may be negative, can be used directly as the index */
    const u8 *clp = h264bsdClip + 512;

    /* Code */

    ASSERT(ref);
    ASSERT(mb);

    /* five rows in addition to the part itself are read; use a local copy
     * when that region is not completely inside the picture */
    if ((x0 < 0) || ((u32)x0+partWidth > width) ||
        (y0 < 0) || ((u32)y0+partHeight+5 > height))
    {
        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
                partWidth, partHeight+5, partWidth);

        x0 = 0;
        y0 = 0;
        ref = (u8*)p1;
        width = partWidth;
    }

    ref += (u32)y0 * width + (u32)x0;

    /* ptrC is at row 1 and ptrV at row 6 of the region */
    ptrC = ref + width;
    ptrV = ptrC + 5*width;

    /* Pointer to integer sample position, either M or R */
    /* (row 3 + verOffset of the region; rows -1, 0, +1 and +2 relative to
     * it are averaged with output rows 0, 1, 2 and 3) */
    ptrInt = ptrC + (2+verOffset)*width;

    /* 4 pixels per iteration
     * interpolate using 5 vertical samples and average between
     * interpolated value and integer sample value */
    for (i = (partHeight >> 2); i; i--)
    {
        /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */
        /* the tmp variables are reused between the four outputs, so the
         * statement order below matters */
        for (j = partWidth; j; j--)
        {
            /* rows 4, 5, 7, 8 and 6 of the current column */
            tmp4 = ptrV[-(i32)width*2];
            tmp5 = ptrV[-(i32)width];
            tmp1 = ptrV[width];
            tmp2 = ptrV[width*2];
            tmp6 = *ptrV++;

            /* output row 3 */
            tmp7 = tmp4 + tmp1;
            tmp2 -= (tmp7 << 2);
            tmp2 -= tmp7;
            tmp2 += 16;
            tmp7 = tmp5 + tmp6;
            tmp3 = ptrC[width*2];
            tmp2 += (tmp7 << 4);
            tmp2 += (tmp7 << 2);
            tmp2 += tmp3;
            tmp2 = clp[tmp2>>5];
            tmp7 = ptrInt[width*2];
            tmp1 += 16;
            tmp2++;
            mb[48] = (u8)((tmp2 + tmp7) >> 1);

            /* output row 2 */
            tmp7 = tmp3 + tmp6;
            tmp1 -= (tmp7 << 2);
            tmp1 -= tmp7;
            tmp7 = tmp4 + tmp5;
            tmp2 = ptrC[width];
            tmp1 += (tmp7 << 4);
            tmp1 += (tmp7 << 2);
            tmp1 += tmp2;
            tmp1 = clp[tmp1>>5];
            tmp7 = ptrInt[width];
            tmp6 += 16;
            tmp1++;
            mb[32] = (u8)((tmp1 + tmp7) >> 1);

            /* output row 1 */
            tmp7 = tmp2 + tmp5;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp7 = tmp4 + tmp3;
            tmp1 = *ptrC;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp6 += tmp1;
            tmp6 = clp[tmp6>>5];
            tmp7 = *ptrInt;
            tmp5 += 16;
            tmp6++;
            mb[16] = (u8)((tmp6 + tmp7) >> 1);

            /* output row 0 */
            tmp1 += tmp4;
            tmp5 -= (tmp1 << 2);
            tmp5 -= tmp1;
            tmp3 += tmp2;
            tmp6 = ptrC[-(i32)width];
            tmp5 += (tmp3 << 4);
            tmp5 += (tmp3 << 2);
            tmp5 += tmp6;
            tmp5 = clp[tmp5>>5];
            tmp7 = ptrInt[-(i32)width];
            tmp5++;
            *mb++ = (u8)((tmp5 + tmp7) >> 1);
            ptrC++;
            ptrInt++;
        }
        /* move four rows down in the source and in the output */
        ptrC += 4*width - partWidth;
        ptrV += 4*width - partWidth;
        ptrInt += 4*width - partWidth;
        mb += 4*16 - partWidth;
    }

}

/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateHorHalf

        Functional description:
          Function to perform horizontal interpolation of pixel position 'b'
          for a block. Overfilling is done only if needed.
Reference
          image (ref) is read at correct position and the predicted part
          is written to macroblock array (mb)

        Inputs:
          ref         reference samples; replaced by a local copy when the
                      needed region is not completely inside the picture
          x0, y0      top-left corner of the (partWidth+5) x partHeight
                      region read by the filter
          width       row stride of ref in samples
          height      number of rows in ref
          partWidth   number of output columns, asserted to be a multiple
                      of four
          partHeight  number of output rows, asserted to be a multiple
                      of four
        Outputs:
          mb          predicted samples, written with a row stride of 16

------------------------------------------------------------------------------*/

void h264bsdInterpolateHorHalf(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight)
{
    /* scratch area for the overfilled region, declared as u32 but used as a
     * byte buffer of (partWidth+5) x partHeight samples */
    u32 p1[21*21/4+1];
    u8 *ptrJ;
    u32 x, y;
    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    /* clipping table biased by 512 entries so that the filter result,
     * which may be negative, can be used directly as the index */
    const u8 *clp = h264bsdClip + 512;

    /* Code */

    ASSERT(ref);
    ASSERT(mb);
    ASSERT((partWidth&0x3) == 0);
    ASSERT((partHeight&0x3) == 0);

    /* five columns in addition to the part itself are read; use a local
     * copy when that region is not completely inside the picture */
    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
        (y0 < 0) || ((u32)y0+partHeight > height))
    {
        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
                partWidth+5, partHeight, partWidth+5);

        x0 = 0;
        y0 = 0;
        ref = (u8*)p1;
        width = partWidth + 5;
    }

    ref += (u32)y0 * width + (u32)x0;

    /* ptrJ points at the sixth sample of the row, the last tap of the
     * first output */
    ptrJ = ref + 5;

    for (y = partHeight; y; y--)
    {
        /* first five samples of the row */
        tmp6 = *(ptrJ - 5);
        tmp5 = *(ptrJ - 4);
        tmp4 = *(ptrJ - 3);
        tmp3 = *(ptrJ - 2);
        tmp2 = *(ptrJ - 1);

        /* calculate 4 pels per iteration */
        /* every output is the clipped value of
         * (s0 - 5*s1 + 20*s2 + 20*s3 - 5*s4 + s5 + 16) >> 5 over six
         * consecutive samples. The work for neighbouring outputs is
         * interleaved and the tmp variables rotate roles, so the statement
         * order below matters. */
        for (x = (partWidth >> 2); x; x--)
        {
            /* First pixel */
            tmp6 += 16;
            tmp7 = tmp3 + tmp4;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp7 = tmp2 + tmp5;
            tmp1 = *ptrJ++;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp6 += tmp1;
            tmp6 = clp[tmp6>>5];
            /* Second pixel */
            tmp5 += 16;
            tmp7 = tmp2 + tmp3;
            *mb++ = (u8)tmp6;
            tmp5 += (tmp7 << 4);
            tmp5 += (tmp7 << 2);
            tmp7 = tmp1 + tmp4;
            tmp6 = *ptrJ++;
            tmp5 -= (tmp7 << 2);
            tmp5 -= tmp7;
            tmp5 += tmp6;
            tmp5 = clp[tmp5>>5];
            /* Third pixel */
            tmp4 += 16;
            tmp7 = tmp1 + tmp2;
            *mb++ = (u8)tmp5;
            tmp4 += (tmp7 << 4);
            tmp4 += (tmp7 << 2);
            tmp7 = tmp6 + tmp3;
            tmp5 = *ptrJ++;
            tmp4 -= (tmp7 << 2);
            tmp4 -= tmp7;
            tmp4 += tmp5;
            tmp4 = clp[tmp4>>5];
            /* Fourth pixel */
            tmp3 += 16;
            tmp7 = tmp6 + tmp1;
            *mb++ = (u8)tmp4;
            tmp3 += (tmp7 << 4);
            tmp3 += (tmp7 << 2);
            tmp7 = tmp5 + tmp2;
            tmp4 = *ptrJ++;
            tmp3 -= (tmp7 << 2);
            tmp3 -= tmp7;
            tmp3 += tmp4;
            tmp3 = clp[tmp3>>5];
            /* rotate the five most recent samples back into
             * tmp6..tmp2 (oldest first) for the next iteration */
            tmp7 = tmp4;
            tmp4 = tmp6;
            tmp6 = tmp2;
            tmp2 = tmp7;
            *mb++ = (u8)tmp3;
            tmp3 = tmp5;
            tmp5 = tmp1;
        }
        /* next row in the source and in the output */
        ptrJ += width - partWidth;
        mb += 16 - partWidth;
    }

}

/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateHorQuarter

        Functional description:
          Function to perform horizontal interpolation of pixel position 'a'
          or 'c' for a block. Overfilling is done only if needed. Reference
          image (ref) is read at correct position and the predicted part
          is written to macroblock array (mb)

          Each output is the rounded average, (a + b + 1) >> 1, of the
          clipped horizontal half-sample value and an integer sample of the
          same row: the sample to the left of the half-sample position when
          horOffset is 0, the sample to the right of it when horOffset is 1.
          Four outputs are produced per inner iteration (partWidth >> 2
          iterations per row) and rows are written to mb with a stride
          of 16.

------------------------------------------------------------------------------*/

void h264bsdInterpolateHorQuarter(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight,
  u32 horOffset) /* 0 for pixel a, 1 for pixel c */
{
    /* scratch area for the overfilled region, declared as u32 but used as a
     * byte buffer of (partWidth+5) x partHeight samples */
    u32 p1[21*21/4+1];
    u8 *ptrJ;
    u32 x, y;
    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    /* clipping table biased by 512 entries so that the filter result,
     * which may be negative, can be used directly as the index */
    const u8 *clp = h264bsdClip + 512;

    /* Code */

    ASSERT(ref);
    ASSERT(mb);

    /* five columns in addition to the part itself are read; use a local
     * copy when that region is not completely inside the picture */
    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
        (y0 < 0) || ((u32)y0+partHeight > height))
    {
        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
                partWidth+5, partHeight, partWidth+5);

        x0 = 0;
        y0 = 0;
        ref = (u8*)p1;
        width = partWidth + 5;
    }

    ref += (u32)y0 * width + (u32)x0;

    /* ptrJ points at the sixth sample of the row, the last tap of the
     * first output */
    ptrJ = ref + 5;

    for (y = partHeight; y; y--)
    {
        /* first five samples of the row */
        tmp6 = *(ptrJ - 5);
        tmp5 = *(ptrJ - 4);
        tmp4 = *(ptrJ - 3);
        tmp3 = *(ptrJ - 2);
        tmp2 = *(ptrJ - 1);

        /* calculate 4 pels per iteration */
        /* the tmp variables rotate roles between the four outputs, so the
         * statement order below matters; the "+= 16" lines pre-load the
         * rounding term of the following output */
        for (x = (partWidth >> 2); x; x--)
        {
            /* First pixel */
            tmp6 += 16;
            tmp7 = tmp3 + tmp4;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp7 = tmp2 + tmp5;
            tmp1 = *ptrJ++;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp6 += tmp1;
            tmp6 = clp[tmp6>>5];
            tmp5 += 16;
            /* add the left or the right integer neighbour */
            if (!horOffset)
                tmp6 += tmp4;
            else
                tmp6 += tmp3;
            *mb++ = (u8)((tmp6 + 1) >> 1);
            /* Second pixel */
            tmp7 = tmp2 + tmp3;
            tmp5 += (tmp7 << 4);
            tmp5 += (tmp7 << 2);
            tmp7 = tmp1 + tmp4;
            tmp6 = *ptrJ++;
            tmp5 -= (tmp7 << 2);
            tmp5 -= tmp7;
            tmp5 += tmp6;
            tmp5 = clp[tmp5>>5];
            tmp4 += 16;
            if (!horOffset)
                tmp5 += tmp3;
            else
                tmp5 += tmp2;
            *mb++ = (u8)((tmp5 + 1) >> 1);
            /* Third pixel */
            tmp7 = tmp1 + tmp2;
            tmp4 += (tmp7 << 4);
            tmp4 += (tmp7 << 2);
            tmp7 = tmp6 + tmp3;
            tmp5 = *ptrJ++;
            tmp4 -= (tmp7 << 2);
            tmp4 -= tmp7;
            tmp4 += tmp5;
            tmp4 = clp[tmp4>>5];
            tmp3 += 16;
            if (!horOffset)
                tmp4 += tmp2;
            else
                tmp4 += tmp1;
            *mb++ = (u8)((tmp4 + 1) >> 1);
            /* Fourth pixel */
            tmp7 = tmp6 + tmp1;
            tmp3 += (tmp7 << 4);
            tmp3 += (tmp7 << 2);
            tmp7 = tmp5 + tmp2;
            tmp4 = *ptrJ++;
            tmp3 -= (tmp7 << 2);
            tmp3 -= tmp7;
            tmp3 += tmp4;
            tmp3 = clp[tmp3>>5];
            if (!horOffset)
                tmp3 += tmp1;
            else
                tmp3 += tmp6;
            *mb++ = (u8)((tmp3 + 1) >> 1);
            /* rotate the five most recent samples back into
             * tmp6..tmp2 (oldest first) for the next iteration */
            tmp3 = tmp5;
            tmp5 = tmp1;
            tmp7 = tmp4;
            tmp4 = tmp6;
            tmp6 = tmp2;
            tmp2 = tmp7;
        }
        /* next row in the source and in the output */
        ptrJ += width - partWidth;
        mb += 16 - partWidth;
    }

}

/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateHorVerQuarter

        Functional description:
          Function to perform horizontal and vertical interpolation of pixel
          position 'e', 'g', 'p' or 'r' for a block. Overfilling is done only
          if needed.
Reference image (ref) is read at correct position and
          the predicted part is written to macroblock array (mb)

          The function works in two passes. The first pass writes the
          clipped horizontal half-sample values of one row position
          (selected by bit 1 of horVerOffset) to mb. The second pass
          computes the clipped vertical half-sample values of one column
          position (selected by bit 0 of horVerOffset) and replaces each
          mb sample by the rounded average, (a + b + 1) >> 1, of the two.
          mb is written with a row stride of 16.

------------------------------------------------------------------------------*/

void h264bsdInterpolateHorVerQuarter(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight,
  u32 horVerOffset) /* 0 for pixel e, 1 for pixel g,
                       2 for pixel p, 3 for pixel r */
{
    /* scratch area for the overfilled region, declared as u32 but used as a
     * byte buffer of (partWidth+5) x (partHeight+5) samples */
    u32 p1[21*21/4+1];
    u8 *ptrC, *ptrJ, *ptrV;
    u32 x, y;
    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    /* clipping table biased by 512 entries so that the filter result,
     * which may be negative, can be used directly as the index */
    const u8 *clp = h264bsdClip + 512;

    /* Code */

    ASSERT(ref);
    ASSERT(mb);

    /* five columns and five rows in addition to the part itself are read;
     * use a local copy when that region is not completely inside the
     * picture */
    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
        (y0 < 0) || ((u32)y0+partHeight+5 > height))
    {
        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
                partWidth+5, partHeight+5, partWidth+5);

        x0 = 0;
        y0 = 0;
        ref = (u8*)p1;
        width = partWidth+5;
    }

    /* Ref points to G + (-2, -2) */
    ref += (u32)y0 * width + (u32)x0;

    /* ptrJ points to either J or Q, depending on vertical offset */
    /* (row 2 or 3 of the region, sixth sample of that row) */
    ptrJ = ref + (((horVerOffset & 0x2) >> 1) + 2) * width + 5;

    /* ptrC points to either C or D, depending on horizontal offset */
    /* (row 1 of the region, column 2 or 3) */
    ptrC = ref + width + 2 + (horVerOffset & 0x1);

    /* First pass: horizontal half-sample values written to mb */
    for (y = partHeight; y; y--)
    {
        /* first five samples of the row */
        tmp6 = *(ptrJ - 5);
        tmp5 = *(ptrJ - 4);
        tmp4 = *(ptrJ - 3);
        tmp3 = *(ptrJ - 2);
        tmp2 = *(ptrJ - 1);

        /* Horizontal interpolation, calculate 4 pels per iteration */
        /* every output is the clipped value of
         * (s0 - 5*s1 + 20*s2 + 20*s3 - 5*s4 + s5 + 16) >> 5; the tmp
         * variables rotate roles, so the statement order below matters */
        for (x = (partWidth >> 2); x; x--)
        {
            /* First pixel */
            tmp6 += 16;
            tmp7 = tmp3 + tmp4;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp7 = tmp2 + tmp5;
            tmp1 = *ptrJ++;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp6 += tmp1;
            tmp6 = clp[tmp6>>5];
            /* Second pixel */
            tmp5 += 16;
            tmp7 = tmp2 + tmp3;
            *mb++ = (u8)tmp6;
            tmp5 += (tmp7 << 4);
            tmp5 += (tmp7 << 2);
            tmp7 = tmp1 + tmp4;
            tmp6 = *ptrJ++;
            tmp5 -= (tmp7 << 2);
            tmp5 -= tmp7;
            tmp5 += tmp6;
            tmp5 = clp[tmp5>>5];
            /* Third pixel */
            tmp4 += 16;
            tmp7 = tmp1 + tmp2;
            *mb++ = (u8)tmp5;
            tmp4 += (tmp7 << 4);
            tmp4 += (tmp7 << 2);
            tmp7 = tmp6 + tmp3;
            tmp5 = *ptrJ++;
            tmp4 -= (tmp7 << 2);
            tmp4 -= tmp7;
            tmp4 += tmp5;
            tmp4 = clp[tmp4>>5];
            /* Fourth pixel */
            tmp3 += 16;
            tmp7 = tmp6 + tmp1;
            *mb++ = (u8)tmp4;
            tmp3 += (tmp7 << 4);
            tmp3 += (tmp7 << 2);
            tmp7 = tmp5 + tmp2;
            tmp4 = *ptrJ++;
            tmp3 -= (tmp7 << 2);
            tmp3 -= tmp7;
            tmp3 += tmp4;
            tmp3 = clp[tmp3>>5];
            /* rotate the five most recent samples back into
             * tmp6..tmp2 (oldest first) for the next iteration */
            tmp7 = tmp4;
            tmp4 = tmp6;
            tmp6 = tmp2;
            tmp2 = tmp7;
            *mb++ = (u8)tmp3;
            tmp3 = tmp5;
            tmp5 = tmp1;
        }
        /* next row in the source and in the output */
        ptrJ += width - partWidth;
        mb += 16 - partWidth;
    }

    /* Second pass: rewind mb to the first output row (the first pass
     * advanced it by 16 per row) and set up the vertical filter pointers,
     * ptrV five rows below ptrC */
    mb -= 16*partHeight;
    ptrV = ptrC + 5*width;

    for (y = (partHeight >> 2); y; y--)
    {
        /* Vertical interpolation and averaging, 4 pels per iteration */
        /* the tmp variables are reused between the four outputs, so the
         * statement order below matters */
        for (x = partWidth; x; x--)
        {
            /* rows 4, 5, 7, 8 and 6 of the current column */
            tmp4 = ptrV[-(i32)width*2];
            tmp5 = ptrV[-(i32)width];
            tmp1 = ptrV[width];
            tmp2 = ptrV[width*2];
            tmp6 = *ptrV++;

            /* output row 3, averaged with the first-pass value in mb[48] */
            tmp7 = tmp4 + tmp1;
            tmp2 -= (tmp7 << 2);
            tmp2 -= tmp7;
            tmp2 += 16;
            tmp7 = tmp5 + tmp6;
            tmp3 = ptrC[width*2];
            tmp2 += (tmp7 << 4);
            tmp2 += (tmp7 << 2);
            tmp2 += tmp3;
            tmp7 = clp[tmp2>>5];
            tmp2 = mb[48];
            tmp1 += 16;
            tmp7++;
            mb[48] = (u8)((tmp2 + tmp7) >> 1);

            /* output row 2 */
            tmp7 = tmp3 + tmp6;
            tmp1 -= (tmp7 << 2);
            tmp1 -= tmp7;
            tmp7 = tmp4 + tmp5;
            tmp2 = ptrC[width];
            tmp1 += (tmp7 << 4);
            tmp1 += (tmp7 << 2);
            tmp1 += tmp2;
            tmp7 = clp[tmp1>>5];
            tmp1 = mb[32];
            tmp6 += 16;
            tmp7++;
            mb[32] = (u8)((tmp1 + tmp7) >> 1);

            /* output row 1 */
            tmp1 = *ptrC;
            tmp7 = tmp2 + tmp5;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp7 = tmp4 + tmp3;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp6 += tmp1;
            tmp7 = clp[tmp6>>5];
            tmp6 = mb[16];
            tmp5 += 16;
            tmp7++;
            mb[16] = (u8)((tmp6 + tmp7) >> 1);

            /* output row 0 */
            tmp6 = ptrC[-(i32)width];
            tmp1 += tmp4;
            tmp5 -= (tmp1 << 2);
            tmp5 -= tmp1;
            tmp3 += tmp2;
            tmp5 += (tmp3 << 4);
            tmp5 += (tmp3 << 2);
            tmp5 += tmp6;
            tmp7 = clp[tmp5>>5];
            tmp5 = *mb;
            tmp7++;
            *mb++ = (u8)((tmp5 + tmp7) >> 1);
            ptrC++;

        }
        /* move four rows down in the source and in the output */
        ptrC += 4*width - partWidth;
        ptrV += 4*width - partWidth;
        mb += 4*16 - partWidth;
    }

}
#endif

/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateMidHalf

        Functional description:
          Function to perform horizontal and vertical interpolation of pixel
          position 'j' for a block. Overfilling is done only if needed.
          Reference image (ref) is read at correct position and the predicted
          part is written to macroblock array (mb)

          The function works in two steps. The first step applies the
          horizontal 6-tap filter (1, -5, 20, 20, -5, 1) to partHeight+5
          rows and stores the sums, without rounding, shifting or clipping,
          in a local table of partWidth values per row. The second step
          applies the same filter vertically to that table, adds 512,
          shifts right by 10 and clips. Rows are produced in groups of four
          (partHeight >> 2 groups) and written to mb with a row stride
          of 16.

------------------------------------------------------------------------------*/

void h264bsdInterpolateMidHalf(
  u8 *ref,
  u8 *mb,
  i32 x0,
  i32 y0,
  u32 width,
  u32 height,
  u32 partWidth,
  u32 partHeight)
{
    /* scratch area for the overfilled region, declared as u32 but used as a
     * byte buffer of (partWidth+5) x (partHeight+5) samples */
    u32 p1[21*21/4+1];
    u32 x, y;
    i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    i32 *ptrC, *ptrV, *b1;
    u8  *ptrJ;
    /* intermediate horizontal filter sums, partWidth values per row */
    i32 table[21*16];
    /* clipping table biased by 512 entries so that the filter result,
     * which may be negative, can be used directly as the index */
    const u8 *clp = h264bsdClip + 512;

    /* Code */

    ASSERT(ref);
    ASSERT(mb);

    /* five columns and five rows in addition to the part itself are read;
     * use a local copy when that region is not completely inside the
     * picture */
    if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
        (y0 < 0) || ((u32)y0+partHeight+5 > height))
    {
        h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
                partWidth+5, partHeight+5, partWidth+5);

        x0 = 0;
        y0 = 0;
        ref = (u8*)p1;
        width = partWidth+5;
    }

    ref += (u32)y0 * width + (u32)x0;

    b1 = table;
    /* ptrJ points at the sixth sample of the row, the last tap of the
     * first output */
    ptrJ = ref + 5;

    /* First step: calculate intermediate values for
     * horizontal interpolation */
    for (y = partHeight + 5; y; y--)
    {
        /* first five samples of the row */
        tmp6 = *(ptrJ - 5);
        tmp5 = *(ptrJ - 4);
        tmp4 = *(ptrJ - 3);
        tmp3 = *(ptrJ - 2);
        tmp2 = *(ptrJ - 1);

        /* 4 pels per iteration */
        /* the tmp variables rotate roles between the four sums, so the
         * statement order below matters */
        for (x = (partWidth >> 2); x; x--)
        {
            /* First pixel */
            tmp7 = tmp3 + tmp4;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp7 = tmp2 + tmp5;
            tmp1 = *ptrJ++;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp6 += tmp1;
            *b1++ = tmp6;
            /* Second pixel */
            tmp7 = tmp2 + tmp3;
            tmp5 += (tmp7 << 4);
            tmp5 += (tmp7 << 2);
            tmp7 = tmp1 + tmp4;
            tmp6 = *ptrJ++;
            tmp5 -= (tmp7 << 2);
            tmp5 -= tmp7;
            tmp5 += tmp6;
            *b1++ = tmp5;
            /* Third pixel */
            tmp7 = tmp1 + tmp2;
            tmp4 += (tmp7 << 4);
            tmp4 += (tmp7 << 2);
            tmp7 = tmp6 + tmp3;
            tmp5 = *ptrJ++;
            tmp4 -= (tmp7 << 2);
            tmp4 -= tmp7;
            tmp4 += tmp5;
            *b1++ = tmp4;
            /* Fourth pixel */
            tmp7 = tmp6 + tmp1;
            tmp3 += (tmp7 << 4);
            tmp3 += (tmp7 << 2);
            tmp7 = tmp5 + tmp2;
            tmp4 = *ptrJ++;
            tmp3 -= (tmp7 << 2);
            tmp3 -= tmp7;
            tmp3 += tmp4;
            *b1++ = tmp3;
            /* rotate the five most recent samples back into
             * tmp6..tmp2 (oldest first) for the next iteration */
            tmp7 = tmp4;
            tmp4 = tmp6;
            tmp6 = tmp2;
            tmp2 = tmp7;
            tmp3 = tmp5;
            tmp5 = tmp1;
        }
        /* next source row; b1 simply continues, table rows are packed */
        ptrJ += width - partWidth;
    }

    /* Second step: calculate vertical interpolation */
    /* ptrC is at table row 1 and ptrV at table row 6; together they reach
     * rows 0..8, the nine rows needed for four vertically adjacent outputs */
    ptrC = table + partWidth;
    ptrV = ptrC + 5*partWidth;
    for (y = (partHeight >> 2); y; y--)
    {
        /* 4 pels per iteration */
        /* the tmp variables are reused between the four outputs, so the
         * statement order below matters */
        for (x = partWidth; x; x--)
        {
            /* table rows 4, 5, 7, 8 and 6 of the current column */
            tmp4 = ptrV[-(i32)partWidth*2];
            tmp5 = ptrV[-(i32)partWidth];
            tmp1 = ptrV[partWidth];
            tmp2 = ptrV[partWidth*2];
            tmp6 = *ptrV++;

            /* output row 3 */
            tmp7 = tmp4 + tmp1;
            tmp2 -= (tmp7 << 2);
            tmp2 -= tmp7;
            tmp2 += 512;
            tmp7 = tmp5 + tmp6;
            tmp3 = ptrC[partWidth*2];
            tmp2 += (tmp7 << 4);
            tmp2 += (tmp7 << 2);
            tmp2 += tmp3;
            tmp7 = clp[tmp2>>10];
            tmp1 += 512;
            mb[48] = (u8)tmp7;

            /* output row 2 */
            tmp7 = tmp3 + tmp6;
            tmp1 -= (tmp7 << 2);
            tmp1 -= tmp7;
            tmp7 = tmp4 + tmp5;
            tmp2 = ptrC[partWidth];
            tmp1 += (tmp7 << 4);
            tmp1 += (tmp7 << 2);
            tmp1 += tmp2;
            tmp7 = clp[tmp1>>10];
            tmp6 += 512;
            mb[32] = (u8)tmp7;

            /* output row 1 */
            tmp1 = *ptrC;
            tmp7 = tmp2 + tmp5;
            tmp6 -= (tmp7 << 2);
            tmp6 -= tmp7;
            tmp7 = tmp4 + tmp3;
            tmp6 += (tmp7 << 4);
            tmp6 += (tmp7 << 2);
            tmp6 += tmp1;
            tmp7 = clp[tmp6>>10];
            tmp5 += 512;
            mb[16] = (u8)tmp7;

            /* output row 0 */
            tmp6 = ptrC[-(i32)partWidth];
            tmp1 += tmp4;
            tmp5 -= (tmp1 << 2);
            tmp5 -= tmp1;
            tmp3 += tmp2;
            tmp5 += (tmp3 << 4);
            tmp5 += (tmp3 << 2);
            tmp5 += tmp6;
            tmp7 = clp[tmp5>>10];
            *mb++ = (u8)tmp7;
            ptrC++;
        }
        /* four rows down: the inner loop already advanced the table
         * pointers by one row of partWidth values */
        mb += 4*16 - partWidth;
        ptrC += 3*partWidth;
        ptrV += 3*partWidth;
    }

}


/*------------------------------------------------------------------------------

    Function: h264bsdInterpolateMidVerQuarter

        Functional description:
          Function to perform horizontal and vertical interpolation of pixel
          position 'f' or 'q' for a block. Overfilling is done only if needed.
1391 Reference image (ref) is read at correct position and the predicted 1392 part is written to macroblock array (mb) 1393 1394 ------------------------------------------------------------------------------*/ 1395 1396 void h264bsdInterpolateMidVerQuarter( 1397 u8 *ref, 1398 u8 *mb, 1399 i32 x0, 1400 i32 y0, 1401 u32 width, 1402 u32 height, 1403 u32 partWidth, 1404 u32 partHeight, 1405 u32 verOffset) /* 0 for pixel f, 1 for pixel q */ 1406 { 1407 u32 p1[21*21/4+1]; 1408 u32 x, y; 1409 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 1410 i32 *ptrC, *ptrV, *ptrInt, *b1; 1411 u8 *ptrJ; 1412 i32 table[21*16]; 1413 const u8 *clp = h264bsdClip + 512; 1414 1415 /* Code */ 1416 1417 ASSERT(ref); 1418 ASSERT(mb); 1419 1420 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || 1421 (y0 < 0) || ((u32)y0+partHeight+5 > height)) 1422 { 1423 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, 1424 partWidth+5, partHeight+5, partWidth+5); 1425 1426 x0 = 0; 1427 y0 = 0; 1428 ref = (u8*)p1; 1429 width = partWidth+5; 1430 } 1431 1432 ref += (u32)y0 * width + (u32)x0; 1433 1434 b1 = table; 1435 ptrJ = ref + 5; 1436 1437 /* First step: calculate intermediate values for 1438 * horizontal interpolation */ 1439 for (y = partHeight + 5; y; y--) 1440 { 1441 tmp6 = *(ptrJ - 5); 1442 tmp5 = *(ptrJ - 4); 1443 tmp4 = *(ptrJ - 3); 1444 tmp3 = *(ptrJ - 2); 1445 tmp2 = *(ptrJ - 1); 1446 for (x = (partWidth >> 2); x; x--) 1447 { 1448 /* First pixel */ 1449 tmp7 = tmp3 + tmp4; 1450 tmp6 += (tmp7 << 4); 1451 tmp6 += (tmp7 << 2); 1452 tmp7 = tmp2 + tmp5; 1453 tmp1 = *ptrJ++; 1454 tmp6 -= (tmp7 << 2); 1455 tmp6 -= tmp7; 1456 tmp6 += tmp1; 1457 *b1++ = tmp6; 1458 /* Second pixel */ 1459 tmp7 = tmp2 + tmp3; 1460 tmp5 += (tmp7 << 4); 1461 tmp5 += (tmp7 << 2); 1462 tmp7 = tmp1 + tmp4; 1463 tmp6 = *ptrJ++; 1464 tmp5 -= (tmp7 << 2); 1465 tmp5 -= tmp7; 1466 tmp5 += tmp6; 1467 *b1++ = tmp5; 1468 /* Third pixel */ 1469 tmp7 = tmp1 + tmp2; 1470 tmp4 += (tmp7 << 4); 1471 tmp4 += (tmp7 << 2); 1472 tmp7 = tmp6 + tmp3; 
1473 tmp5 = *ptrJ++; 1474 tmp4 -= (tmp7 << 2); 1475 tmp4 -= tmp7; 1476 tmp4 += tmp5; 1477 *b1++ = tmp4; 1478 /* Fourth pixel */ 1479 tmp7 = tmp6 + tmp1; 1480 tmp3 += (tmp7 << 4); 1481 tmp3 += (tmp7 << 2); 1482 tmp7 = tmp5 + tmp2; 1483 tmp4 = *ptrJ++; 1484 tmp3 -= (tmp7 << 2); 1485 tmp3 -= tmp7; 1486 tmp3 += tmp4; 1487 *b1++ = tmp3; 1488 tmp7 = tmp4; 1489 tmp4 = tmp6; 1490 tmp6 = tmp2; 1491 tmp2 = tmp7; 1492 tmp3 = tmp5; 1493 tmp5 = tmp1; 1494 } 1495 ptrJ += width - partWidth; 1496 } 1497 1498 /* Second step: calculate vertical interpolation and average */ 1499 ptrC = table + partWidth; 1500 ptrV = ptrC + 5*partWidth; 1501 /* Pointer to integer sample position, either M or R */ 1502 ptrInt = ptrC + (2+verOffset)*partWidth; 1503 for (y = (partHeight >> 2); y; y--) 1504 { 1505 for (x = partWidth; x; x--) 1506 { 1507 tmp4 = ptrV[-(i32)partWidth*2]; 1508 tmp5 = ptrV[-(i32)partWidth]; 1509 tmp1 = ptrV[partWidth]; 1510 tmp2 = ptrV[partWidth*2]; 1511 tmp6 = *ptrV++; 1512 1513 tmp7 = tmp4 + tmp1; 1514 tmp2 -= (tmp7 << 2); 1515 tmp2 -= tmp7; 1516 tmp2 += 512; 1517 tmp7 = tmp5 + tmp6; 1518 tmp3 = ptrC[partWidth*2]; 1519 tmp2 += (tmp7 << 4); 1520 tmp2 += (tmp7 << 2); 1521 tmp7 = ptrInt[partWidth*2]; 1522 tmp2 += tmp3; 1523 tmp2 = clp[tmp2>>10]; 1524 tmp7 += 16; 1525 tmp7 = clp[tmp7>>5]; 1526 tmp1 += 512; 1527 tmp2++; 1528 mb[48] = (u8)((tmp7 + tmp2) >> 1); 1529 1530 tmp7 = tmp3 + tmp6; 1531 tmp1 -= (tmp7 << 2); 1532 tmp1 -= tmp7; 1533 tmp7 = tmp4 + tmp5; 1534 tmp2 = ptrC[partWidth]; 1535 tmp1 += (tmp7 << 4); 1536 tmp1 += (tmp7 << 2); 1537 tmp7 = ptrInt[partWidth]; 1538 tmp1 += tmp2; 1539 tmp1 = clp[tmp1>>10]; 1540 tmp7 += 16; 1541 tmp7 = clp[tmp7>>5]; 1542 tmp6 += 512; 1543 tmp1++; 1544 mb[32] = (u8)((tmp7 + tmp1) >> 1); 1545 1546 tmp1 = *ptrC; 1547 tmp7 = tmp2 + tmp5; 1548 tmp6 -= (tmp7 << 2); 1549 tmp6 -= tmp7; 1550 tmp7 = tmp4 + tmp3; 1551 tmp6 += (tmp7 << 4); 1552 tmp6 += (tmp7 << 2); 1553 tmp7 = *ptrInt; 1554 tmp6 += tmp1; 1555 tmp6 = clp[tmp6>>10]; 1556 tmp7 += 16; 1557 
tmp7 = clp[tmp7>>5]; 1558 tmp5 += 512; 1559 tmp6++; 1560 mb[16] = (u8)((tmp7 + tmp6) >> 1); 1561 1562 tmp6 = ptrC[-(i32)partWidth]; 1563 tmp1 += tmp4; 1564 tmp5 -= (tmp1 << 2); 1565 tmp5 -= tmp1; 1566 tmp3 += tmp2; 1567 tmp5 += (tmp3 << 4); 1568 tmp5 += (tmp3 << 2); 1569 tmp7 = ptrInt[-(i32)partWidth]; 1570 tmp5 += tmp6; 1571 tmp5 = clp[tmp5>>10]; 1572 tmp7 += 16; 1573 tmp7 = clp[tmp7>>5]; 1574 tmp5++; 1575 *mb++ = (u8)((tmp7 + tmp5) >> 1); 1576 ptrC++; 1577 ptrInt++; 1578 } 1579 mb += 4*16 - partWidth; 1580 ptrC += 3*partWidth; 1581 ptrV += 3*partWidth; 1582 ptrInt += 3*partWidth; 1583 } 1584 1585 } 1586 1587 1588 /*------------------------------------------------------------------------------ 1589 1590 Function: h264bsdInterpolateMidHorQuarter 1591 1592 Functional description: 1593 Function to perform horizontal and vertical interpolation of pixel 1594 position 'i' or 'k' for a block. Overfilling is done only if needed. 1595 Reference image (ref) is read at correct position and the predicted 1596 part is written to macroblock array (mb) 1597 1598 ------------------------------------------------------------------------------*/ 1599 1600 void h264bsdInterpolateMidHorQuarter( 1601 u8 *ref, 1602 u8 *mb, 1603 i32 x0, 1604 i32 y0, 1605 u32 width, 1606 u32 height, 1607 u32 partWidth, 1608 u32 partHeight, 1609 u32 horOffset) /* 0 for pixel i, 1 for pixel k */ 1610 { 1611 u32 p1[21*21/4+1]; 1612 u32 x, y; 1613 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 1614 i32 *ptrJ, *ptrInt, *h1; 1615 u8 *ptrC, *ptrV; 1616 i32 table[21*16]; 1617 i32 tableWidth = (i32)partWidth+5; 1618 const u8 *clp = h264bsdClip + 512; 1619 1620 /* Code */ 1621 1622 ASSERT(ref); 1623 ASSERT(mb); 1624 1625 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || 1626 (y0 < 0) || ((u32)y0+partHeight+5 > height)) 1627 { 1628 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, 1629 partWidth+5, partHeight+5, partWidth+5); 1630 1631 x0 = 0; 1632 y0 = 0; 1633 ref = (u8*)p1; 1634 width = partWidth+5; 1635 } 1636 
1637 ref += (u32)y0 * width + (u32)x0; 1638 1639 h1 = table + tableWidth; 1640 ptrC = ref + width; 1641 ptrV = ptrC + 5*width; 1642 1643 /* First step: calculate intermediate values for 1644 * vertical interpolation */ 1645 for (y = (partHeight >> 2); y; y--) 1646 { 1647 for (x = (u32)tableWidth; x; x--) 1648 { 1649 tmp4 = ptrV[-(i32)width*2]; 1650 tmp5 = ptrV[-(i32)width]; 1651 tmp1 = ptrV[width]; 1652 tmp2 = ptrV[width*2]; 1653 tmp6 = *ptrV++; 1654 1655 tmp7 = tmp4 + tmp1; 1656 tmp2 -= (tmp7 << 2); 1657 tmp2 -= tmp7; 1658 tmp7 = tmp5 + tmp6; 1659 tmp3 = ptrC[width*2]; 1660 tmp2 += (tmp7 << 4); 1661 tmp2 += (tmp7 << 2); 1662 tmp2 += tmp3; 1663 h1[tableWidth*2] = tmp2; 1664 1665 tmp7 = tmp3 + tmp6; 1666 tmp1 -= (tmp7 << 2); 1667 tmp1 -= tmp7; 1668 tmp7 = tmp4 + tmp5; 1669 tmp2 = ptrC[width]; 1670 tmp1 += (tmp7 << 4); 1671 tmp1 += (tmp7 << 2); 1672 tmp1 += tmp2; 1673 h1[tableWidth] = tmp1; 1674 1675 tmp1 = *ptrC; 1676 tmp7 = tmp2 + tmp5; 1677 tmp6 -= (tmp7 << 2); 1678 tmp6 -= tmp7; 1679 tmp7 = tmp4 + tmp3; 1680 tmp6 += (tmp7 << 4); 1681 tmp6 += (tmp7 << 2); 1682 tmp6 += tmp1; 1683 *h1 = tmp6; 1684 1685 tmp6 = ptrC[-(i32)width]; 1686 tmp1 += tmp4; 1687 tmp5 -= (tmp1 << 2); 1688 tmp5 -= tmp1; 1689 tmp3 += tmp2; 1690 tmp5 += (tmp3 << 4); 1691 tmp5 += (tmp3 << 2); 1692 tmp5 += tmp6; 1693 h1[-tableWidth] = tmp5; 1694 h1++; 1695 ptrC++; 1696 } 1697 ptrC += 4*width - partWidth - 5; 1698 ptrV += 4*width - partWidth - 5; 1699 h1 += 3*tableWidth; 1700 } 1701 1702 /* Second step: calculate horizontal interpolation and average */ 1703 ptrJ = table + 5; 1704 /* Pointer to integer sample position, either G or H */ 1705 ptrInt = table + 2 + horOffset; 1706 for (y = partHeight; y; y--) 1707 { 1708 tmp6 = *(ptrJ - 5); 1709 tmp5 = *(ptrJ - 4); 1710 tmp4 = *(ptrJ - 3); 1711 tmp3 = *(ptrJ - 2); 1712 tmp2 = *(ptrJ - 1); 1713 for (x = (partWidth>>2); x; x--) 1714 { 1715 /* First pixel */ 1716 tmp6 += 512; 1717 tmp7 = tmp3 + tmp4; 1718 tmp6 += (tmp7 << 4); 1719 tmp6 += (tmp7 << 2); 1720 
tmp7 = tmp2 + tmp5; 1721 tmp1 = *ptrJ++; 1722 tmp6 -= (tmp7 << 2); 1723 tmp6 -= tmp7; 1724 tmp7 = *ptrInt++; 1725 tmp6 += tmp1; 1726 tmp6 = clp[tmp6 >> 10]; 1727 tmp7 += 16; 1728 tmp7 = clp[tmp7 >> 5]; 1729 tmp5 += 512; 1730 tmp6++; 1731 *mb++ = (u8)((tmp6 + tmp7) >> 1); 1732 /* Second pixel */ 1733 tmp7 = tmp2 + tmp3; 1734 tmp5 += (tmp7 << 4); 1735 tmp5 += (tmp7 << 2); 1736 tmp7 = tmp1 + tmp4; 1737 tmp6 = *ptrJ++; 1738 tmp5 -= (tmp7 << 2); 1739 tmp5 -= tmp7; 1740 tmp7 = *ptrInt++; 1741 tmp5 += tmp6; 1742 tmp5 = clp[tmp5 >> 10]; 1743 tmp7 += 16; 1744 tmp7 = clp[tmp7 >> 5]; 1745 tmp4 += 512; 1746 tmp5++; 1747 *mb++ = (u8)((tmp5 + tmp7) >> 1); 1748 /* Third pixel */ 1749 tmp7 = tmp1 + tmp2; 1750 tmp4 += (tmp7 << 4); 1751 tmp4 += (tmp7 << 2); 1752 tmp7 = tmp6 + tmp3; 1753 tmp5 = *ptrJ++; 1754 tmp4 -= (tmp7 << 2); 1755 tmp4 -= tmp7; 1756 tmp7 = *ptrInt++; 1757 tmp4 += tmp5; 1758 tmp4 = clp[tmp4 >> 10]; 1759 tmp7 += 16; 1760 tmp7 = clp[tmp7 >> 5]; 1761 tmp3 += 512; 1762 tmp4++; 1763 *mb++ = (u8)((tmp4 + tmp7) >> 1); 1764 /* Fourth pixel */ 1765 tmp7 = tmp6 + tmp1; 1766 tmp3 += (tmp7 << 4); 1767 tmp3 += (tmp7 << 2); 1768 tmp7 = tmp5 + tmp2; 1769 tmp4 = *ptrJ++; 1770 tmp3 -= (tmp7 << 2); 1771 tmp3 -= tmp7; 1772 tmp7 = *ptrInt++; 1773 tmp3 += tmp4; 1774 tmp3 = clp[tmp3 >> 10]; 1775 tmp7 += 16; 1776 tmp7 = clp[tmp7 >> 5]; 1777 tmp3++; 1778 *mb++ = (u8)((tmp3 + tmp7) >> 1); 1779 tmp3 = tmp5; 1780 tmp5 = tmp1; 1781 tmp7 = tmp4; 1782 tmp4 = tmp6; 1783 tmp6 = tmp2; 1784 tmp2 = tmp7; 1785 } 1786 ptrJ += 5; 1787 ptrInt += 5; 1788 mb += 16 - partWidth; 1789 } 1790 1791 } 1792 1793 1794 /*------------------------------------------------------------------------------ 1795 1796 Function: h264bsdPredictSamples 1797 1798 Functional description: 1799 This function reconstructs a prediction for a macroblock partition. 1800 The prediction is either copied or interpolated using the reference 1801 frame and the motion vector. Both luminance and chrominance parts are 1802 predicted. 
The prediction is stored in given macroblock array (data). 1803 Inputs: 1804 data pointer to macroblock array (384 bytes) for output 1805 mv pointer to motion vector used for prediction 1806 refPic pointer to reference picture structure 1807 xA x-coordinate for current macroblock 1808 yA y-coordinate for current macroblock 1809 partX x-offset for partition in macroblock 1810 partY y-offset for partition in macroblock 1811 partWidth width of partition 1812 partHeight height of partition 1813 Outputs: 1814 data macroblock array (16x16+8x8+8x8) where predicted 1815 partition is stored at correct position 1816 1817 ------------------------------------------------------------------------------*/ 1818 1819 void h264bsdPredictSamples( 1820 u8 *data, 1821 mv_t *mv, 1822 image_t *refPic, 1823 u32 xA, 1824 u32 yA, 1825 u32 partX, 1826 u32 partY, 1827 u32 partWidth, 1828 u32 partHeight) 1829 1830 { 1831 1832 /* Variables */ 1833 1834 u32 xFrac, yFrac, width, height; 1835 i32 xInt, yInt; 1836 u8 *lumaPartData; 1837 1838 /* Code */ 1839 1840 ASSERT(data); 1841 ASSERT(mv); 1842 ASSERT(partWidth); 1843 ASSERT(partHeight); 1844 ASSERT(refPic); 1845 ASSERT(refPic->data); 1846 ASSERT(refPic->width); 1847 ASSERT(refPic->height); 1848 1849 /* luma */ 1850 lumaPartData = data + 16*partY + partX; 1851 1852 xFrac = mv->hor & 0x3; 1853 yFrac = mv->ver & 0x3; 1854 1855 width = 16 * refPic->width; 1856 height = 16 * refPic->height; 1857 1858 xInt = (i32)xA + (i32)partX + (mv->hor >> 2); 1859 yInt = (i32)yA + (i32)partY + (mv->ver >> 2); 1860 1861 ASSERT(lumaFracPos[xFrac][yFrac] < 16); 1862 1863 switch (lumaFracPos[xFrac][yFrac]) 1864 { 1865 case 0: /* G */ 1866 h264bsdFillBlock(refPic->data, lumaPartData, 1867 xInt,yInt,width,height,partWidth,partHeight,16); 1868 break; 1869 case 1: /* d */ 1870 h264bsdInterpolateVerQuarter(refPic->data, lumaPartData, 1871 xInt, yInt-2, width, height, partWidth, partHeight, 0); 1872 break; 1873 case 2: /* h */ 1874 h264bsdInterpolateVerHalf(refPic->data, 
lumaPartData, 1875 xInt, yInt-2, width, height, partWidth, partHeight); 1876 break; 1877 case 3: /* n */ 1878 h264bsdInterpolateVerQuarter(refPic->data, lumaPartData, 1879 xInt, yInt-2, width, height, partWidth, partHeight, 1); 1880 break; 1881 case 4: /* a */ 1882 h264bsdInterpolateHorQuarter(refPic->data, lumaPartData, 1883 xInt-2, yInt, width, height, partWidth, partHeight, 0); 1884 break; 1885 case 5: /* e */ 1886 h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, 1887 xInt-2, yInt-2, width, height, partWidth, partHeight, 0); 1888 break; 1889 case 6: /* i */ 1890 h264bsdInterpolateMidHorQuarter(refPic->data, lumaPartData, 1891 xInt-2, yInt-2, width, height, partWidth, partHeight, 0); 1892 break; 1893 case 7: /* p */ 1894 h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, 1895 xInt-2, yInt-2, width, height, partWidth, partHeight, 2); 1896 break; 1897 case 8: /* b */ 1898 h264bsdInterpolateHorHalf(refPic->data, lumaPartData, 1899 xInt-2, yInt, width, height, partWidth, partHeight); 1900 break; 1901 case 9: /* f */ 1902 h264bsdInterpolateMidVerQuarter(refPic->data, lumaPartData, 1903 xInt-2, yInt-2, width, height, partWidth, partHeight, 0); 1904 break; 1905 case 10: /* j */ 1906 h264bsdInterpolateMidHalf(refPic->data, lumaPartData, 1907 xInt-2, yInt-2, width, height, partWidth, partHeight); 1908 break; 1909 case 11: /* q */ 1910 h264bsdInterpolateMidVerQuarter(refPic->data, lumaPartData, 1911 xInt-2, yInt-2, width, height, partWidth, partHeight, 1); 1912 break; 1913 case 12: /* c */ 1914 h264bsdInterpolateHorQuarter(refPic->data, lumaPartData, 1915 xInt-2, yInt, width, height, partWidth, partHeight, 1); 1916 break; 1917 case 13: /* g */ 1918 h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, 1919 xInt-2, yInt-2, width, height, partWidth, partHeight, 1); 1920 break; 1921 case 14: /* k */ 1922 h264bsdInterpolateMidHorQuarter(refPic->data, lumaPartData, 1923 xInt-2, yInt-2, width, height, partWidth, partHeight, 1); 1924 break; 1925 default: 
/* case 15, r */ 1926 h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, 1927 xInt-2, yInt-2, width, height, partWidth, partHeight, 3); 1928 break; 1929 } 1930 1931 /* chroma */ 1932 PredictChroma( 1933 data + 16*16 + (partY>>1)*8 + (partX>>1), 1934 xA + partX, 1935 yA + partY, 1936 partWidth, 1937 partHeight, 1938 mv, 1939 refPic); 1940 1941 } 1942 1943 #else /* H264DEC_OMXDL */ 1944 /*------------------------------------------------------------------------------ 1945 1946 Function: h264bsdPredictSamples 1947 1948 Functional description: 1949 This function reconstructs a prediction for a macroblock partition. 1950 The prediction is either copied or interpolated using the reference 1951 frame and the motion vector. Both luminance and chrominance parts are 1952 predicted. The prediction is stored in given macroblock array (data). 1953 Inputs: 1954 data pointer to macroblock array (384 bytes) for output 1955 mv pointer to motion vector used for prediction 1956 refPic pointer to reference picture structure 1957 xA x-coordinate for current macroblock 1958 yA y-coordinate for current macroblock 1959 partX x-offset for partition in macroblock 1960 partY y-offset for partition in macroblock 1961 partWidth width of partition 1962 partHeight height of partition 1963 Outputs: 1964 data macroblock array (16x16+8x8+8x8) where predicted 1965 partition is stored at correct position 1966 1967 ------------------------------------------------------------------------------*/ 1968 1969 /*lint -e{550} Symbol 'res' not accessed */ 1970 void h264bsdPredictSamples( 1971 u8 *data, 1972 mv_t *mv, 1973 image_t *refPic, 1974 u32 colAndRow, 1975 u32 part, 1976 u8 *pFill) 1977 1978 { 1979 1980 /* Variables */ 1981 1982 u32 xFrac, yFrac; 1983 u32 width, height; 1984 i32 xInt, yInt, x0, y0; 1985 u8 *partData, *ref; 1986 OMXSize roi; 1987 u32 fillWidth; 1988 u32 fillHeight; 1989 OMXResult res; 1990 u32 xA, yA; 1991 u32 partX, partY; 1992 u32 partWidth, partHeight; 1993 1994 /* Code */ 
1995 1996 ASSERT(data); 1997 ASSERT(mv); 1998 ASSERT(refPic); 1999 ASSERT(refPic->data); 2000 ASSERT(refPic->width); 2001 ASSERT(refPic->height); 2002 2003 xA = (colAndRow & 0xFFFF0000) >> 16; 2004 yA = (colAndRow & 0x0000FFFF); 2005 2006 partX = (part & 0xFF000000) >> 24; 2007 partY = (part & 0x00FF0000) >> 16; 2008 partWidth = (part & 0x0000FF00) >> 8; 2009 partHeight = (part & 0x000000FF); 2010 2011 ASSERT(partWidth); 2012 ASSERT(partHeight); 2013 2014 /* luma */ 2015 partData = data + 16*partY + partX; 2016 2017 xFrac = mv->hor & 0x3; 2018 yFrac = mv->ver & 0x3; 2019 2020 width = 16 * refPic->width; 2021 height = 16 * refPic->height; 2022 2023 xInt = (i32)xA + (i32)partX + (mv->hor >> 2); 2024 yInt = (i32)yA + (i32)partY + (mv->ver >> 2); 2025 2026 x0 = (xFrac) ? xInt-2 : xInt; 2027 y0 = (yFrac) ? yInt-2 : yInt; 2028 2029 if (xFrac) 2030 { 2031 if (partWidth == 16) 2032 fillWidth = 32; 2033 else 2034 fillWidth = 16; 2035 } 2036 else 2037 fillWidth = (partWidth*2); 2038 if (yFrac) 2039 fillHeight = partHeight+5; 2040 else 2041 fillHeight = partHeight; 2042 2043 2044 if ((x0 < 0) || ((u32)x0+fillWidth > width) || 2045 (y0 < 0) || ((u32)y0+fillHeight > height)) 2046 { 2047 h264bsdFillBlock(refPic->data, (u8*)pFill, x0, y0, width, height, 2048 fillWidth, fillHeight, fillWidth); 2049 2050 x0 = 0; 2051 y0 = 0; 2052 ref = pFill; 2053 width = fillWidth; 2054 if (yFrac) 2055 ref += 2*width; 2056 if (xFrac) 2057 ref += 2; 2058 } 2059 else 2060 { 2061 /*lint --e(737) Loss of sign */ 2062 ref = refPic->data + yInt*width + xInt; 2063 } 2064 /* Luma interpolation */ 2065 roi.width = (i32)partWidth; 2066 roi.height = (i32)partHeight; 2067 2068 res = omxVCM4P10_InterpolateLuma(ref, (i32)width, partData, 16, 2069 (i32)xFrac, (i32)yFrac, roi); 2070 ASSERT(res == 0); 2071 2072 /* Chroma */ 2073 width = 8 * refPic->width; 2074 height = 8 * refPic->height; 2075 2076 x0 = ((xA + partX) >> 1) + (mv->hor >> 3); 2077 y0 = ((yA + partY) >> 1) + (mv->ver >> 3); 2078 xFrac = mv->hor & 
0x7; 2079 yFrac = mv->ver & 0x7; 2080 2081 ref = refPic->data + 256 * refPic->width * refPic->height; 2082 2083 roi.width = (i32)(partWidth >> 1); 2084 fillWidth = ((partWidth >> 1) + 8) & ~0x7; 2085 roi.height = (i32)(partHeight >> 1); 2086 fillHeight = (partHeight >> 1) + 1; 2087 2088 if ((x0 < 0) || ((u32)x0+fillWidth > width) || 2089 (y0 < 0) || ((u32)y0+fillHeight > height)) 2090 { 2091 h264bsdFillBlock(ref, pFill, x0, y0, width, height, 2092 fillWidth, fillHeight, fillWidth); 2093 ref += width * height; 2094 h264bsdFillBlock(ref, pFill + fillWidth*fillHeight, 2095 x0, y0, width, height, fillWidth, 2096 fillHeight, fillWidth); 2097 2098 ref = pFill; 2099 x0 = 0; 2100 y0 = 0; 2101 width = fillWidth; 2102 height = fillHeight; 2103 } 2104 2105 partData = data + 16*16 + (partY>>1)*8 + (partX>>1); 2106 2107 /* Chroma interpolation */ 2108 /*lint --e(737) Loss of sign */ 2109 ref += y0 * width + x0; 2110 res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8, 2111 (u32)roi.width, (u32)roi.height, xFrac, yFrac); 2112 ASSERT(res == 0); 2113 partData += 8 * 8; 2114 ref += height * width; 2115 res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8, 2116 (u32)roi.width, (u32)roi.height, xFrac, yFrac); 2117 ASSERT(res == 0); 2118 2119 } 2120 2121 #endif /* H264DEC_OMXDL */ 2122 2123 2124 /*------------------------------------------------------------------------------ 2125 2126 Function: FillRow1 2127 2128 Functional description: 2129 This function gets a row of reference pels in a 'normal' case when no 2130 overfilling is necessary. 
2131 2132 ------------------------------------------------------------------------------*/ 2133 2134 static void FillRow1( 2135 u8 *ref, 2136 u8 *fill, 2137 i32 left, 2138 i32 center, 2139 i32 right) 2140 { 2141 UNUSED(left); 2142 UNUSED(right); 2143 ASSERT(ref); 2144 ASSERT(fill); 2145 2146 H264SwDecMemcpy(fill, ref, (u32)center); 2147 2148 /*lint -e(715) */ 2149 } 2150 2151 2152 /*------------------------------------------------------------------------------ 2153 2154 Function: h264bsdFillRow7 2155 2156 Functional description: 2157 This function gets a row of reference pels when horizontal coordinate 2158 is partly negative or partly greater than reference picture width 2159 (overfilling some pels on left and/or right edge). 2160 Inputs: 2161 ref pointer to reference samples 2162 left amount of pixels to overfill on left-edge 2163 center amount of pixels to copy 2164 right amount of pixels to overfill on right-edge 2165 Outputs: 2166 fill pointer where samples are stored 2167 2168 ------------------------------------------------------------------------------*/ 2169 #ifndef H264DEC_NEON 2170 void h264bsdFillRow7( 2171 u8 *ref, 2172 u8 *fill, 2173 i32 left, 2174 i32 center, 2175 i32 right) 2176 { 2177 u8 tmp; 2178 2179 ASSERT(ref); 2180 ASSERT(fill); 2181 2182 if (left) 2183 tmp = *ref; 2184 2185 for ( ; left; left--) 2186 /*lint -esym(644,tmp) tmp is initialized if used */ 2187 *fill++ = tmp; 2188 2189 for ( ; center; center--) 2190 *fill++ = *ref++; 2191 2192 if (right) 2193 tmp = ref[-1]; 2194 2195 for ( ; right; right--) 2196 /*lint -esym(644,tmp) tmp is initialized if used */ 2197 *fill++ = tmp; 2198 } 2199 #endif 2200 /*------------------------------------------------------------------------------ 2201 2202 Function: h264bsdFillBlock 2203 2204 Functional description: 2205 This function gets a block of reference pels. 
It determines whether 2206 overfilling is needed or not and repeatedly calls an appropriate 2207 function (by using a function pointer) that fills one row the block. 2208 Inputs: 2209 ref pointer to reference frame 2210 x0 x-coordinate for block 2211 y0 y-coordinate for block 2212 width width of reference frame 2213 height height of reference frame 2214 blockWidth width of block 2215 blockHeight height of block 2216 fillScanLength length of a line in output array (pixels) 2217 Outputs: 2218 fill pointer to array where output block is written 2219 2220 ------------------------------------------------------------------------------*/ 2221 2222 void h264bsdFillBlock( 2223 u8 *ref, 2224 u8 *fill, 2225 i32 x0, 2226 i32 y0, 2227 u32 width, 2228 u32 height, 2229 u32 blockWidth, 2230 u32 blockHeight, 2231 u32 fillScanLength) 2232 2233 { 2234 2235 /* Variables */ 2236 2237 i32 xstop, ystop; 2238 void (*fp)(u8*, u8*, i32, i32, i32); 2239 i32 left, x, right; 2240 i32 top, y, bottom; 2241 2242 /* Code */ 2243 2244 ASSERT(ref); 2245 ASSERT(fill); 2246 ASSERT(width); 2247 ASSERT(height); 2248 ASSERT(fill); 2249 ASSERT(blockWidth); 2250 ASSERT(blockHeight); 2251 2252 xstop = x0 + (i32)blockWidth; 2253 ystop = y0 + (i32)blockHeight; 2254 2255 /* Choose correct function whether overfilling on left-edge or right-edge 2256 * is needed or not */ 2257 if (x0 >= 0 && xstop <= (i32)width) 2258 fp = FillRow1; 2259 else 2260 fp = h264bsdFillRow7; 2261 2262 if (ystop < 0) 2263 y0 = -(i32)blockHeight; 2264 2265 if (xstop < 0) 2266 x0 = -(i32)blockWidth; 2267 2268 if (y0 > (i32)height) 2269 y0 = (i32)height; 2270 2271 if (x0 > (i32)width) 2272 x0 = (i32)width; 2273 2274 xstop = x0 + (i32)blockWidth; 2275 ystop = y0 + (i32)blockHeight; 2276 2277 if (x0 > 0) 2278 ref += x0; 2279 2280 if (y0 > 0) 2281 ref += y0 * (i32)width; 2282 2283 left = x0 < 0 ? -x0 : 0; 2284 right = xstop > (i32)width ? xstop - (i32)width : 0; 2285 x = (i32)blockWidth - left - right; 2286 2287 top = y0 < 0 ? 
-y0 : 0; 2288 bottom = ystop > (i32)height ? ystop - (i32)height : 0; 2289 y = (i32)blockHeight - top - bottom; 2290 2291 /* Top-overfilling */ 2292 for ( ; top; top-- ) 2293 { 2294 (*fp)(ref, fill, left, x, right); 2295 fill += fillScanLength; 2296 } 2297 2298 /* Lines inside reference image */ 2299 for ( ; y; y-- ) 2300 { 2301 (*fp)(ref, fill, left, x, right); 2302 ref += width; 2303 fill += fillScanLength; 2304 } 2305 2306 ref -= width; 2307 2308 /* Bottom-overfilling */ 2309 for ( ; bottom; bottom-- ) 2310 { 2311 (*fp)(ref, fill, left, x, right); 2312 fill += fillScanLength; 2313 } 2314 } 2315 2316 /*lint +e701 +e702 */ 2317 2318 2319