1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /*------------------------------------------------------------------------------ 18 19 Table of contents 20 21 1. Include headers 22 2. External compiler flags 23 3. Module defines 24 4. Local function prototypes 25 5. Functions 26 27 ------------------------------------------------------------------------------*/ 28 29 /*------------------------------------------------------------------------------ 30 1. Include headers 31 ------------------------------------------------------------------------------*/ 32 33 #include "basetype.h" 34 #include "h264bsd_reconstruct.h" 35 #include "h264bsd_macroblock_layer.h" 36 #include "h264bsd_image.h" 37 #include "h264bsd_util.h" 38 39 #ifdef H264DEC_OMXDL 40 #include "omxtypes.h" 41 #include "omxVC.h" 42 #include "armVC.h" 43 #endif /* H264DEC_OMXDL */ 44 45 /*------------------------------------------------------------------------------ 46 2. External compiler flags 47 -------------------------------------------------------------------------------- 48 49 -------------------------------------------------------------------------------- 50 3. Module defines 51 ------------------------------------------------------------------------------*/ 52 53 /* Switch off the following Lint messages for this file: 54 * Info 701: Shift left of signed quantity (int) 55 * Info 702: Shift right of signed quantity (int) 56 */ 57 /*lint -e701 -e702 */ 58 59 /* Luma fractional-sample positions 60 * 61 * G a b c H 62 * d e f g 63 * h i j k m 64 * n p q r 65 * M s N 66 * 67 * G, H, M and N are integer sample positions 68 * a-s are fractional samples that need to be interpolated. 69 */ 70 #ifndef H264DEC_OMXDL 71 static const u32 lumaFracPos[4][4] = { 72 /* G d h n a e i p b f j q c g k r */ 73 {0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}, {12, 13, 14, 15}}; 74 #endif /* H264DEC_OMXDL */ 75 76 /* clipping table, defined in h264bsd_intra_prediction.c */ 77 extern const u8 h264bsdClip[]; 78 79 /*------------------------------------------------------------------------------ 80 4. Local function prototypes 81 ------------------------------------------------------------------------------*/ 82 83 #ifndef H264DEC_OMXDL 84 85 /*------------------------------------------------------------------------------ 86 87 Function: h264bsdInterpolateChromaHor 88 89 Functional description: 90 This function performs chroma interpolation in horizontal direction. 91 Overfilling is done only if needed. Reference image (pRef) is 92 read at correct position and the predicted part is written to 93 macroblock's chrominance (predPartChroma) 94 Inputs: 95 pRef pointer to reference frame Cb top-left corner 96 x0 integer x-coordinate for prediction 97 y0 integer y-coordinate for prediction 98 width width of the reference frame chrominance in pixels 99 height height of the reference frame chrominance in pixels 100 xFrac horizontal fraction for prediction in 1/8 pixels 101 chromaPartWidth width of the predicted part in pixels 102 chromaPartHeight height of the predicted part in pixels 103 Outputs: 104 predPartChroma pointer where predicted part is written 105 106 ------------------------------------------------------------------------------*/ 107 #ifndef H264DEC_ARM11 108 void h264bsdInterpolateChromaHor( 109 u8 *pRef, 110 u8 *predPartChroma, 111 i32 x0, 112 i32 y0, 113 u32 width, 114 u32 height, 115 u32 xFrac, 116 u32 chromaPartWidth, 117 u32 chromaPartHeight) 118 { 119 120 /* Variables */ 121 122 u32 x, y, tmp1, tmp2, tmp3, tmp4, c, val; 123 u8 *ptrA, *cbr; 124 u32 comp; 125 u8 block[9*8*2]; 126 127 /* Code */ 128 129 ASSERT(predPartChroma); 130 ASSERT(chromaPartWidth); 131 ASSERT(chromaPartHeight); 132 ASSERT(xFrac < 8); 133 ASSERT(pRef); 134 135 if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) || 136 (y0 < 0) || ((u32)y0+chromaPartHeight > height)) 137 { 138 h264bsdFillBlock(pRef, block, x0, y0, width, height, 139 chromaPartWidth + 1, chromaPartHeight, chromaPartWidth + 1); 140 pRef += width * height; 141 h264bsdFillBlock(pRef, block + (chromaPartWidth+1)*chromaPartHeight, 142 x0, y0, width, height, chromaPartWidth + 1, 143 chromaPartHeight, chromaPartWidth + 1); 144 145 pRef = block; 146 x0 = 0; 147 y0 = 0; 148 width = chromaPartWidth+1; 149 height = chromaPartHeight; 150 } 151 152 val = 8 - xFrac; 153 154 for (comp = 0; comp <= 1; comp++) 155 { 156 157 ptrA = pRef + (comp * height + (u32)y0) * width + x0; 158 cbr = predPartChroma + comp * 8 * 8; 159 160 /* 2x2 pels per iteration 161 * bilinear horizontal interpolation */ 162 for (y = (chromaPartHeight >> 1); y; y--) 163 { 164 for (x = (chromaPartWidth >> 1); x; x--) 165 { 166 tmp1 = ptrA[width]; 167 tmp2 = *ptrA++; 168 tmp3 = ptrA[width]; 169 tmp4 = *ptrA++; 170 c = ((val * tmp1 + xFrac * tmp3) << 3) + 32; 171 c >>= 6; 172 cbr[8] = (u8)c; 173 c = ((val * tmp2 + xFrac * tmp4) << 3) + 32; 174 c >>= 6; 175 *cbr++ = (u8)c; 176 tmp1 = ptrA[width]; 177 tmp2 = *ptrA; 178 c = ((val * tmp3 + xFrac * tmp1) << 3) + 32; 179 c >>= 6; 180 cbr[8] = (u8)c; 181 c = ((val * tmp4 + xFrac * tmp2) << 3) + 32; 182 c >>= 6; 183 *cbr++ = (u8)c; 184 } 185 cbr += 2*8 - chromaPartWidth; 186 ptrA += 2*width - chromaPartWidth; 187 } 188 } 189 190 } 191 192 /*------------------------------------------------------------------------------ 193 194 Function: h264bsdInterpolateChromaVer 195 196 Functional description: 197 This function performs chroma interpolation in vertical direction. 198 Overfilling is done only if needed. Reference image (pRef) is 199 read at correct position and the predicted part is written to 200 macroblock's chrominance (predPartChroma) 201 202 ------------------------------------------------------------------------------*/ 203 204 void h264bsdInterpolateChromaVer( 205 u8 *pRef, 206 u8 *predPartChroma, 207 i32 x0, 208 i32 y0, 209 u32 width, 210 u32 height, 211 u32 yFrac, 212 u32 chromaPartWidth, 213 u32 chromaPartHeight) 214 { 215 216 /* Variables */ 217 218 u32 x, y, tmp1, tmp2, tmp3, c, val; 219 u8 *ptrA, *cbr; 220 u32 comp; 221 u8 block[9*8*2]; 222 223 /* Code */ 224 225 ASSERT(predPartChroma); 226 ASSERT(chromaPartWidth); 227 ASSERT(chromaPartHeight); 228 ASSERT(yFrac < 8); 229 ASSERT(pRef); 230 231 if ((x0 < 0) || ((u32)x0+chromaPartWidth > width) || 232 (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height)) 233 { 234 h264bsdFillBlock(pRef, block, x0, y0, width, height, chromaPartWidth, 235 chromaPartHeight + 1, chromaPartWidth); 236 pRef += width * height; 237 h264bsdFillBlock(pRef, block + chromaPartWidth*(chromaPartHeight+1), 238 x0, y0, width, height, chromaPartWidth, 239 chromaPartHeight + 1, chromaPartWidth); 240 241 pRef = block; 242 x0 = 0; 243 y0 = 0; 244 width = chromaPartWidth; 245 height = chromaPartHeight+1; 246 } 247 248 val = 8 - yFrac; 249 250 for (comp = 0; comp <= 1; comp++) 251 { 252 253 ptrA = pRef + (comp * height + (u32)y0) * width + x0; 254 cbr = predPartChroma + comp * 8 * 8; 255 256 /* 2x2 pels per iteration 257 * bilinear vertical interpolation */ 258 for (y = (chromaPartHeight >> 1); y; y--) 259 { 260 for (x = (chromaPartWidth >> 1); x; x--) 261 { 262 tmp3 = ptrA[width*2]; 263 tmp2 = ptrA[width]; 264 tmp1 = *ptrA++; 265 c = ((val * tmp2 + yFrac * tmp3) << 3) + 32; 266 c >>= 6; 267 cbr[8] = (u8)c; 268 c = ((val * tmp1 + yFrac * tmp2) << 3) + 32; 269 c >>= 6; 270 *cbr++ = (u8)c; 271 tmp3 = ptrA[width*2]; 272 tmp2 = ptrA[width]; 273 tmp1 = *ptrA++; 274 c = ((val * tmp2 + yFrac * tmp3) << 3) + 32; 275 c >>= 6; 276 cbr[8] = (u8)c; 277 c = ((val * tmp1 + yFrac * tmp2) << 3) + 32; 278 c >>= 6; 279 *cbr++ = (u8)c; 280 } 281 cbr += 2*8 - chromaPartWidth; 282 ptrA += 2*width - chromaPartWidth; 283 } 284 } 285 286 } 287 #endif 288 /*------------------------------------------------------------------------------ 289 290 Function: h264bsdInterpolateChromaHorVer 291 292 Functional description: 293 This function performs chroma interpolation in horizontal and 294 vertical direction. Overfilling is done only if needed. Reference 295 image (ref) is read at correct position and the predicted part 296 is written to macroblock's chrominance (predPartChroma) 297 298 ------------------------------------------------------------------------------*/ 299 300 void h264bsdInterpolateChromaHorVer( 301 u8 *ref, 302 u8 *predPartChroma, 303 i32 x0, 304 i32 y0, 305 u32 width, 306 u32 height, 307 u32 xFrac, 308 u32 yFrac, 309 u32 chromaPartWidth, 310 u32 chromaPartHeight) 311 { 312 u8 block[9*9*2]; 313 u32 x, y, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, valX, valY, plus32 = 32; 314 u32 comp; 315 u8 *ptrA, *cbr; 316 317 /* Code */ 318 319 ASSERT(predPartChroma); 320 ASSERT(chromaPartWidth); 321 ASSERT(chromaPartHeight); 322 ASSERT(xFrac < 8); 323 ASSERT(yFrac < 8); 324 ASSERT(ref); 325 326 if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) || 327 (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height)) 328 { 329 h264bsdFillBlock(ref, block, x0, y0, width, height, 330 chromaPartWidth + 1, chromaPartHeight + 1, chromaPartWidth + 1); 331 ref += width * height; 332 h264bsdFillBlock(ref, block + (chromaPartWidth+1)*(chromaPartHeight+1), 333 x0, y0, width, height, chromaPartWidth + 1, 334 chromaPartHeight + 1, chromaPartWidth + 1); 335 336 ref = block; 337 x0 = 0; 338 y0 = 0; 339 width = chromaPartWidth+1; 340 height = chromaPartHeight+1; 341 } 342 343 valX = 8 - xFrac; 344 valY = 8 - yFrac; 345 346 for (comp = 0; comp <= 1; comp++) 347 { 348 349 ptrA = ref + (comp * height + (u32)y0) * width + x0; 350 cbr = predPartChroma + comp * 8 * 8; 351 352 /* 2x2 pels per iteration 353 * bilinear vertical and horizontal interpolation */ 354 for (y = (chromaPartHeight >> 1); y; y--) 355 { 356 tmp1 = *ptrA; 357 tmp3 = ptrA[width]; 358 tmp5 = ptrA[width*2]; 359 tmp1 *= valY; 360 tmp1 += tmp3 * yFrac; 361 tmp3 *= valY; 362 tmp3 += tmp5 * yFrac; 363 for (x = (chromaPartWidth >> 1); x; x--) 364 { 365 tmp2 = *++ptrA; 366 tmp4 = ptrA[width]; 367 tmp6 = ptrA[width*2]; 368 tmp2 *= valY; 369 tmp2 += tmp4 * yFrac; 370 tmp4 *= valY; 371 tmp4 += tmp6 * yFrac; 372 tmp1 = tmp1 * valX + plus32; 373 tmp3 = tmp3 * valX + plus32; 374 tmp1 += tmp2 * xFrac; 375 tmp1 >>= 6; 376 tmp3 += tmp4 * xFrac; 377 tmp3 >>= 6; 378 cbr[8] = (u8)tmp3; 379 *cbr++ = (u8)tmp1; 380 381 tmp1 = *++ptrA; 382 tmp3 = ptrA[width]; 383 tmp5 = ptrA[width*2]; 384 tmp1 *= valY; 385 tmp1 += tmp3 * yFrac; 386 tmp3 *= valY; 387 tmp3 += tmp5 * yFrac; 388 tmp2 = tmp2 * valX + plus32; 389 tmp4 = tmp4 * valX + plus32; 390 tmp2 += tmp1 * xFrac; 391 tmp2 >>= 6; 392 tmp4 += tmp3 * xFrac; 393 tmp4 >>= 6; 394 cbr[8] = (u8)tmp4; 395 *cbr++ = (u8)tmp2; 396 } 397 cbr += 2*8 - chromaPartWidth; 398 ptrA += 2*width - chromaPartWidth; 399 } 400 } 401 402 } 403 404 /*------------------------------------------------------------------------------ 405 406 Function: PredictChroma 407 408 Functional description: 409 Top level chroma prediction function that calls the appropriate 410 interpolation function. The output is written to macroblock array. 411 412 ------------------------------------------------------------------------------*/ 413 414 static void PredictChroma( 415 u8 *mbPartChroma, 416 u32 xAL, 417 u32 yAL, 418 u32 partWidth, 419 u32 partHeight, 420 mv_t *mv, 421 image_t *refPic) 422 { 423 424 /* Variables */ 425 426 u32 xFrac, yFrac, width, height, chromaPartWidth, chromaPartHeight; 427 i32 xInt, yInt; 428 u8 *ref; 429 430 /* Code */ 431 432 ASSERT(mv); 433 ASSERT(refPic); 434 ASSERT(refPic->data); 435 ASSERT(refPic->width); 436 ASSERT(refPic->height); 437 438 width = 8 * refPic->width; 439 height = 8 * refPic->height; 440 441 xInt = (xAL >> 1) + (mv->hor >> 3); 442 yInt = (yAL >> 1) + (mv->ver >> 3); 443 xFrac = mv->hor & 0x7; 444 yFrac = mv->ver & 0x7; 445 446 chromaPartWidth = partWidth >> 1; 447 chromaPartHeight = partHeight >> 1; 448 ref = refPic->data + 256 * refPic->width * refPic->height; 449 450 if (xFrac && yFrac) 451 { 452 h264bsdInterpolateChromaHorVer(ref, mbPartChroma, xInt, yInt, width, 453 height, xFrac, yFrac, chromaPartWidth, chromaPartHeight); 454 } 455 else if (xFrac) 456 { 457 h264bsdInterpolateChromaHor(ref, mbPartChroma, xInt, yInt, width, 458 height, xFrac, chromaPartWidth, chromaPartHeight); 459 } 460 else if (yFrac) 461 { 462 h264bsdInterpolateChromaVer(ref, mbPartChroma, xInt, yInt, width, 463 height, yFrac, chromaPartWidth, chromaPartHeight); 464 } 465 else 466 { 467 h264bsdFillBlock(ref, mbPartChroma, xInt, yInt, width, height, 468 chromaPartWidth, chromaPartHeight, 8); 469 ref += width * height; 470 h264bsdFillBlock(ref, mbPartChroma + 8*8, xInt, yInt, width, height, 471 chromaPartWidth, chromaPartHeight, 8); 472 } 473 474 } 475 476 477 /*------------------------------------------------------------------------------ 478 479 Function: h264bsdInterpolateVerHalf 480 481 Functional description: 482 Function to perform vertical interpolation of pixel position 'h' 483 for a block. Overfilling is done only if needed. Reference 484 image (ref) is read at correct position and the predicted part 485 is written to macroblock array (mb) 486 487 ------------------------------------------------------------------------------*/ 488 #ifndef H264DEC_ARM11 489 void h264bsdInterpolateVerHalf( 490 u8 *ref, 491 u8 *mb, 492 i32 x0, 493 i32 y0, 494 u32 width, 495 u32 height, 496 u32 partWidth, 497 u32 partHeight) 498 { 499 u32 p1[21*21/4+1]; 500 u32 i, j; 501 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 502 u8 *ptrC, *ptrV; 503 const u8 *clp = h264bsdClip + 512; 504 505 /* Code */ 506 507 ASSERT(ref); 508 ASSERT(mb); 509 510 if ((x0 < 0) || ((u32)x0+partWidth > width) || 511 (y0 < 0) || ((u32)y0+partHeight+5 > height)) 512 { 513 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, 514 partWidth, partHeight+5, partWidth); 515 516 x0 = 0; 517 y0 = 0; 518 ref = (u8*)p1; 519 width = partWidth; 520 } 521 522 ref += (u32)y0 * width + (u32)x0; 523 524 ptrC = ref + width; 525 ptrV = ptrC + 5*width; 526 527 /* 4 pixels per iteration, interpolate using 5 vertical samples */ 528 for (i = (partHeight >> 2); i; i--) 529 { 530 /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */ 531 for (j = partWidth; j; j--) 532 { 533 tmp4 = ptrV[-(i32)width*2]; 534 tmp5 = ptrV[-(i32)width]; 535 tmp1 = ptrV[width]; 536 tmp2 = ptrV[width*2]; 537 tmp6 = *ptrV++; 538 539 tmp7 = tmp4 + tmp1; 540 tmp2 -= (tmp7 << 2); 541 tmp2 -= tmp7; 542 tmp2 += 16; 543 tmp7 = tmp5 + tmp6; 544 tmp3 = ptrC[width*2]; 545 tmp2 += (tmp7 << 4); 546 tmp2 += (tmp7 << 2); 547 tmp2 += tmp3; 548 tmp2 = clp[tmp2>>5]; 549 tmp1 += 16; 550 mb[48] = (u8)tmp2; 551 552 tmp7 = tmp3 + tmp6; 553 tmp1 -= (tmp7 << 2); 554 tmp1 -= tmp7; 555 tmp7 = tmp4 + tmp5; 556 tmp2 = ptrC[width]; 557 tmp1 += (tmp7 << 4); 558 tmp1 += (tmp7 << 2); 559 tmp1 += tmp2; 560 tmp1 = clp[tmp1>>5]; 561 tmp6 += 16; 562 mb[32] = (u8)tmp1; 563 564 tmp7 = tmp2 + tmp5; 565 tmp6 -= (tmp7 << 2); 566 tmp6 -= tmp7; 567 tmp7 = tmp4 + tmp3; 568 tmp1 = *ptrC; 569 tmp6 += (tmp7 << 4); 570 tmp6 += (tmp7 << 2); 571 tmp6 += tmp1; 572 tmp6 = clp[tmp6>>5]; 573 tmp5 += 16; 574 mb[16] = (u8)tmp6; 575 576 tmp1 += tmp4; 577 tmp5 -= (tmp1 << 2); 578 tmp5 -= tmp1; 579 tmp3 += tmp2; 580 tmp6 = ptrC[-(i32)width]; 581 tmp5 += (tmp3 << 4); 582 tmp5 += (tmp3 << 2); 583 tmp5 += tmp6; 584 tmp5 = clp[tmp5>>5]; 585 *mb++ = (u8)tmp5; 586 ptrC++; 587 } 588 ptrC += 4*width - partWidth; 589 ptrV += 4*width - partWidth; 590 mb += 4*16 - partWidth; 591 } 592 593 } 594 595 /*------------------------------------------------------------------------------ 596 597 Function: h264bsdInterpolateVerQuarter 598 599 Functional description: 600 Function to perform vertical interpolation of pixel position 'd' 601 or 'n' for a block. Overfilling is done only if needed. Reference 602 image (ref) is read at correct position and the predicted part 603 is written to macroblock array (mb) 604 605 ------------------------------------------------------------------------------*/ 606 607 void h264bsdInterpolateVerQuarter( 608 u8 *ref, 609 u8 *mb, 610 i32 x0, 611 i32 y0, 612 u32 width, 613 u32 height, 614 u32 partWidth, 615 u32 partHeight, 616 u32 verOffset) /* 0 for pixel d, 1 for pixel n */ 617 { 618 u32 p1[21*21/4+1]; 619 u32 i, j; 620 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 621 u8 *ptrC, *ptrV, *ptrInt; 622 const u8 *clp = h264bsdClip + 512; 623 624 /* Code */ 625 626 ASSERT(ref); 627 ASSERT(mb); 628 629 if ((x0 < 0) || ((u32)x0+partWidth > width) || 630 (y0 < 0) || ((u32)y0+partHeight+5 > height)) 631 { 632 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, 633 partWidth, partHeight+5, partWidth); 634 635 x0 = 0; 636 y0 = 0; 637 ref = (u8*)p1; 638 width = partWidth; 639 } 640 641 ref += (u32)y0 * width + (u32)x0; 642 643 ptrC = ref + width; 644 ptrV = ptrC + 5*width; 645 646 /* Pointer to integer sample position, either M or R */ 647 ptrInt = ptrC + (2+verOffset)*width; 648 649 /* 4 pixels per iteration 650 * interpolate using 5 vertical samples and average between 651 * interpolated value and integer sample value */ 652 for (i = (partHeight >> 2); i; i--) 653 { 654 /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */ 655 for (j = partWidth; j; j--) 656 { 657 tmp4 = ptrV[-(i32)width*2]; 658 tmp5 = ptrV[-(i32)width]; 659 tmp1 = ptrV[width]; 660 tmp2 = ptrV[width*2]; 661 tmp6 = *ptrV++; 662 663 tmp7 = tmp4 + tmp1; 664 tmp2 -= (tmp7 << 2); 665 tmp2 -= tmp7; 666 tmp2 += 16; 667 tmp7 = tmp5 + tmp6; 668 tmp3 = ptrC[width*2]; 669 tmp2 += (tmp7 << 4); 670 tmp2 += (tmp7 << 2); 671 tmp2 += tmp3; 672 tmp2 = clp[tmp2>>5]; 673 tmp7 = ptrInt[width*2]; 674 tmp1 += 16; 675 tmp2++; 676 mb[48] = (u8)((tmp2 + tmp7) >> 1); 677 678 tmp7 = tmp3 + tmp6; 679 tmp1 -= (tmp7 << 2); 680 tmp1 -= tmp7; 681 tmp7 = tmp4 + tmp5; 682 tmp2 = ptrC[width]; 683 tmp1 += (tmp7 << 4); 684 tmp1 += (tmp7 << 2); 685 tmp1 += tmp2; 686 tmp1 = clp[tmp1>>5]; 687 tmp7 = ptrInt[width]; 688 tmp6 += 16; 689 tmp1++; 690 mb[32] = (u8)((tmp1 + tmp7) >> 1); 691 692 tmp7 = tmp2 + tmp5; 693 tmp6 -= (tmp7 << 2); 694 tmp6 -= tmp7; 695 tmp7 = tmp4 + tmp3; 696 tmp1 = *ptrC; 697 tmp6 += (tmp7 << 4); 698 tmp6 += (tmp7 << 2); 699 tmp6 += tmp1; 700 tmp6 = clp[tmp6>>5]; 701 tmp7 = *ptrInt; 702 tmp5 += 16; 703 tmp6++; 704 mb[16] = (u8)((tmp6 + tmp7) >> 1); 705 706 tmp1 += tmp4; 707 tmp5 -= (tmp1 << 2); 708 tmp5 -= tmp1; 709 tmp3 += tmp2; 710 tmp6 = ptrC[-(i32)width]; 711 tmp5 += (tmp3 << 4); 712 tmp5 += (tmp3 << 2); 713 tmp5 += tmp6; 714 tmp5 = clp[tmp5>>5]; 715 tmp7 = ptrInt[-(i32)width]; 716 tmp5++; 717 *mb++ = (u8)((tmp5 + tmp7) >> 1); 718 ptrC++; 719 ptrInt++; 720 } 721 ptrC += 4*width - partWidth; 722 ptrV += 4*width - partWidth; 723 ptrInt += 4*width - partWidth; 724 mb += 4*16 - partWidth; 725 } 726 727 } 728 729 /*------------------------------------------------------------------------------ 730 731 Function: h264bsdInterpolateHorHalf 732 733 Functional description: 734 Function to perform horizontal interpolation of pixel position 'b' 735 for a block. Overfilling is done only if needed. Reference 736 image (ref) is read at correct position and the predicted part 737 is written to macroblock array (mb) 738 739 ------------------------------------------------------------------------------*/ 740 741 void h264bsdInterpolateHorHalf( 742 u8 *ref, 743 u8 *mb, 744 i32 x0, 745 i32 y0, 746 u32 width, 747 u32 height, 748 u32 partWidth, 749 u32 partHeight) 750 { 751 u32 p1[21*21/4+1]; 752 u8 *ptrJ; 753 u32 x, y; 754 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 755 const u8 *clp = h264bsdClip + 512; 756 757 /* Code */ 758 759 ASSERT(ref); 760 ASSERT(mb); 761 ASSERT((partWidth&0x3) == 0); 762 ASSERT((partHeight&0x3) == 0); 763 764 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || 765 (y0 < 0) || ((u32)y0+partHeight > height)) 766 { 767 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, 768 partWidth+5, partHeight, partWidth+5); 769 770 x0 = 0; 771 y0 = 0; 772 ref = (u8*)p1; 773 width = partWidth + 5; 774 } 775 776 ref += (u32)y0 * width + (u32)x0; 777 778 ptrJ = ref + 5; 779 780 for (y = partHeight; y; y--) 781 { 782 tmp6 = *(ptrJ - 5); 783 tmp5 = *(ptrJ - 4); 784 tmp4 = *(ptrJ - 3); 785 tmp3 = *(ptrJ - 2); 786 tmp2 = *(ptrJ - 1); 787 788 /* calculate 4 pels per iteration */ 789 for (x = (partWidth >> 2); x; x--) 790 { 791 /* First pixel */ 792 tmp6 += 16; 793 tmp7 = tmp3 + tmp4; 794 tmp6 += (tmp7 << 4); 795 tmp6 += (tmp7 << 2); 796 tmp7 = tmp2 + tmp5; 797 tmp1 = *ptrJ++; 798 tmp6 -= (tmp7 << 2); 799 tmp6 -= tmp7; 800 tmp6 += tmp1; 801 tmp6 = clp[tmp6>>5]; 802 /* Second pixel */ 803 tmp5 += 16; 804 tmp7 = tmp2 + tmp3; 805 *mb++ = (u8)tmp6; 806 tmp5 += (tmp7 << 4); 807 tmp5 += (tmp7 << 2); 808 tmp7 = tmp1 + tmp4; 809 tmp6 = *ptrJ++; 810 tmp5 -= (tmp7 << 2); 811 tmp5 -= tmp7; 812 tmp5 += tmp6; 813 tmp5 = clp[tmp5>>5]; 814 /* Third pixel */ 815 tmp4 += 16; 816 tmp7 = tmp1 + tmp2; 817 *mb++ = (u8)tmp5; 818 tmp4 += (tmp7 << 4); 819 tmp4 += (tmp7 << 2); 820 tmp7 = tmp6 + tmp3; 821 tmp5 = *ptrJ++; 822 tmp4 -= (tmp7 << 2); 823 tmp4 -= tmp7; 824 tmp4 += tmp5; 825 tmp4 = clp[tmp4>>5]; 826 /* Fourth pixel */ 827 tmp3 += 16; 828 tmp7 = tmp6 + tmp1; 829 *mb++ = (u8)tmp4; 830 tmp3 += (tmp7 << 4); 831 tmp3 += (tmp7 << 2); 832 tmp7 = tmp5 + tmp2; 833 tmp4 = *ptrJ++; 834 tmp3 -= (tmp7 << 2); 835 tmp3 -= tmp7; 836 tmp3 += tmp4; 837 tmp3 = clp[tmp3>>5]; 838 tmp7 = tmp4; 839 tmp4 = tmp6; 840 tmp6 = tmp2; 841 tmp2 = tmp7; 842 *mb++ = (u8)tmp3; 843 tmp3 = tmp5; 844 tmp5 = tmp1; 845 } 846 ptrJ += width - partWidth; 847 mb += 16 - partWidth; 848 } 849 850 } 851 852 /*------------------------------------------------------------------------------ 853 854 Function: h264bsdInterpolateHorQuarter 855 856 Functional description: 857 Function to perform horizontal interpolation of pixel position 'a' 858 or 'c' for a block. Overfilling is done only if needed. Reference 859 image (ref) is read at correct position and the predicted part 860 is written to macroblock array (mb) 861 862 ------------------------------------------------------------------------------*/ 863 864 void h264bsdInterpolateHorQuarter( 865 u8 *ref, 866 u8 *mb, 867 i32 x0, 868 i32 y0, 869 u32 width, 870 u32 height, 871 u32 partWidth, 872 u32 partHeight, 873 u32 horOffset) /* 0 for pixel a, 1 for pixel c */ 874 { 875 u32 p1[21*21/4+1]; 876 u8 *ptrJ; 877 u32 x, y; 878 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 879 const u8 *clp = h264bsdClip + 512; 880 881 /* Code */ 882 883 ASSERT(ref); 884 ASSERT(mb); 885 886 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || 887 (y0 < 0) || ((u32)y0+partHeight > height)) 888 { 889 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, 890 partWidth+5, partHeight, partWidth+5); 891 892 x0 = 0; 893 y0 = 0; 894 ref = (u8*)p1; 895 width = partWidth + 5; 896 } 897 898 ref += (u32)y0 * width + (u32)x0; 899 900 ptrJ = ref + 5; 901 902 for (y = partHeight; y; y--) 903 { 904 tmp6 = *(ptrJ - 5); 905 tmp5 = *(ptrJ - 4); 906 tmp4 = *(ptrJ - 3); 907 tmp3 = *(ptrJ - 2); 908 tmp2 = *(ptrJ - 1); 909 910 /* calculate 4 pels per iteration */ 911 for (x = (partWidth >> 2); x; x--) 912 { 913 /* First pixel */ 914 tmp6 += 16; 915 tmp7 = tmp3 + tmp4; 916 tmp6 += (tmp7 << 4); 917 tmp6 += (tmp7 << 2); 918 tmp7 = tmp2 + tmp5; 919 tmp1 = *ptrJ++; 920 tmp6 -= (tmp7 << 2); 921 tmp6 -= tmp7; 922 tmp6 += tmp1; 923 tmp6 = clp[tmp6>>5]; 924 tmp5 += 16; 925 if (!horOffset) 926 tmp6 += tmp4; 927 else 928 tmp6 += tmp3; 929 *mb++ = (u8)((tmp6 + 1) >> 1); 930 /* Second pixel */ 931 tmp7 = tmp2 + tmp3; 932 tmp5 += (tmp7 << 4); 933 tmp5 += (tmp7 << 2); 934 tmp7 = tmp1 + tmp4; 935 tmp6 = *ptrJ++; 936 tmp5 -= (tmp7 << 2); 937 tmp5 -= tmp7; 938 tmp5 += tmp6; 939 tmp5 = clp[tmp5>>5]; 940 tmp4 += 16; 941 if (!horOffset) 942 tmp5 += tmp3; 943 else 944 tmp5 += tmp2; 945 *mb++ = (u8)((tmp5 + 1) >> 1); 946 /* Third pixel */ 947 tmp7 = tmp1 + tmp2; 948 tmp4 += (tmp7 << 4); 949 tmp4 += (tmp7 << 2); 950 tmp7 = tmp6 + tmp3; 951 tmp5 = *ptrJ++; 952 tmp4 -= (tmp7 << 2); 953 tmp4 -= tmp7; 954 tmp4 += tmp5; 955 tmp4 = clp[tmp4>>5]; 956 tmp3 += 16; 957 if (!horOffset) 958 tmp4 += tmp2; 959 else 960 tmp4 += tmp1; 961 *mb++ = (u8)((tmp4 + 1) >> 1); 962 /* Fourth pixel */ 963 tmp7 = tmp6 + tmp1; 964 tmp3 += (tmp7 << 4); 965 tmp3 += (tmp7 << 2); 966 tmp7 = tmp5 + tmp2; 967 tmp4 = *ptrJ++; 968 tmp3 -= (tmp7 << 2); 969 tmp3 -= tmp7; 970 tmp3 += tmp4; 971 tmp3 = clp[tmp3>>5]; 972 if (!horOffset) 973 tmp3 += tmp1; 974 else 975 tmp3 += tmp6; 976 *mb++ = (u8)((tmp3 + 1) >> 1); 977 tmp3 = tmp5; 978 tmp5 = tmp1; 979 tmp7 = tmp4; 980 tmp4 = tmp6; 981 tmp6 = tmp2; 982 tmp2 = tmp7; 983 } 984 ptrJ += width - partWidth; 985 mb += 16 - partWidth; 986 } 987 988 } 989 990 /*------------------------------------------------------------------------------ 991 992 Function: h264bsdInterpolateHorVerQuarter 993 994 Functional description: 995 Function to perform horizontal and vertical interpolation of pixel 996 position 'e', 'g', 'p' or 'r' for a block. Overfilling is done only 997 if needed. Reference image (ref) is read at correct position and 998 the predicted part is written to macroblock array (mb) 999 1000 ------------------------------------------------------------------------------*/ 1001 1002 void h264bsdInterpolateHorVerQuarter( 1003 u8 *ref, 1004 u8 *mb, 1005 i32 x0, 1006 i32 y0, 1007 u32 width, 1008 u32 height, 1009 u32 partWidth, 1010 u32 partHeight, 1011 u32 horVerOffset) /* 0 for pixel e, 1 for pixel g, 1012 2 for pixel p, 3 for pixel r */ 1013 { 1014 u32 p1[21*21/4+1]; 1015 u8 *ptrC, *ptrJ, *ptrV; 1016 u32 x, y; 1017 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 1018 const u8 *clp = h264bsdClip + 512; 1019 1020 /* Code */ 1021 1022 ASSERT(ref); 1023 ASSERT(mb); 1024 1025 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || 1026 (y0 < 0) || ((u32)y0+partHeight+5 > height)) 1027 { 1028 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, 1029 partWidth+5, partHeight+5, partWidth+5); 1030 1031 x0 = 0; 1032 y0 = 0; 1033 ref = (u8*)p1; 1034 width = partWidth+5; 1035 } 1036 1037 /* Ref points to G + (-2, -2) */ 1038 ref += (u32)y0 * width + (u32)x0; 1039 1040 /* ptrJ points to either J or Q, depending on vertical offset */ 1041 ptrJ = ref + (((horVerOffset & 0x2) >> 1) + 2) * width + 5; 1042 1043 /* ptrC points to either C or D, depending on horizontal offset */ 1044 ptrC = ref + width + 2 + (horVerOffset & 0x1); 1045 1046 for (y = partHeight; y; y--) 1047 { 1048 tmp6 = *(ptrJ - 5); 1049 tmp5 = *(ptrJ - 4); 1050 tmp4 = *(ptrJ - 3); 1051 tmp3 = *(ptrJ - 2); 1052 tmp2 = *(ptrJ - 1); 1053 1054 /* Horizontal interpolation, calculate 4 pels per iteration */ 1055 for (x = (partWidth >> 2); x; x--) 1056 { 1057 /* First pixel */ 1058 tmp6 += 16; 1059 tmp7 = tmp3 + tmp4; 1060 tmp6 += (tmp7 << 4); 1061 tmp6 += (tmp7 << 2); 1062 tmp7 = tmp2 + tmp5; 1063 tmp1 = *ptrJ++; 1064 tmp6 -= (tmp7 << 2); 1065 tmp6 -= tmp7; 1066 tmp6 += tmp1; 1067 tmp6 = clp[tmp6>>5]; 1068 /* Second pixel */ 1069 tmp5 += 16; 1070 tmp7 = tmp2 + tmp3; 1071 *mb++ = (u8)tmp6; 1072 tmp5 += (tmp7 << 4); 1073 tmp5 += (tmp7 << 2); 1074 tmp7 = tmp1 + tmp4; 1075 tmp6 = *ptrJ++; 1076 tmp5 -= (tmp7 << 2); 1077 tmp5 -= tmp7; 1078 tmp5 += tmp6; 1079 tmp5 = clp[tmp5>>5]; 1080 /* Third pixel */ 1081 tmp4 += 16; 1082 tmp7 = tmp1 + tmp2; 1083 *mb++ = (u8)tmp5; 1084 tmp4 += (tmp7 << 4); 1085 tmp4 += (tmp7 << 2); 1086 tmp7 = tmp6 + tmp3; 1087 tmp5 = *ptrJ++; 1088 tmp4 -= (tmp7 << 2); 1089 tmp4 -= tmp7; 1090 tmp4 += tmp5; 1091 tmp4 = clp[tmp4>>5]; 1092 /* Fourth pixel */ 1093 tmp3 += 16; 1094 tmp7 = tmp6 + tmp1; 1095 *mb++ = (u8)tmp4; 1096 tmp3 += (tmp7 << 4); 1097 tmp3 += (tmp7 << 2); 1098 tmp7 = tmp5 + tmp2; 1099 tmp4 = *ptrJ++; 1100 tmp3 -= (tmp7 << 2); 1101 tmp3 -= tmp7; 1102 tmp3 += tmp4; 1103 tmp3 = clp[tmp3>>5]; 1104 tmp7 = tmp4; 1105 tmp4 = tmp6; 1106 tmp6 = tmp2; 1107 tmp2 = tmp7; 1108 *mb++ = (u8)tmp3; 1109 tmp3 = tmp5; 1110 tmp5 = tmp1; 1111 } 1112 ptrJ += width - partWidth; 1113 mb += 16 - partWidth; 1114 } 1115 1116 mb -= 16*partHeight; 1117 ptrV = ptrC + 5*width; 1118 1119 for (y = (partHeight >> 2); y; y--) 1120 { 1121 /* Vertical interpolation and averaging, 4 pels per iteration */ 1122 for (x = partWidth; x; x--) 1123 { 1124 tmp4 = ptrV[-(i32)width*2]; 1125 tmp5 = ptrV[-(i32)width]; 1126 tmp1 = ptrV[width]; 1127 tmp2 = ptrV[width*2]; 1128 tmp6 = *ptrV++; 1129 1130 tmp7 = tmp4 + tmp1; 1131 tmp2 -= (tmp7 << 2); 1132 tmp2 -= tmp7; 1133 tmp2 += 16; 1134 tmp7 = tmp5 + tmp6; 1135 tmp3 = ptrC[width*2]; 1136 tmp2 += (tmp7 << 4); 1137 tmp2 += (tmp7 << 2); 1138 tmp2 += tmp3; 1139 tmp7 = clp[tmp2>>5]; 1140 tmp2 = mb[48]; 1141 tmp1 += 16; 1142 tmp7++; 1143 mb[48] = (u8)((tmp2 + tmp7) >> 1); 1144 1145 tmp7 = tmp3 + tmp6; 1146 tmp1 -= (tmp7 << 2); 1147 tmp1 -= tmp7; 1148 tmp7 = tmp4 + tmp5; 1149 tmp2 = ptrC[width]; 1150 tmp1 += (tmp7 << 4); 1151 tmp1 += (tmp7 << 2); 1152 tmp1 += tmp2; 1153 tmp7 = clp[tmp1>>5]; 1154 tmp1 = mb[32]; 1155 tmp6 += 16; 1156 tmp7++; 1157 mb[32] = (u8)((tmp1 + tmp7) >> 1); 1158 1159 tmp1 = *ptrC; 1160 tmp7 = tmp2 + tmp5; 1161 tmp6 -= (tmp7 << 2); 1162 tmp6 -= tmp7; 1163 tmp7 = tmp4 + tmp3; 1164 tmp6 += (tmp7 << 4); 1165 tmp6 += (tmp7 << 2); 1166 tmp6 += tmp1; 1167 tmp7 = clp[tmp6>>5]; 1168 tmp6 = mb[16]; 1169 tmp5 += 16; 1170 tmp7++; 1171 mb[16] = (u8)((tmp6 + tmp7) >> 1); 1172 1173 tmp6 = ptrC[-(i32)width]; 1174 tmp1 += tmp4; 1175 tmp5 -= (tmp1 << 2); 1176 tmp5 -= tmp1; 1177 tmp3 += tmp2; 1178 tmp5 += (tmp3 << 4); 1179 tmp5 += (tmp3 << 2); 1180 tmp5 += tmp6; 1181 tmp7 = clp[tmp5>>5]; 1182 tmp5 = *mb; 1183 tmp7++; 1184 *mb++ = (u8)((tmp5 + tmp7) >> 1); 1185 ptrC++; 1186 1187 } 1188 ptrC += 4*width - partWidth; 1189 ptrV += 4*width - partWidth; 1190 mb += 4*16 - partWidth; 1191 } 1192 1193 } 1194 #endif 1195 1196 /*------------------------------------------------------------------------------ 1197 1198 Function: h264bsdInterpolateMidHalf 1199 1200 Functional description: 1201 Function to perform horizontal and vertical interpolation of pixel 1202 position 'j' for a block. Overfilling is done only if needed. 1203 Reference image (ref) is read at correct position and the predicted 1204 part is written to macroblock array (mb) 1205 1206 ------------------------------------------------------------------------------*/ 1207 1208 void h264bsdInterpolateMidHalf( 1209 u8 *ref, 1210 u8 *mb, 1211 i32 x0, 1212 i32 y0, 1213 u32 width, 1214 u32 height, 1215 u32 partWidth, 1216 u32 partHeight) 1217 { 1218 u32 p1[21*21/4+1]; 1219 u32 x, y; 1220 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 1221 i32 *ptrC, *ptrV, *b1; 1222 u8 *ptrJ; 1223 i32 table[21*16]; 1224 const u8 *clp = h264bsdClip + 512; 1225 1226 /* Code */ 1227 1228 ASSERT(ref); 1229 ASSERT(mb); 1230 1231 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || 1232 (y0 < 0) || ((u32)y0+partHeight+5 > height)) 1233 { 1234 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, 1235 partWidth+5, partHeight+5, partWidth+5); 1236 1237 x0 = 0; 1238 y0 = 0; 1239 ref = (u8*)p1; 1240 width = partWidth+5; 1241 } 1242 1243 ref += (u32)y0 * width + (u32)x0; 1244 1245 b1 = table; 1246 ptrJ = ref + 5; 1247 1248 /* First step: calculate intermediate values for 1249 * horizontal interpolation */ 1250 for (y = partHeight + 5; y; y--) 1251 { 1252 tmp6 = *(ptrJ - 5); 1253 tmp5 = *(ptrJ - 4); 1254 tmp4 = *(ptrJ - 3); 1255 tmp3 = *(ptrJ - 2); 1256 tmp2 = *(ptrJ - 1); 1257 1258 /* 4 pels per iteration */ 1259 for (x = (partWidth >> 2); x; x--) 1260 { 1261 /* First pixel */ 1262 tmp7 = tmp3 + tmp4; 1263 tmp6 += (tmp7 << 4); 1264 tmp6 += (tmp7 << 2); 1265 tmp7 = tmp2 + tmp5; 1266 tmp1 = *ptrJ++; 1267 tmp6 -= (tmp7 << 2); 1268 tmp6 -= tmp7; 1269 tmp6 += tmp1; 1270 *b1++ = tmp6; 1271 /* Second pixel */ 1272 tmp7 = tmp2 + tmp3; 1273 tmp5 += (tmp7 << 4); 1274 tmp5 += (tmp7 << 2); 1275 tmp7 = tmp1 + tmp4; 1276 tmp6 = *ptrJ++; 1277 tmp5 -= (tmp7 << 2); 1278 tmp5 -= tmp7; 1279 tmp5 += tmp6; 1280 *b1++ = tmp5; 1281 /* Third pixel */ 1282 tmp7 = tmp1 + tmp2; 1283 tmp4 += (tmp7 << 4); 1284 tmp4 += (tmp7 << 2); 1285 tmp7 = tmp6 + tmp3; 1286 tmp5 = *ptrJ++; 1287 tmp4 -= (tmp7 << 2); 1288 tmp4 -= tmp7; 1289 tmp4 += tmp5; 1290 *b1++ = tmp4; 1291 /* Fourth pixel */ 1292 tmp7 = tmp6 + tmp1; 1293 tmp3 += (tmp7 << 4); 1294 tmp3 += (tmp7 << 2); 1295 tmp7 = tmp5 + tmp2; 1296 tmp4 = *ptrJ++; 1297 tmp3 -= (tmp7 << 2); 1298 tmp3 -= tmp7; 1299 tmp3 += tmp4; 1300 *b1++ = tmp3; 1301 tmp7 = tmp4; 1302 tmp4 = tmp6; 1303 tmp6 = tmp2; 1304 tmp2 = tmp7; 1305 tmp3 = tmp5; 1306 tmp5 = tmp1; 1307 } 1308 ptrJ += width - partWidth; 1309 } 1310 1311 /* Second step: calculate vertical interpolation */ 1312 ptrC = table + partWidth; 1313 ptrV = ptrC + 5*partWidth; 1314 for (y = (partHeight >> 2); y; y--) 1315 { 1316 /* 4 pels per iteration */ 1317 for (x = partWidth; x; x--) 1318 { 1319 tmp4 = ptrV[-(i32)partWidth*2]; 1320 tmp5 = ptrV[-(i32)partWidth]; 1321 tmp1 = ptrV[partWidth]; 1322 tmp2 = ptrV[partWidth*2]; 1323 tmp6 = *ptrV++; 1324 1325 tmp7 = tmp4 + tmp1; 1326 tmp2 -= (tmp7 << 2); 1327 tmp2 -= tmp7; 1328 tmp2 += 512; 1329 tmp7 = tmp5 + tmp6; 1330 tmp3 = ptrC[partWidth*2]; 1331 tmp2 += (tmp7 << 4); 1332 tmp2 += (tmp7 << 2); 1333 tmp2 += tmp3; 1334 tmp7 = clp[tmp2>>10]; 1335 tmp1 += 512; 1336 mb[48] = (u8)tmp7; 1337 1338 tmp7 = tmp3 + tmp6; 1339 tmp1 -= (tmp7 << 2); 1340 tmp1 -= tmp7; 1341 tmp7 = tmp4 + tmp5; 1342 tmp2 = ptrC[partWidth]; 1343 tmp1 += (tmp7 << 4); 1344 tmp1 += (tmp7 << 2); 1345 tmp1 += tmp2; 1346 tmp7 = clp[tmp1>>10]; 1347 tmp6 += 512; 1348 mb[32] = (u8)tmp7; 1349 1350 tmp1 = *ptrC; 1351 tmp7 = tmp2 + tmp5; 1352 tmp6 -= (tmp7 << 2); 1353 tmp6 -= tmp7; 1354 tmp7 = tmp4 + tmp3; 1355 tmp6 += (tmp7 << 4); 1356 tmp6 += (tmp7 << 2); 1357 tmp6 += tmp1; 1358 tmp7 = clp[tmp6>>10]; 1359 tmp5 += 512; 1360 mb[16] = (u8)tmp7; 1361 1362 tmp6 = ptrC[-(i32)partWidth]; 1363 tmp1 += tmp4; 1364 tmp5 -= (tmp1 << 2); 1365 tmp5 -= tmp1; 1366 tmp3 += tmp2; 1367 tmp5 += (tmp3 << 4); 1368 tmp5 += (tmp3 << 2); 1369 tmp5 += tmp6; 1370 tmp7 = clp[tmp5>>10]; 1371 *mb++ = (u8)tmp7; 1372 ptrC++; 1373 } 1374 mb += 4*16 - partWidth; 1375 ptrC += 3*partWidth; 1376 ptrV += 3*partWidth; 1377 } 1378 1379 } 1380 1381 1382 /*------------------------------------------------------------------------------ 1383 1384 Function: h264bsdInterpolateMidVerQuarter 1385 1386 Functional description: 1387 Function to perform horizontal and vertical interpolation of pixel 1388 position 'f' or 'q' for a block. Overfilling is done only if needed. 1389 Reference image (ref) is read at correct position and the predicted 1390 part is written to macroblock array (mb) 1391 1392 ------------------------------------------------------------------------------*/ 1393 1394 void h264bsdInterpolateMidVerQuarter( 1395 u8 *ref, 1396 u8 *mb, 1397 i32 x0, 1398 i32 y0, 1399 u32 width, 1400 u32 height, 1401 u32 partWidth, 1402 u32 partHeight, 1403 u32 verOffset) /* 0 for pixel f, 1 for pixel q */ 1404 { 1405 u32 p1[21*21/4+1]; 1406 u32 x, y; 1407 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 1408 i32 *ptrC, *ptrV, *ptrInt, *b1; 1409 u8 *ptrJ; 1410 i32 table[21*16]; 1411 const u8 *clp = h264bsdClip + 512; 1412 1413 /* Code */ 1414 1415 ASSERT(ref); 1416 ASSERT(mb); 1417 1418 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || 1419 (y0 < 0) || ((u32)y0+partHeight+5 > height)) 1420 { 1421 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, 1422 partWidth+5, partHeight+5, partWidth+5); 1423 1424 x0 = 0; 1425 y0 = 0; 1426 ref = (u8*)p1; 1427 width = partWidth+5; 1428 } 1429 1430 ref += (u32)y0 * width + (u32)x0; 1431 1432 b1 = table; 1433 ptrJ = ref + 5; 1434 1435 /* First step: calculate intermediate values for 1436 * horizontal interpolation */ 1437 for (y = partHeight + 5; y; y--) 1438 { 1439 tmp6 = *(ptrJ - 5); 1440 tmp5 = *(ptrJ - 4); 1441 tmp4 = *(ptrJ - 3); 1442 tmp3 = *(ptrJ - 2); 1443 tmp2 = *(ptrJ - 1); 1444 for (x = (partWidth >> 2); x; x--) 1445 { 1446 /* First pixel */ 1447 tmp7 = tmp3 + tmp4; 1448 tmp6 += (tmp7 << 4); 1449 tmp6 += (tmp7 << 2); 1450 tmp7 = tmp2 + tmp5; 1451 tmp1 = *ptrJ++; 1452 tmp6 -= (tmp7 << 2); 1453 tmp6 -= tmp7; 1454 tmp6 += tmp1; 1455 *b1++ = tmp6; 1456 /* Second pixel */ 1457 tmp7 = tmp2 + tmp3; 1458 tmp5 += (tmp7 << 4); 1459 tmp5 += (tmp7 << 2); 1460 tmp7 = tmp1 + tmp4; 1461 tmp6 = *ptrJ++; 1462 tmp5 -= (tmp7 << 2); 1463 tmp5 -= tmp7; 1464 tmp5 += tmp6; 1465 *b1++ = tmp5; 1466 /* Third pixel */ 1467 tmp7 = tmp1 + tmp2; 1468 tmp4 += (tmp7 << 4); 1469 tmp4 += (tmp7 << 2); 1470 tmp7 = tmp6 + tmp3; 1471 tmp5 = *ptrJ++; 1472 tmp4 -= (tmp7 << 2); 1473 tmp4 -= tmp7; 1474 tmp4 += tmp5; 1475 *b1++ = tmp4; 1476 /* Fourth pixel */ 1477 tmp7 = tmp6 + tmp1; 1478 tmp3 += (tmp7 << 4); 1479 tmp3 += (tmp7 << 2); 1480 tmp7 = tmp5 + tmp2; 1481 tmp4 = *ptrJ++; 1482 tmp3 -= (tmp7 << 2); 1483 tmp3 -= tmp7; 1484 tmp3 += tmp4; 1485 *b1++ = tmp3; 1486 tmp7 = tmp4; 1487 tmp4 = tmp6; 1488 tmp6 = tmp2; 1489 tmp2 = tmp7; 1490 tmp3 = tmp5; 1491 tmp5 = tmp1; 1492 } 1493 ptrJ += width - partWidth; 1494 } 1495 1496 /* Second step: calculate vertical interpolation and average */ 1497 ptrC = table + partWidth; 1498 ptrV = ptrC + 5*partWidth; 1499 /* Pointer to integer sample position, either M or R */ 1500 ptrInt = ptrC + (2+verOffset)*partWidth; 1501 for (y = (partHeight >> 2); y; y--) 1502 { 1503 for (x = partWidth; x; x--) 1504 { 1505 tmp4 = ptrV[-(i32)partWidth*2]; 1506 tmp5 = ptrV[-(i32)partWidth]; 1507 tmp1 = ptrV[partWidth]; 1508 tmp2 = ptrV[partWidth*2]; 1509 tmp6 = *ptrV++; 1510 1511 tmp7 = tmp4 + tmp1; 1512 tmp2 -= (tmp7 << 2); 1513 tmp2 -= tmp7; 1514 tmp2 += 512; 1515 tmp7 = tmp5 + tmp6; 1516 tmp3 = ptrC[partWidth*2]; 1517 tmp2 += (tmp7 << 4); 1518 tmp2 += (tmp7 << 2); 1519 tmp7 = ptrInt[partWidth*2]; 1520 tmp2 += tmp3; 1521 tmp2 = clp[tmp2>>10]; 1522 tmp7 += 16; 1523 tmp7 = clp[tmp7>>5]; 1524 tmp1 += 512; 1525 tmp2++; 1526 mb[48] = (u8)((tmp7 + tmp2) >> 1); 1527 1528 tmp7 = tmp3 + tmp6; 1529 tmp1 -= (tmp7 << 2); 1530 tmp1 -= tmp7; 1531 tmp7 = tmp4 + tmp5; 1532 tmp2 = ptrC[partWidth]; 1533 tmp1 += (tmp7 << 4); 1534 tmp1 += (tmp7 << 2); 1535 tmp7 = ptrInt[partWidth]; 1536 tmp1 += tmp2; 1537 tmp1 = clp[tmp1>>10]; 1538 tmp7 += 16; 1539 tmp7 = clp[tmp7>>5]; 1540 tmp6 += 512; 1541 tmp1++; 1542 mb[32] = (u8)((tmp7 + tmp1) >> 1); 1543 1544 tmp1 = *ptrC; 1545 tmp7 = tmp2 + tmp5; 1546 tmp6 -= (tmp7 << 2); 1547 tmp6 -= tmp7; 1548 tmp7 = tmp4 + tmp3; 1549 tmp6 += (tmp7 << 4); 1550 tmp6 += (tmp7 << 2); 1551 tmp7 = *ptrInt; 1552 tmp6 += tmp1; 1553 tmp6 = clp[tmp6>>10]; 1554 tmp7 += 16; 1555 tmp7 = clp[tmp7>>5]; 1556 tmp5 += 512; 1557 tmp6++; 1558 mb[16] = (u8)((tmp7 + tmp6) >> 1); 1559 1560 tmp6 = ptrC[-(i32)partWidth]; 1561 tmp1 += tmp4; 1562 tmp5 -= (tmp1 << 2); 1563 tmp5 -= tmp1; 1564 tmp3 += tmp2; 1565 tmp5 += (tmp3 << 4); 1566 tmp5 += (tmp3 << 2); 1567 tmp7 = ptrInt[-(i32)partWidth]; 1568 tmp5 += tmp6; 1569 tmp5 = clp[tmp5>>10]; 1570 tmp7 += 16; 1571 tmp7 = clp[tmp7>>5]; 1572 tmp5++; 1573 *mb++ = (u8)((tmp7 + tmp5) >> 1); 1574 ptrC++; 1575 ptrInt++; 1576 } 1577 mb += 4*16 - partWidth; 1578 ptrC += 3*partWidth; 1579 ptrV += 3*partWidth; 1580 ptrInt += 3*partWidth; 1581 } 1582 1583 } 1584 1585 1586 /*------------------------------------------------------------------------------ 1587 1588 Function: h264bsdInterpolateMidHorQuarter 1589 1590 Functional description: 1591 Function to perform horizontal and vertical interpolation of pixel 1592 position 'i' or 'k' for a block. Overfilling is done only if needed. 1593 Reference image (ref) is read at correct position and the predicted 1594 part is written to macroblock array (mb) 1595 1596 ------------------------------------------------------------------------------*/ 1597 1598 void h264bsdInterpolateMidHorQuarter( 1599 u8 *ref, 1600 u8 *mb, 1601 i32 x0, 1602 i32 y0, 1603 u32 width, 1604 u32 height, 1605 u32 partWidth, 1606 u32 partHeight, 1607 u32 horOffset) /* 0 for pixel i, 1 for pixel k */ 1608 { 1609 u32 p1[21*21/4+1]; 1610 u32 x, y; 1611 i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; 1612 i32 *ptrJ, *ptrInt, *h1; 1613 u8 *ptrC, *ptrV; 1614 i32 table[21*16]; 1615 i32 tableWidth = (i32)partWidth+5; 1616 const u8 *clp = h264bsdClip + 512; 1617 1618 /* Code */ 1619 1620 ASSERT(ref); 1621 ASSERT(mb); 1622 1623 if ((x0 < 0) || ((u32)x0+partWidth+5 > width) || 1624 (y0 < 0) || ((u32)y0+partHeight+5 > height)) 1625 { 1626 h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height, 1627 partWidth+5, partHeight+5, partWidth+5); 1628 1629 x0 = 0; 1630 y0 = 0; 1631 ref = (u8*)p1; 1632 width = partWidth+5; 1633 } 1634 1635 ref += (u32)y0 * width + (u32)x0; 1636 1637 h1 = table + tableWidth; 1638 ptrC = ref + width; 1639 ptrV = ptrC + 5*width; 1640 1641 /* First step: calculate intermediate values for 1642 * vertical interpolation */ 1643 for (y = (partHeight >> 2); y; y--) 1644 { 1645 for (x = (u32)tableWidth; x; x--) 1646 { 1647 tmp4 = ptrV[-(i32)width*2]; 1648 tmp5 = ptrV[-(i32)width]; 1649 tmp1 = ptrV[width]; 1650 tmp2 = ptrV[width*2]; 1651 tmp6 = *ptrV++; 1652 1653 tmp7 = tmp4 + tmp1; 1654 tmp2 -= (tmp7 << 2); 1655 tmp2 -= tmp7; 1656 tmp7 = tmp5 + tmp6; 1657 tmp3 = ptrC[width*2]; 1658 tmp2 += (tmp7 << 4); 1659 tmp2 += (tmp7 << 2); 1660 tmp2 += tmp3; 1661 h1[tableWidth*2] = tmp2; 1662 1663 tmp7 = tmp3 + tmp6; 1664 tmp1 -= (tmp7 << 2); 1665 tmp1 -= tmp7; 1666 tmp7 = tmp4 + tmp5; 1667 tmp2 = ptrC[width]; 1668 tmp1 += (tmp7 << 4); 1669 tmp1 += (tmp7 << 2); 1670 tmp1 += tmp2; 1671 h1[tableWidth] = tmp1; 1672 1673 tmp1 = *ptrC; 1674 tmp7 = tmp2 + tmp5; 1675 tmp6 -= (tmp7 << 2); 1676 tmp6 -= tmp7; 1677 tmp7 = tmp4 + tmp3; 1678 tmp6 += (tmp7 << 4); 1679 tmp6 += (tmp7 << 2); 1680 tmp6 += tmp1; 1681 *h1 = tmp6; 1682 1683 tmp6 = ptrC[-(i32)width]; 1684 tmp1 += tmp4; 1685 tmp5 -= (tmp1 << 2); 1686 tmp5 -= tmp1; 1687 tmp3 += tmp2; 1688 tmp5 += (tmp3 << 4); 1689 tmp5 += (tmp3 << 2); 1690 tmp5 += tmp6; 1691 h1[-tableWidth] = tmp5; 1692 h1++; 1693 ptrC++; 1694 } 1695 ptrC += 4*width - partWidth - 5; 1696 ptrV += 4*width - partWidth - 5; 1697 h1 += 3*tableWidth; 1698 } 1699 1700 /* Second step: calculate horizontal interpolation and average */ 1701 ptrJ = table + 5; 1702 /* Pointer to integer sample position, either G or H */ 1703 ptrInt = table + 2 + horOffset; 1704 for (y = partHeight; y; y--) 1705 { 1706 tmp6 = *(ptrJ - 5); 1707 tmp5 = *(ptrJ - 4); 1708 tmp4 = *(ptrJ - 3); 1709 tmp3 = *(ptrJ - 2); 1710 tmp2 = *(ptrJ - 1); 1711 for (x = (partWidth>>2); x; x--) 1712 { 1713 /* First pixel */ 1714 tmp6 += 512; 1715 tmp7 = tmp3 + tmp4; 1716 tmp6 += (tmp7 << 4); 1717 tmp6 += (tmp7 << 2); 1718 tmp7 = tmp2 + tmp5; 1719 tmp1 = *ptrJ++; 1720 tmp6 -= (tmp7 << 2); 1721 tmp6 -= tmp7; 1722 tmp7 = *ptrInt++; 1723 tmp6 += tmp1; 1724 tmp6 = clp[tmp6 >> 10]; 1725 tmp7 += 16; 1726 tmp7 = clp[tmp7 >> 5]; 1727 tmp5 += 512; 1728 tmp6++; 1729 *mb++ = (u8)((tmp6 + tmp7) >> 1); 1730 /* Second pixel */ 1731 tmp7 = tmp2 + tmp3; 1732 tmp5 += (tmp7 << 4); 1733 tmp5 += (tmp7 << 2); 1734 tmp7 = tmp1 + tmp4; 1735 tmp6 = *ptrJ++; 1736 tmp5 -= (tmp7 << 2); 1737 tmp5 -= tmp7; 1738 tmp7 = *ptrInt++; 1739 tmp5 += tmp6; 1740 tmp5 = clp[tmp5 >> 10]; 1741 tmp7 += 16; 1742 tmp7 = clp[tmp7 >> 5]; 1743 tmp4 += 512; 1744 tmp5++; 1745 *mb++ = (u8)((tmp5 + tmp7) >> 1); 1746 /* Third pixel */ 1747 tmp7 = tmp1 + tmp2; 1748 tmp4 += (tmp7 << 4); 1749 tmp4 += (tmp7 << 2); 1750 tmp7 = tmp6 + tmp3; 1751 tmp5 = *ptrJ++; 1752 tmp4 -= (tmp7 << 2); 1753 tmp4 -= tmp7; 1754 tmp7 = *ptrInt++; 1755 tmp4 += tmp5; 1756 tmp4 = clp[tmp4 >> 10]; 1757 tmp7 += 16; 1758 tmp7 = clp[tmp7 >> 5]; 1759 tmp3 += 512; 1760 tmp4++; 1761 *mb++ = (u8)((tmp4 + tmp7) >> 1); 1762 /* Fourth pixel */ 1763 tmp7 = tmp6 + tmp1; 1764 tmp3 += (tmp7 << 4); 1765 tmp3 += (tmp7 << 2); 1766 tmp7 = tmp5 + tmp2; 1767 tmp4 = *ptrJ++; 1768 tmp3 -= (tmp7 << 2); 1769 tmp3 -= tmp7; 1770 tmp7 = *ptrInt++; 1771 tmp3 += tmp4; 1772 tmp3 = clp[tmp3 >> 10]; 1773 tmp7 += 16; 1774 tmp7 = clp[tmp7 >> 5]; 1775 tmp3++; 1776 *mb++ = (u8)((tmp3 + tmp7) >> 1); 1777 tmp3 = tmp5; 1778 tmp5 = tmp1; 1779 tmp7 = tmp4; 1780 tmp4 = tmp6; 1781 tmp6 = tmp2; 1782 tmp2 = tmp7; 1783 } 1784 ptrJ += 5; 1785 ptrInt += 5; 1786 mb += 16 - partWidth; 1787 } 1788 1789 } 1790 1791 1792 /*------------------------------------------------------------------------------ 1793 1794 Function: h264bsdPredictSamples 1795 1796 Functional description: 1797 This function reconstructs a prediction for a macroblock partition. 1798 The prediction is either copied or interpolated using the reference 1799 frame and the motion vector. Both luminance and chrominance parts are 1800 predicted. The prediction is stored in given macroblock array (data). 1801 Inputs: 1802 data pointer to macroblock array (384 bytes) for output 1803 mv pointer to motion vector used for prediction 1804 refPic pointer to reference picture structure 1805 xA x-coordinate for current macroblock 1806 yA y-coordinate for current macroblock 1807 partX x-offset for partition in macroblock 1808 partY y-offset for partition in macroblock 1809 partWidth width of partition 1810 partHeight height of partition 1811 Outputs: 1812 data macroblock array (16x16+8x8+8x8) where predicted 1813 partition is stored at correct position 1814 1815 ------------------------------------------------------------------------------*/ 1816 1817 void h264bsdPredictSamples( 1818 u8 *data, 1819 mv_t *mv, 1820 image_t *refPic, 1821 u32 xA, 1822 u32 yA, 1823 u32 partX, 1824 u32 partY, 1825 u32 partWidth, 1826 u32 partHeight) 1827 1828 { 1829 1830 /* Variables */ 1831 1832 u32 xFrac, yFrac, width, height; 1833 i32 xInt, yInt; 1834 u8 *lumaPartData; 1835 1836 /* Code */ 1837 1838 ASSERT(data); 1839 ASSERT(mv); 1840 ASSERT(partWidth); 1841 ASSERT(partHeight); 1842 ASSERT(refPic); 1843 ASSERT(refPic->data); 1844 ASSERT(refPic->width); 1845 ASSERT(refPic->height); 1846 1847 /* luma */ 1848 lumaPartData = data + 16*partY + partX; 1849 1850 xFrac = mv->hor & 0x3; 1851 yFrac = mv->ver & 0x3; 1852 1853 width = 16 * refPic->width; 1854 height = 16 * refPic->height; 1855 1856 xInt = (i32)xA + (i32)partX + (mv->hor >> 2); 1857 yInt = (i32)yA + (i32)partY + (mv->ver >> 2); 1858 1859 ASSERT(lumaFracPos[xFrac][yFrac] < 16); 1860 1861 switch (lumaFracPos[xFrac][yFrac]) 1862 { 1863 case 0: /* G */ 1864 h264bsdFillBlock(refPic->data, lumaPartData, 1865 xInt,yInt,width,height,partWidth,partHeight,16); 1866 break; 1867 case 1: /* d */ 1868 h264bsdInterpolateVerQuarter(refPic->data, lumaPartData, 1869 xInt, yInt-2, width, height, partWidth, partHeight, 0); 1870 break; 1871 case 2: /* h */ 1872 h264bsdInterpolateVerHalf(refPic->data, lumaPartData, 1873 xInt, yInt-2, width, height, partWidth, partHeight); 1874 break; 1875 case 3: /* n */ 1876 h264bsdInterpolateVerQuarter(refPic->data, lumaPartData, 1877 xInt, yInt-2, width, height, partWidth, partHeight, 1); 1878 break; 1879 case 4: /* a */ 1880 h264bsdInterpolateHorQuarter(refPic->data, lumaPartData, 1881 xInt-2, yInt, width, height, partWidth, partHeight, 0); 1882 break; 1883 case 5: /* e */ 1884 h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, 1885 xInt-2, yInt-2, width, height, partWidth, partHeight, 0); 1886 break; 1887 case 6: /* i */ 1888 h264bsdInterpolateMidHorQuarter(refPic->data, lumaPartData, 1889 xInt-2, yInt-2, width, height, partWidth, partHeight, 0); 1890 break; 1891 case 7: /* p */ 1892 h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, 1893 xInt-2, yInt-2, width, height, partWidth, partHeight, 2); 1894 break; 1895 case 8: /* b */ 1896 h264bsdInterpolateHorHalf(refPic->data, lumaPartData, 1897 xInt-2, yInt, width, height, partWidth, partHeight); 1898 break; 1899 case 9: /* f */ 1900 h264bsdInterpolateMidVerQuarter(refPic->data, lumaPartData, 1901 xInt-2, yInt-2, width, height, partWidth, partHeight, 0); 1902 break; 1903 case 10: /* j */ 1904 h264bsdInterpolateMidHalf(refPic->data, lumaPartData, 1905 xInt-2, yInt-2, width, height, partWidth, partHeight); 1906 break; 1907 case 11: /* q */ 1908 h264bsdInterpolateMidVerQuarter(refPic->data, lumaPartData, 1909 xInt-2, yInt-2, width, height, partWidth, partHeight, 1); 1910 break; 1911 case 12: /* c */ 1912 h264bsdInterpolateHorQuarter(refPic->data, lumaPartData, 1913 xInt-2, yInt, width, height, partWidth, partHeight, 1); 1914 break; 1915 case 13: /* g */ 1916 h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, 1917 xInt-2, yInt-2, width, height, partWidth, partHeight, 1); 1918 break; 1919 case 14: /* k */ 1920 h264bsdInterpolateMidHorQuarter(refPic->data, lumaPartData, 1921 xInt-2, yInt-2, width, height, partWidth, partHeight, 1); 1922 break; 1923 default: /* case 15, r */ 1924 h264bsdInterpolateHorVerQuarter(refPic->data, lumaPartData, 1925 xInt-2, yInt-2, width, height, partWidth, partHeight, 3); 1926 break; 1927 } 1928 1929 /* chroma */ 1930 PredictChroma( 1931 data + 16*16 + (partY>>1)*8 + (partX>>1), 1932 xA + partX, 1933 yA + partY, 1934 partWidth, 1935 partHeight, 1936 mv, 1937 refPic); 1938 1939 } 1940 1941 #else /* H264DEC_OMXDL */ 1942 /*------------------------------------------------------------------------------ 1943 1944 Function: h264bsdPredictSamples 1945 1946 Functional description: 1947 This function reconstructs a prediction for a macroblock partition. 1948 The prediction is either copied or interpolated using the reference 1949 frame and the motion vector. Both luminance and chrominance parts are 1950 predicted. The prediction is stored in given macroblock array (data). 1951 Inputs: 1952 data pointer to macroblock array (384 bytes) for output 1953 mv pointer to motion vector used for prediction 1954 refPic pointer to reference picture structure 1955 xA x-coordinate for current macroblock 1956 yA y-coordinate for current macroblock 1957 partX x-offset for partition in macroblock 1958 partY y-offset for partition in macroblock 1959 partWidth width of partition 1960 partHeight height of partition 1961 Outputs: 1962 data macroblock array (16x16+8x8+8x8) where predicted 1963 partition is stored at correct position 1964 1965 ------------------------------------------------------------------------------*/ 1966 1967 /*lint -e{550} Symbol 'res' not accessed */ 1968 void h264bsdPredictSamples( 1969 u8 *data, 1970 mv_t *mv, 1971 image_t *refPic, 1972 u32 colAndRow, 1973 u32 part, 1974 u8 *pFill) 1975 1976 { 1977 1978 /* Variables */ 1979 1980 u32 xFrac, yFrac; 1981 u32 width, height; 1982 i32 xInt, yInt, x0, y0; 1983 u8 *partData, *ref; 1984 OMXSize roi; 1985 u32 fillWidth; 1986 u32 fillHeight; 1987 OMXResult res; 1988 u32 xA, yA; 1989 u32 partX, partY; 1990 u32 partWidth, partHeight; 1991 1992 /* Code */ 1993 1994 ASSERT(data); 1995 ASSERT(mv); 1996 ASSERT(refPic); 1997 ASSERT(refPic->data); 1998 ASSERT(refPic->width); 1999 ASSERT(refPic->height); 2000 2001 xA = (colAndRow & 0xFFFF0000) >> 16; 2002 yA = (colAndRow & 0x0000FFFF); 2003 2004 partX = (part & 0xFF000000) >> 24; 2005 partY = (part & 0x00FF0000) >> 16; 2006 partWidth = (part & 0x0000FF00) >> 8; 2007 partHeight = (part & 0x000000FF); 2008 2009 ASSERT(partWidth); 2010 ASSERT(partHeight); 2011 2012 /* luma */ 2013 partData = data + 16*partY + partX; 2014 2015 xFrac = mv->hor & 0x3; 2016 yFrac = mv->ver & 0x3; 2017 2018 width = 16 * refPic->width; 2019 height = 16 * refPic->height; 2020 2021 xInt = (i32)xA + (i32)partX + (mv->hor >> 2); 2022 yInt = (i32)yA + (i32)partY + (mv->ver >> 2); 2023 2024 x0 = (xFrac) ? xInt-2 : xInt; 2025 y0 = (yFrac) ? yInt-2 : yInt; 2026 2027 if (xFrac) 2028 { 2029 if (partWidth == 16) 2030 fillWidth = 32; 2031 else 2032 fillWidth = 16; 2033 } 2034 else 2035 fillWidth = (partWidth*2); 2036 if (yFrac) 2037 fillHeight = partHeight+5; 2038 else 2039 fillHeight = partHeight; 2040 2041 2042 if ((x0 < 0) || ((u32)x0+fillWidth > width) || 2043 (y0 < 0) || ((u32)y0+fillHeight > height)) 2044 { 2045 h264bsdFillBlock(refPic->data, (u8*)pFill, x0, y0, width, height, 2046 fillWidth, fillHeight, fillWidth); 2047 2048 x0 = 0; 2049 y0 = 0; 2050 ref = pFill; 2051 width = fillWidth; 2052 if (yFrac) 2053 ref += 2*width; 2054 if (xFrac) 2055 ref += 2; 2056 } 2057 else 2058 { 2059 /*lint --e(737) Loss of sign */ 2060 ref = refPic->data + yInt*width + xInt; 2061 } 2062 /* Luma interpolation */ 2063 roi.width = (i32)partWidth; 2064 roi.height = (i32)partHeight; 2065 2066 res = omxVCM4P10_InterpolateLuma(ref, (i32)width, partData, 16, 2067 (i32)xFrac, (i32)yFrac, roi); 2068 ASSERT(res == 0); 2069 2070 /* Chroma */ 2071 width = 8 * refPic->width; 2072 height = 8 * refPic->height; 2073 2074 x0 = ((xA + partX) >> 1) + (mv->hor >> 3); 2075 y0 = ((yA + partY) >> 1) + (mv->ver >> 3); 2076 xFrac = mv->hor & 0x7; 2077 yFrac = mv->ver & 0x7; 2078 2079 ref = refPic->data + 256 * refPic->width * refPic->height; 2080 2081 roi.width = (i32)(partWidth >> 1); 2082 fillWidth = ((partWidth >> 1) + 8) & ~0x7; 2083 roi.height = (i32)(partHeight >> 1); 2084 fillHeight = (partHeight >> 1) + 1; 2085 2086 if ((x0 < 0) || ((u32)x0+fillWidth > width) || 2087 (y0 < 0) || ((u32)y0+fillHeight > height)) 2088 { 2089 h264bsdFillBlock(ref, pFill, x0, y0, width, height, 2090 fillWidth, fillHeight, fillWidth); 2091 ref += width * height; 2092 h264bsdFillBlock(ref, pFill + fillWidth*fillHeight, 2093 x0, y0, width, height, fillWidth, 2094 fillHeight, fillWidth); 2095 2096 ref = pFill; 2097 x0 = 0; 2098 y0 = 0; 2099 width = fillWidth; 2100 height = fillHeight; 2101 } 2102 2103 partData = data + 16*16 + (partY>>1)*8 + (partX>>1); 2104 2105 /* Chroma interpolation */ 2106 /*lint --e(737) Loss of sign */ 2107 ref += y0 * width + x0; 2108 res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8, 2109 (u32)roi.width, (u32)roi.height, xFrac, yFrac); 2110 ASSERT(res == 0); 2111 partData += 8 * 8; 2112 ref += height * width; 2113 res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8, 2114 (u32)roi.width, (u32)roi.height, xFrac, yFrac); 2115 ASSERT(res == 0); 2116 2117 } 2118 2119 #endif /* H264DEC_OMXDL */ 2120 2121 2122 /*------------------------------------------------------------------------------ 2123 2124 Function: FillRow1 2125 2126 Functional description: 2127 This function gets a row of reference pels in a 'normal' case when no 2128 overfilling is necessary. 2129 2130 ------------------------------------------------------------------------------*/ 2131 2132 static void FillRow1( 2133 u8 *ref, 2134 u8 *fill, 2135 i32 left, 2136 i32 center, 2137 i32 right) 2138 { 2139 2140 ASSERT(ref); 2141 ASSERT(fill); 2142 2143 H264SwDecMemcpy(fill, ref, (u32)center); 2144 2145 /*lint -e(715) */ 2146 } 2147 2148 2149 /*------------------------------------------------------------------------------ 2150 2151 Function: h264bsdFillRow7 2152 2153 Functional description: 2154 This function gets a row of reference pels when horizontal coordinate 2155 is partly negative or partly greater than reference picture width 2156 (overfilling some pels on left and/or right edge). 2157 Inputs: 2158 ref pointer to reference samples 2159 left amount of pixels to overfill on left-edge 2160 center amount of pixels to copy 2161 right amount of pixels to overfill on right-edge 2162 Outputs: 2163 fill pointer where samples are stored 2164 2165 ------------------------------------------------------------------------------*/ 2166 #ifndef H264DEC_NEON 2167 void h264bsdFillRow7( 2168 u8 *ref, 2169 u8 *fill, 2170 i32 left, 2171 i32 center, 2172 i32 right) 2173 { 2174 u8 tmp; 2175 2176 ASSERT(ref); 2177 ASSERT(fill); 2178 2179 if (left) 2180 tmp = *ref; 2181 2182 for ( ; left; left--) 2183 /*lint -esym(644,tmp) tmp is initialized if used */ 2184 *fill++ = tmp; 2185 2186 for ( ; center; center--) 2187 *fill++ = *ref++; 2188 2189 if (right) 2190 tmp = ref[-1]; 2191 2192 for ( ; right; right--) 2193 /*lint -esym(644,tmp) tmp is initialized if used */ 2194 *fill++ = tmp; 2195 } 2196 #endif 2197 /*------------------------------------------------------------------------------ 2198 2199 Function: h264bsdFillBlock 2200 2201 Functional description: 2202 This function gets a block of reference pels. It determines whether 2203 overfilling is needed or not and repeatedly calls an appropriate 2204 function (by using a function pointer) that fills one row the block. 2205 Inputs: 2206 ref pointer to reference frame 2207 x0 x-coordinate for block 2208 y0 y-coordinate for block 2209 width width of reference frame 2210 height height of reference frame 2211 blockWidth width of block 2212 blockHeight height of block 2213 fillScanLength length of a line in output array (pixels) 2214 Outputs: 2215 fill pointer to array where output block is written 2216 2217 ------------------------------------------------------------------------------*/ 2218 2219 void h264bsdFillBlock( 2220 u8 *ref, 2221 u8 *fill, 2222 i32 x0, 2223 i32 y0, 2224 u32 width, 2225 u32 height, 2226 u32 blockWidth, 2227 u32 blockHeight, 2228 u32 fillScanLength) 2229 2230 { 2231 2232 /* Variables */ 2233 2234 i32 xstop, ystop; 2235 void (*fp)(u8*, u8*, i32, i32, i32); 2236 i32 left, x, right; 2237 i32 top, y, bottom; 2238 2239 /* Code */ 2240 2241 ASSERT(ref); 2242 ASSERT(fill); 2243 ASSERT(width); 2244 ASSERT(height); 2245 ASSERT(fill); 2246 ASSERT(blockWidth); 2247 ASSERT(blockHeight); 2248 2249 xstop = x0 + (i32)blockWidth; 2250 ystop = y0 + (i32)blockHeight; 2251 2252 /* Choose correct function whether overfilling on left-edge or right-edge 2253 * is needed or not */ 2254 if (x0 >= 0 && xstop <= (i32)width) 2255 fp = FillRow1; 2256 else 2257 fp = h264bsdFillRow7; 2258 2259 if (ystop < 0) 2260 y0 = -(i32)blockHeight; 2261 2262 if (xstop < 0) 2263 x0 = -(i32)blockWidth; 2264 2265 if (y0 > (i32)height) 2266 y0 = (i32)height; 2267 2268 if (x0 > (i32)width) 2269 x0 = (i32)width; 2270 2271 xstop = x0 + (i32)blockWidth; 2272 ystop = y0 + (i32)blockHeight; 2273 2274 if (x0 > 0) 2275 ref += x0; 2276 2277 if (y0 > 0) 2278 ref += y0 * (i32)width; 2279 2280 left = x0 < 0 ? -x0 : 0; 2281 right = xstop > (i32)width ? xstop - (i32)width : 0; 2282 x = (i32)blockWidth - left - right; 2283 2284 top = y0 < 0 ? -y0 : 0; 2285 bottom = ystop > (i32)height ? ystop - (i32)height : 0; 2286 y = (i32)blockHeight - top - bottom; 2287 2288 /* Top-overfilling */ 2289 for ( ; top; top-- ) 2290 { 2291 (*fp)(ref, fill, left, x, right); 2292 fill += fillScanLength; 2293 } 2294 2295 /* Lines inside reference image */ 2296 for ( ; y; y-- ) 2297 { 2298 (*fp)(ref, fill, left, x, right); 2299 ref += width; 2300 fill += fillScanLength; 2301 } 2302 2303 ref -= width; 2304 2305 /* Bottom-overfilling */ 2306 for ( ; bottom; bottom-- ) 2307 { 2308 (*fp)(ref, fill, left, x, right); 2309 fill += fillScanLength; 2310 } 2311 } 2312 2313 /*lint +e701 +e702 */ 2314 2315 2316