/******************************************************************************
 *
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
/**
 *******************************************************************************
 * @file
 *  ih264_inter_pred_filters.c
 *
 * @brief
 *  Contains function definitions for inter prediction interpolation filters
 *
 * @author
 *  Ittiam
 *
 * @par List of Functions:
 *  - ih264_inter_pred_luma_copy
 *  - ih264_interleave_copy
 *  - ih264_inter_pred_luma_horz
 *  - ih264_inter_pred_luma_vert
 *  - ih264_inter_pred_luma_horz_hpel_vert_hpel
 *  - ih264_inter_pred_luma_horz_qpel
 *  - ih264_inter_pred_luma_vert_qpel
 *  - ih264_inter_pred_luma_horz_qpel_vert_qpel
 *  - ih264_inter_pred_luma_horz_hpel_vert_qpel
 *  - ih264_inter_pred_luma_horz_qpel_vert_hpel
 *  - ih264_inter_pred_luma_bilinear
 *  - ih264_inter_pred_chroma
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */

/*****************************************************************************/
/* File Includes                                                             */
/*****************************************************************************/

/* User include files */
56 #include "ih264_typedefs.h" 57 #include "ih264_macros.h" 58 #include "ih264_platform_macros.h" 59 #include "ih264_inter_pred_filters.h" 60 61 62 /*****************************************************************************/ 63 /* Constant Data variables */ 64 /*****************************************************************************/ 65 66 /* coefficients for 6 tap filtering*/ 67 const WORD32 ih264_g_six_tap[3] ={1,-5,20}; 68 69 70 /*****************************************************************************/ 71 /* Function definitions . */ 72 /*****************************************************************************/ 73 /** 74 ******************************************************************************* 75 * 76 * @brief 77 * Interprediction luma function for copy 78 * 79 * @par Description: 80 * Copies the array of width 'wd' and height 'ht' from the location pointed 81 * by 'src' to the location pointed by 'dst' 82 * 83 * @param[in] pu1_src 84 * UWORD8 pointer to the source 85 * 86 * @param[out] pu1_dst 87 * UWORD8 pointer to the destination 88 * 89 * @param[in] src_strd 90 * integer source stride 91 * 92 * @param[in] dst_strd 93 * integer destination stride 94 * 95 * 96 * @param[in] ht 97 * integer height of the array 98 * 99 * @param[in] wd 100 * integer width of the array 101 * 102 * @returns 103 * 104 * @remarks 105 * None 106 * 107 ******************************************************************************* 108 */ 109 110 void ih264_inter_pred_luma_copy(UWORD8 *pu1_src, 111 UWORD8 *pu1_dst, 112 WORD32 src_strd, 113 WORD32 dst_strd, 114 WORD32 ht, 115 WORD32 wd, 116 UWORD8* pu1_tmp, 117 WORD32 dydx) 118 { 119 WORD32 row, col; 120 UNUSED(pu1_tmp); 121 UNUSED(dydx); 122 for(row = 0; row < ht; row++) 123 { 124 for(col = 0; col < wd; col++) 125 { 126 pu1_dst[col] = pu1_src[col]; 127 } 128 129 pu1_src += src_strd; 130 pu1_dst += dst_strd; 131 } 132 } 133 134 /** 135 ******************************************************************************* 
136 * 137 * @brief 138 * Fucntion for copying to an interleaved destination 139 * 140 * @par Description: 141 * Copies the array of width 'wd' and height 'ht' from the location pointed 142 * by 'src' to the location pointed by 'dst' 143 * 144 * @param[in] pu1_src 145 * UWORD8 pointer to the source 146 * 147 * @param[out] pu1_dst 148 * UWORD8 pointer to the destination 149 * 150 * @param[in] src_strd 151 * integer source stride 152 * 153 * @param[in] dst_strd 154 * integer destination stride 155 * 156 * @param[in] ht 157 * integer height of the array 158 * 159 * @param[in] wd 160 * integer width of the array 161 * 162 * @returns 163 * 164 * @remarks 165 * The alternate elements of src will be copied to alternate locations in dsr 166 * Other locations are not touched 167 * 168 ******************************************************************************* 169 */ 170 void ih264_interleave_copy(UWORD8 *pu1_src, 171 UWORD8 *pu1_dst, 172 WORD32 src_strd, 173 WORD32 dst_strd, 174 WORD32 ht, 175 WORD32 wd) 176 { 177 WORD32 row, col; 178 wd *= 2; 179 180 for(row = 0; row < ht; row++) 181 { 182 for(col = 0; col < wd; col+=2) 183 { 184 pu1_dst[col] = pu1_src[col]; 185 } 186 187 pu1_src += src_strd; 188 pu1_dst += dst_strd; 189 } 190 } 191 192 /** 193 ******************************************************************************* 194 * 195 * @brief 196 * Interprediction luma filter for horizontal input 197 * 198 * @par Description: 199 * Applies a 6 tap horizontal filter .The output is clipped to 8 bits 200 * sec 8.4.2.2.1 titled "Luma sample interpolation process" 201 * 202 * @param[in] pu1_src 203 * UWORD8 pointer to the source 204 * 205 * @param[out] pu1_dst 206 * UWORD8 pointer to the destination 207 * 208 * @param[in] src_strd 209 * integer source stride 210 * 211 * @param[in] dst_strd 212 * integer destination stride 213 * 214 * @param[in] ht 215 * integer height of the array 216 * 217 * @param[in] wd 218 * integer width of the array 219 * 220 * @returns 221 * 222 * 
@remarks 223 * None 224 * 225 ******************************************************************************* 226 */ 227 void ih264_inter_pred_luma_horz(UWORD8 *pu1_src, 228 UWORD8 *pu1_dst, 229 WORD32 src_strd, 230 WORD32 dst_strd, 231 WORD32 ht, 232 WORD32 wd, 233 UWORD8* pu1_tmp, 234 WORD32 dydx) 235 { 236 WORD32 row, col; 237 WORD16 i2_tmp; 238 UNUSED(pu1_tmp); 239 UNUSED(dydx); 240 241 for(row = 0; row < ht; row++) 242 { 243 for(col = 0; col < wd; col++) 244 { 245 i2_tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/ 246 i2_tmp = ih264_g_six_tap[0] * 247 (pu1_src[col - 2] + pu1_src[col + 3]) 248 + ih264_g_six_tap[1] * 249 (pu1_src[col - 1] + pu1_src[col + 2]) 250 + ih264_g_six_tap[2] * 251 (pu1_src[col] + pu1_src[col + 1]); 252 i2_tmp = (i2_tmp + 16) >> 5; 253 pu1_dst[col] = CLIP_U8(i2_tmp); 254 } 255 256 pu1_src += src_strd; 257 pu1_dst += dst_strd; 258 } 259 260 } 261 262 /** 263 ******************************************************************************* 264 * 265 * @brief 266 * Interprediction luma filter for vertical input 267 * 268 * @par Description: 269 * Applies a 6 tap vertical filter.The output is clipped to 8 bits 270 * sec 8.4.2.2.1 titled "Luma sample interpolation process" 271 * 272 * @param[in] pu1_src 273 * UWORD8 pointer to the source 274 * 275 * @param[out] pu1_dst 276 * UWORD8 pointer to the destination 277 * 278 * @param[in] src_strd 279 * integer source stride 280 * 281 * @param[in] dst_strd 282 * integer destination stride 283 * 284 * @param[in] ht 285 * integer height of the array 286 * 287 * @param[in] wd 288 * integer width of the array 289 * 290 * @returns 291 * 292 * @remarks 293 * None 294 * 295 ******************************************************************************* 296 */ 297 void ih264_inter_pred_luma_vert(UWORD8 *pu1_src, 298 UWORD8 *pu1_dst, 299 WORD32 src_strd, 300 WORD32 dst_strd, 301 WORD32 ht, 302 WORD32 wd, 303 UWORD8* pu1_tmp, 304 WORD32 dydx) 305 { 306 WORD32 row, col; 307 WORD16 i2_tmp; 
308 UNUSED(pu1_tmp); 309 UNUSED(dydx); 310 311 for(row = 0; row < ht; row++) 312 { 313 for(col = 0; col < wd; col++) 314 { 315 i2_tmp = 0; /*ih264_g_six_tap[] is the array containing the filter coeffs*/ 316 i2_tmp = ih264_g_six_tap[0] * 317 (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd]) 318 + ih264_g_six_tap[1] * 319 (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd]) 320 + ih264_g_six_tap[2] * 321 (pu1_src[col] + pu1_src[col + 1 * src_strd]); 322 i2_tmp = (i2_tmp + 16) >> 5; 323 pu1_dst[col] = CLIP_U8(i2_tmp); 324 } 325 pu1_src += src_strd; 326 pu1_dst += dst_strd; 327 } 328 } 329 330 /*! 331 ************************************************************************** 332 * \if Function name : ih264_inter_pred_luma_horz_hpel_vert_hpel \endif 333 * 334 * \brief 335 * This function implements a two stage cascaded six tap filter. It 336 * applies the six tap filter in the horizontal direction on the 337 * predictor values, followed by applying the same filter in the 338 * vertical direction on the output of the first stage. The six tap 339 * filtering operation is described in sec 8.4.2.2.1 titled "Luma sample 340 * interpolation process" 341 * 342 * \param pu1_src: Pointer to the buffer containing the predictor values. 343 * pu1_src could point to the frame buffer or the predictor buffer. 344 * \param pu1_dst: Pointer to the destination buffer where the output of 345 * the six tap filter is stored. 346 * \param ht: Height of the rectangular pixel grid to be interpolated 347 * \param wd: Width of the rectangular pixel grid to be interpolated 348 * \param src_strd: Width of the buffer pointed to by pu1_src. 349 * \param dst_strd: Width of the destination buffer 350 * \param pu1_tmp: temporary buffer. 351 * \param dydx: x and y reference offset for qpel calculations: UNUSED in this function. 352 * 353 * \return 354 * None. 
355 * 356 * \note 357 * This function takes the 8 bit predictor values, applies the six tap 358 * filter in the horizontal direction and outputs the result clipped to 359 * 8 bit precision. The input is stored in the buffer pointed to by 360 * pu1_src while the output is stored in the buffer pointed by pu1_dst. 361 * Both pu1_src and pu1_dst could point to the same buffer i.e. the 362 * six tap filter could be done in place. 363 * 364 ************************************************************************** 365 */ 366 void ih264_inter_pred_luma_horz_hpel_vert_hpel(UWORD8 *pu1_src, 367 UWORD8 *pu1_dst, 368 WORD32 src_strd, 369 WORD32 dst_strd, 370 WORD32 ht, 371 WORD32 wd, 372 UWORD8* pu1_tmp, 373 WORD32 dydx) 374 { 375 WORD32 row, col; 376 WORD32 tmp; 377 WORD16* pi2_pred1_temp; 378 WORD16* pi2_pred1; 379 UNUSED(dydx); 380 pi2_pred1_temp = (WORD16*)pu1_tmp; 381 pi2_pred1_temp += 2; 382 pi2_pred1 = pi2_pred1_temp; 383 for(row = 0; row < ht; row++) 384 { 385 for(col = -2; col < wd + 3; col++) 386 { 387 tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/ 388 tmp = ih264_g_six_tap[0] * 389 (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd]) 390 + ih264_g_six_tap[1] * 391 (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd]) 392 + ih264_g_six_tap[2] * 393 (pu1_src[col] + pu1_src[col + 1 * src_strd]); 394 pi2_pred1_temp[col] = tmp; 395 } 396 pu1_src += src_strd; 397 pi2_pred1_temp = pi2_pred1_temp + wd + 5; 398 } 399 400 for(row = 0; row < ht; row++) 401 { 402 for(col = 0; col < wd; col++) 403 { 404 tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/ 405 tmp = ih264_g_six_tap[0] * 406 (pi2_pred1[col - 2] + pi2_pred1[col + 3]) 407 + ih264_g_six_tap[1] * 408 (pi2_pred1[col - 1] + pi2_pred1[col + 2]) 409 + ih264_g_six_tap[2] * (pi2_pred1[col] + pi2_pred1[col + 1]); 410 tmp = (tmp + 512) >> 10; 411 pu1_dst[col] = CLIP_U8(tmp); 412 } 413 pi2_pred1 += (wd + 5); 414 pu1_dst += dst_strd; 415 } 416 } 417 418 /*! 
 **************************************************************************
 * \if Function name : ih264_inter_pred_luma_horz_qpel \endif
 *
 * \brief
 *    This routine applies the six tap filter to the predictors in the
 *    horizontal direction and averages the result with an integer sample
 *    selected by dydx. The six tap filtering operation is described in
 *    sec 8.4.2.2.1 titled "Luma sample interpolation process"
 *
 * \param pu1_src: Pointer to the buffer containing the predictor values.
 *     pu1_src could point to the frame buffer or the predictor buffer.
 * \param pu1_dst: Pointer to the destination buffer where the output of
 *     the six tap filter is stored.
 * \param ht: Height of the rectangular pixel grid to be interpolated
 * \param wd: Width of the rectangular pixel grid to be interpolated
 * \param src_strd: Width of the buffer pointed to by pu1_src.
 * \param dst_strd: Width of the destination buffer
 * \param pu1_tmp: temporary buffer: UNUSED in this function
 * \param dydx: x and y reference offset for qpel calculations.
 *
 * \return
 *    None.
 *
 * \note
 *    This function takes the 8 bit predictor values, applies the six tap
 *    filter in the horizontal direction and clips the result to 8 bit
 *    precision. Each clipped value is then averaged, with rounding, with
 *    the source sample at column offset ((dydx & 3) >> 1). The input is
 *    read from the buffer pointed to by pu1_src and the output is stored in
 *    the buffer pointed to by pu1_dst. Every output sample reads source
 *    columns -2 to +3 around it, so pu1_src and pu1_dst should not point to
 *    the same buffer.
448 * 449 ************************************************************************** 450 */ 451 void ih264_inter_pred_luma_horz_qpel(UWORD8 *pu1_src, 452 UWORD8 *pu1_dst, 453 WORD32 src_strd, 454 WORD32 dst_strd, 455 WORD32 ht, 456 WORD32 wd, 457 UWORD8* pu1_tmp, 458 WORD32 dydx) 459 { 460 WORD32 row, col; 461 UWORD8 *pu1_pred1; 462 WORD32 x_offset = dydx & 0x3; 463 UNUSED(pu1_tmp); 464 pu1_pred1 = pu1_src + (x_offset >> 1); 465 466 for(row = 0; row < ht; row++) 467 { 468 for(col = 0; col < wd; col++, pu1_src++, pu1_dst++) 469 { 470 WORD16 i2_temp; 471 /* The logic below implements the following equation 472 i2_temp = puc_pred[-2] - 5 * (puc_pred[-1] + puc_pred[2]) + 473 20 * (puc_pred[0] + puc_pred[1]) + puc_pred[3]; */ 474 i2_temp = pu1_src[-2] + pu1_src[3] 475 - (pu1_src[-1] + pu1_src[2]) 476 + ((pu1_src[0] + pu1_src[1] - pu1_src[-1] - pu1_src[2]) << 2) 477 + ((pu1_src[0] + pu1_src[1]) << 4); 478 i2_temp = (i2_temp + 16) >> 5; 479 i2_temp = CLIP_U8(i2_temp); 480 *pu1_dst = (i2_temp + *pu1_pred1 + 1) >> 1; 481 482 pu1_pred1++; 483 } 484 pu1_dst += dst_strd - wd; 485 pu1_src += src_strd - wd; 486 pu1_pred1 += src_strd - wd; 487 } 488 } 489 490 /*! 491 ************************************************************************** 492 * \if Function name : ih264_inter_pred_luma_vert_qpel \endif 493 * 494 * \brief 495 * This routine applies the six tap filter to the predictors in the 496 * vertical direction and interpolates them to obtain pixels at quarter vertical 497 * positions (0, 1/4) and (0, 3/4). The six tap filtering operation is 498 * described in sec 8.4.2.2.1 titled "Luma sample interpolation process" 499 * 500 * \param pu1_src: Pointer to the buffer containing the predictor values. 501 * pu1_src could point to the frame buffer or the predictor buffer. 502 * \param pu1_dst: Pointer to the destination buffer where the output of 503 * the six tap filter is stored. 
504 * \param ht: Height of the rectangular pixel grid to be interpolated 505 * \param wd: Width of the rectangular pixel grid to be interpolated 506 * \param src_strd: Width of the buffer pointed to by puc_pred. 507 * \param dst_strd: Width of the destination buffer 508 * \param pu1_tmp: temporary buffer: UNUSED in this function 509 * \param dydx: x and y reference offset for qpel calculations. 510 * 511 * \return 512 * void 513 * 514 * \note 515 * This function takes the 8 bit predictor values, applies the six tap 516 * filter in the vertical direction and outputs the result clipped to 517 * 8 bit precision. The input is stored in the buffer pointed to by 518 * puc_pred while the output is stored in the buffer pointed by puc_dest. 519 * Both puc_pred and puc_dest could point to the same buffer i.e. the 520 * six tap filter could be done in place. 521 * 522 * \para <title> 523 * <paragraph> 524 * ... 525 ************************************************************************** 526 */ 527 void ih264_inter_pred_luma_vert_qpel(UWORD8 *pu1_src, 528 UWORD8 *pu1_dst, 529 WORD32 src_strd, 530 WORD32 dst_strd, 531 WORD32 ht, 532 WORD32 wd, 533 UWORD8* pu1_tmp, 534 WORD32 dydx) 535 { 536 WORD32 row, col; 537 WORD32 y_offset = dydx >> 2; 538 WORD32 off1, off2, off3; 539 UWORD8 *pu1_pred1; 540 UNUSED(pu1_tmp); 541 y_offset = y_offset & 0x3; 542 543 off1 = src_strd; 544 off2 = src_strd << 1; 545 off3 = off1 + off2; 546 547 pu1_pred1 = pu1_src + (y_offset >> 1) * src_strd; 548 549 for(row = 0; row < ht; row++) 550 { 551 for(col = 0; col < wd; col++, pu1_dst++, pu1_src++, pu1_pred1++) 552 { 553 WORD16 i2_temp; 554 /* The logic below implements the following equation 555 i16_temp = puc_pred[-2*src_strd] + puc_pred[3*src_strd] - 556 5 * (puc_pred[-1*src_strd] + puc_pred[2*src_strd]) + 557 20 * (puc_pred[0] + puc_pred[src_strd]); */ 558 i2_temp = pu1_src[-off2] + pu1_src[off3] 559 - (pu1_src[-off1] + pu1_src[off2]) 560 + ((pu1_src[0] + pu1_src[off1] - pu1_src[-off1] - 
pu1_src[off2]) << 2) 561 + ((pu1_src[0] + pu1_src[off1]) << 4); 562 i2_temp = (i2_temp + 16) >> 5; 563 i2_temp = CLIP_U8(i2_temp); 564 565 *pu1_dst = (i2_temp + *pu1_pred1 + 1) >> 1; 566 } 567 pu1_src += src_strd - wd; 568 pu1_pred1 += src_strd - wd; 569 pu1_dst += dst_strd - wd; 570 } 571 } 572 573 /*! 574 ************************************************************************** 575 * \if Function name : ih264_inter_pred_luma_horz_qpel_vert_qpel \endif 576 * 577 * \brief 578 * This routine applies the six tap filter to the predictors in the 579 * vertical and horizontal direction and averages them to get pixels at locations 580 * (1/4,1/4), (1/4, 3/4), (3/4, 1/4) & (3/4, 3/4). The six tap filtering operation 581 * is described in sec 8.4.2.2.1 titled "Luma sample interpolation process" 582 * 583 * \param pu1_src: Pointer to the buffer containing the predictor values. 584 * pu1_src could point to the frame buffer or the predictor buffer. 585 * \param pu1_dst: Pointer to the destination buffer where the output of 586 * the six tap filter is stored. 587 * \param wd: Width of the rectangular pixel grid to be interpolated 588 * \param ht: Height of the rectangular pixel grid to be interpolated 589 * \param src_strd: Width of the buffer pointed to by puc_pred. 590 * \param dst_strd: Width of the destination buffer 591 * \param pu1_tmp: temporary buffer, UNUSED in this function 592 * \param dydx: x and y reference offset for qpel calculations. 593 * 594 * \return 595 * void 596 * 597 * \note 598 * This function takes the 8 bit predictor values, applies the six tap 599 * filter in the vertical direction and outputs the result clipped to 600 * 8 bit precision. The input is stored in the buffer pointed to by 601 * puc_pred while the output is stored in the buffer pointed by puc_dest. 602 * Both puc_pred and puc_dest could point to the same buffer i.e. the 603 * six tap filter could be done in place. 604 * 605 * \para <title> 606 * <paragraph> 607 * ... 
608 ************************************************************************** 609 */ 610 void ih264_inter_pred_luma_horz_qpel_vert_qpel(UWORD8 *pu1_src, 611 UWORD8 *pu1_dst, 612 WORD32 src_strd, 613 WORD32 dst_strd, 614 WORD32 ht, 615 WORD32 wd, 616 UWORD8* pu1_tmp, 617 WORD32 dydx) 618 { 619 WORD32 row, col; 620 WORD32 x_offset = dydx & 0x3; 621 WORD32 y_offset = dydx >> 2; 622 623 WORD32 off1, off2, off3; 624 UWORD8* pu1_pred_vert, *pu1_pred_horz; 625 UNUSED(pu1_tmp); 626 y_offset = y_offset & 0x3; 627 628 off1 = src_strd; 629 off2 = src_strd << 1; 630 off3 = off1 + off2; 631 632 pu1_pred_horz = pu1_src + (y_offset >> 1) * src_strd; 633 pu1_pred_vert = pu1_src + (x_offset >> 1); 634 635 for(row = 0; row < ht; row++) 636 { 637 for(col = 0; col < wd; 638 col++, pu1_dst++, pu1_pred_vert++, pu1_pred_horz++) 639 { 640 WORD16 i2_temp_vert, i2_temp_horz; 641 /* The logic below implements the following equation 642 i2_temp = puc_pred[-2*src_strd] + puc_pred[3*src_strd] - 643 5 * (puc_pred[-1*src_strd] + puc_pred[2*src_strd]) + 644 20 * (puc_pred[0] + puc_pred[src_strd]); */ 645 i2_temp_vert = pu1_pred_vert[-off2] + pu1_pred_vert[off3] 646 - (pu1_pred_vert[-off1] + pu1_pred_vert[off2]) 647 + ((pu1_pred_vert[0] + pu1_pred_vert[off1] 648 - pu1_pred_vert[-off1] 649 - pu1_pred_vert[off2]) << 2) 650 + ((pu1_pred_vert[0] + pu1_pred_vert[off1]) << 4); 651 i2_temp_vert = (i2_temp_vert + 16) >> 5; 652 i2_temp_vert = CLIP_U8(i2_temp_vert); 653 654 /* The logic below implements the following equation 655 i16_temp = puc_pred[-2] - 5 * (puc_pred[-1] + puc_pred[2]) + 656 20 * (puc_pred[0] + puc_pred[1]) + puc_pred[3]; */ 657 i2_temp_horz = pu1_pred_horz[-2] + pu1_pred_horz[3] 658 - (pu1_pred_horz[-1] + pu1_pred_horz[2]) 659 + ((pu1_pred_horz[0] + pu1_pred_horz[1] 660 - pu1_pred_horz[-1] 661 - pu1_pred_horz[2]) << 2) 662 + ((pu1_pred_horz[0] + pu1_pred_horz[1]) << 4); 663 i2_temp_horz = (i2_temp_horz + 16) >> 5; 664 i2_temp_horz = CLIP_U8(i2_temp_horz); 665 *pu1_dst = (i2_temp_vert + 
i2_temp_horz + 1) >> 1; 666 } 667 pu1_pred_vert += (src_strd - wd); 668 pu1_pred_horz += (src_strd - wd); 669 pu1_dst += (dst_strd - wd); 670 } 671 } 672 673 /*! 674 ************************************************************************** 675 * \if Function name : ih264_inter_pred_luma_horz_qpel_vert_hpel \endif 676 * 677 * \brief 678 * This routine applies the six tap filter to the predictors in the vertical 679 * and horizontal direction to obtain the pixel at (1/2,1/2). It then interpolates 680 * pixel at (0,1/2) and (1/2,1/2) to obtain pixel at (1/4,1/2). Similarly for (3/4,1/2). 681 * The six tap filtering operation is described in sec 8.4.2.2.1 titled 682 * "Luma sample interpolation process" 683 * 684 * \param pu1_src: Pointer to the buffer containing the predictor values. 685 * pu1_src could point to the frame buffer or the predictor buffer. 686 * \param pu1_dst: Pointer to the destination buffer where the output of 687 * the six tap filter followed by interpolation is stored. 688 * \param wd: Width of the rectangular pixel grid to be interpolated 689 * \param ht: Height of the rectangular pixel grid to be interpolated 690 * \param src_strd: Width of the buffer pointed to by puc_pred. 691 * \param dst_strd: Width of the destination buffer 692 * \param pu1_tmp: buffer to store temporary output after 1st 6-tap filter. 693 * \param dydx: x and y reference offset for qpel calculations. 694 * 695 * \return 696 * void 697 * 698 * \note 699 * This function takes the 8 bit predictor values, applies the six tap 700 * filter in the vertical direction and outputs the result clipped to 701 * 8 bit precision. The input is stored in the buffer pointed to by 702 * puc_pred while the output is stored in the buffer pointed by puc_dest. 703 * Both puc_pred and puc_dest could point to the same buffer i.e. the 704 * six tap filter could be done in place. 705 * 706 * \para <title> 707 * <paragraph> 708 * ... 
709 ************************************************************************** 710 */ 711 void ih264_inter_pred_luma_horz_qpel_vert_hpel(UWORD8 *pu1_src, 712 UWORD8 *pu1_dst, 713 WORD32 src_strd, 714 WORD32 dst_strd, 715 WORD32 ht, 716 WORD32 wd, 717 UWORD8* pu1_tmp, 718 WORD32 dydx) 719 { 720 WORD32 row, col; 721 WORD32 tmp; 722 WORD16* pi2_pred1_temp, *pi2_pred1; 723 UWORD8* pu1_dst_tmp; 724 WORD32 x_offset = dydx & 0x3; 725 WORD16 i2_macro; 726 727 pi2_pred1_temp = (WORD16*)pu1_tmp; 728 pi2_pred1_temp += 2; 729 pi2_pred1 = pi2_pred1_temp; 730 pu1_dst_tmp = pu1_dst; 731 732 for(row = 0; row < ht; row++) 733 { 734 for(col = -2; col < wd + 3; col++) 735 { 736 tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/ 737 tmp = ih264_g_six_tap[0] * 738 (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd]) 739 + ih264_g_six_tap[1] * 740 (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd]) 741 + ih264_g_six_tap[2] * 742 (pu1_src[col] + pu1_src[col + 1 * src_strd]); 743 pi2_pred1_temp[col] = tmp; 744 } 745 746 pu1_src += src_strd; 747 pi2_pred1_temp = pi2_pred1_temp + wd + 5; 748 } 749 750 pi2_pred1_temp = pi2_pred1; 751 for(row = 0; row < ht; row++) 752 { 753 for(col = 0; col < wd; col++) 754 { 755 tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/ 756 tmp = ih264_g_six_tap[0] * 757 (pi2_pred1[col - 2] + pi2_pred1[col + 3]) 758 + ih264_g_six_tap[1] * 759 (pi2_pred1[col - 1] + pi2_pred1[col + 2]) 760 + ih264_g_six_tap[2] * 761 (pi2_pred1[col] + pi2_pred1[col + 1]); 762 tmp = (tmp + 512) >> 10; 763 pu1_dst[col] = CLIP_U8(tmp); 764 } 765 pi2_pred1 += (wd + 5); 766 pu1_dst += dst_strd; 767 } 768 769 pu1_dst = pu1_dst_tmp; 770 pi2_pred1_temp += (x_offset >> 1); 771 for(row = ht; row != 0; row--) 772 { 773 for(col = wd; col != 0; col--, pu1_dst++, pi2_pred1_temp++) 774 { 775 UWORD8 uc_temp; 776 /* Clipping the output of the six tap filter obtained from the 777 first stage of the 2d filter stage */ 778 *pi2_pred1_temp = 
(*pi2_pred1_temp + 16) >> 5; 779 i2_macro = (*pi2_pred1_temp); 780 uc_temp = CLIP_U8(i2_macro); 781 *pu1_dst = (*pu1_dst + uc_temp + 1) >> 1; 782 } 783 pi2_pred1_temp += 5; 784 pu1_dst += dst_strd - wd; 785 } 786 } 787 788 /*! 789 ************************************************************************** 790 * \if Function name : ih264_inter_pred_luma_horz_hpel_vert_qpel \endif 791 * 792 * \brief 793 * This routine applies the six tap filter to the predictors in the horizontal 794 * and vertical direction to obtain the pixel at (1/2,1/2). It then interpolates 795 * pixel at (1/2,0) and (1/2,1/2) to obtain pixel at (1/2,1/4). Similarly for (1/2,3/4). 796 * The six tap filtering operation is described in sec 8.4.2.2.1 titled 797 * "Luma sample interpolation process" 798 * 799 * \param pu1_src: Pointer to the buffer containing the predictor values. 800 * pu1_src could point to the frame buffer or the predictor buffer. 801 * \param pu1_dst: Pointer to the destination buffer where the output of 802 * the six tap filter followed by interpolation is stored. 803 * \param wd: Width of the rectangular pixel grid to be interpolated 804 * \param ht: Height of the rectangular pixel grid to be interpolated 805 * \param src_strd: Width of the buffer pointed to by puc_pred. 806 * \param dst_strd: Width of the destination buffer 807 * \param pu1_tmp: buffer to store temporary output after 1st 6-tap filter. 808 * \param dydx: x and y reference offset for qpel calculations. 809 * 810 * \return 811 * void 812 * 813 * \note 814 * This function takes the 8 bit predictor values, applies the six tap 815 * filter in the vertical direction and outputs the result clipped to 816 * 8 bit precision. The input is stored in the buffer pointed to by 817 * puc_pred while the output is stored in the buffer pointed by puc_dest. 818 * Both puc_pred and puc_dest could point to the same buffer i.e. the 819 * six tap filter could be done in place. 820 * 821 * \para <title> 822 * <paragraph> 823 * ... 
824 ************************************************************************** 825 */ 826 void ih264_inter_pred_luma_horz_hpel_vert_qpel(UWORD8 *pu1_src, 827 UWORD8 *pu1_dst, 828 WORD32 src_strd, 829 WORD32 dst_strd, 830 WORD32 ht, 831 WORD32 wd, 832 UWORD8* pu1_tmp, 833 WORD32 dydx) 834 { 835 836 WORD32 row, col; 837 WORD32 tmp; 838 WORD32 y_offset = dydx >> 2; 839 WORD16* pi2_pred1_temp, *pi2_pred1; 840 UWORD8* pu1_dst_tmp; 841 //WORD32 x_offset = dydx & 0x3; 842 WORD16 i2_macro; 843 844 y_offset = y_offset & 0x3; 845 846 pi2_pred1_temp = (WORD16*)pu1_tmp; 847 pi2_pred1_temp += 2 * wd; 848 pi2_pred1 = pi2_pred1_temp; 849 pu1_dst_tmp = pu1_dst; 850 pu1_src -= 2 * src_strd; 851 for(row = -2; row < ht + 3; row++) 852 { 853 for(col = 0; col < wd; col++) 854 { 855 tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/ 856 tmp = ih264_g_six_tap[0] * (pu1_src[col - 2] + pu1_src[col + 3]) 857 + ih264_g_six_tap[1] * (pu1_src[col - 1] + pu1_src[col + 2]) 858 + ih264_g_six_tap[2] * (pu1_src[col] + pu1_src[col + 1]); 859 pi2_pred1_temp[col - 2 * wd] = tmp; 860 } 861 862 pu1_src += src_strd; 863 pi2_pred1_temp += wd; 864 } 865 pi2_pred1_temp = pi2_pred1; 866 for(row = 0; row < ht; row++) 867 { 868 for(col = 0; col < wd; col++) 869 { 870 tmp = 0;/*ih264_g_six_tap[] is the array containing the filter coeffs*/ 871 tmp = ih264_g_six_tap[0] * (pi2_pred1[col - 2 * wd] + pi2_pred1[col + 3 * wd]) 872 + ih264_g_six_tap[1] * (pi2_pred1[col - 1 * wd] + pi2_pred1[col + 2 * wd]) 873 + ih264_g_six_tap[2] * (pi2_pred1[col] + pi2_pred1[col + 1 * wd]); 874 tmp = (tmp + 512) >> 10; 875 pu1_dst[col] = CLIP_U8(tmp); 876 } 877 pi2_pred1 += wd; 878 pu1_dst += dst_strd; 879 } 880 pu1_dst = pu1_dst_tmp; 881 pi2_pred1_temp += (y_offset >> 1) * wd; 882 for(row = ht; row != 0; row--) 883 884 { 885 for(col = wd; col != 0; col--, pu1_dst++, pi2_pred1_temp++) 886 { 887 UWORD8 u1_temp; 888 /* Clipping the output of the six tap filter obtained from the 889 first stage of the 2d filter stage 
*/ 890 *pi2_pred1_temp = (*pi2_pred1_temp + 16) >> 5; 891 i2_macro = (*pi2_pred1_temp); 892 u1_temp = CLIP_U8(i2_macro); 893 *pu1_dst = (*pu1_dst + u1_temp + 1) >> 1; 894 } 895 //pi16_pred1_temp += wd; 896 pu1_dst += dst_strd - wd; 897 } 898 } 899 900 /** 901 ******************************************************************************* 902 * function:ih264_inter_pred_luma_bilinear 903 * 904 * @brief 905 * This routine applies the bilinear filter to the predictors . 906 * The filtering operation is described in 907 * sec 8.4.2.2.1 titled "Luma sample interpolation process" 908 * 909 * @par Description: 910 \note 911 * This function is called to obtain pixels lying at the following 912 * locations (1/4,1), (3/4,1),(1,1/4), (1,3/4) ,(1/4,1/2), (3/4,1/2),(1/2,1/4), (1/2,3/4),(3/4,1/4),(1/4,3/4),(3/4,3/4)&& (1/4,1/4) . 913 * The function averages the two adjacent values from the two input arrays in horizontal direction. 914 * 915 * 916 * @param[in] pu1_src1: 917 * UWORD8 Pointer to the buffer containing the first input array. 918 * 919 * @param[in] pu1_src2: 920 * UWORD8 Pointer to the buffer containing the second input array. 921 * 922 * @param[out] pu1_dst 923 * UWORD8 pointer to the destination where the output of bilinear filter is stored. 
924 * 925 * @param[in] src_strd1 926 * Stride of the first input buffer 927 * 928 * @param[in] src_strd2 929 * Stride of the second input buffer 930 * 931 * @param[in] dst_strd 932 * integer destination stride of pu1_dst 933 * 934 * @param[in] ht 935 * integer height of the array 936 * 937 * @param[in] wd 938 * integer width of the array 939 * 940 * @returns 941 * 942 * @remarks 943 * None 944 * 945 ******************************************************************************* 946 */ 947 void ih264_inter_pred_luma_bilinear(UWORD8 *pu1_src1, 948 UWORD8 *pu1_src2, 949 UWORD8 *pu1_dst, 950 WORD32 src_strd1, 951 WORD32 src_strd2, 952 WORD32 dst_strd, 953 WORD32 ht, 954 WORD32 wd) 955 { 956 WORD32 row, col; 957 WORD16 i2_tmp; 958 959 for(row = 0; row < ht; row++) 960 { 961 for(col = 0; col < wd; col++) 962 { 963 i2_tmp = pu1_src1[col] + pu1_src2[col]; 964 i2_tmp = (i2_tmp + 1) >> 1; 965 pu1_dst[col] = CLIP_U8(i2_tmp); 966 } 967 pu1_src1 += src_strd1; 968 pu1_src2 += src_strd2; 969 pu1_dst += dst_strd; 970 } 971 972 } 973 974 /** 975 ******************************************************************************* 976 * 977 * @brief 978 * Interprediction chroma filter 979 * 980 * @par Description: 981 * Applies filtering to chroma samples as mentioned in 982 * sec 8.4.2.2.2 titled "chroma sample interpolation process" 983 * 984 * @param[in] pu1_src 985 * UWORD8 pointer to the source containing alternate U and V samples 986 * 987 * @param[out] pu1_dst 988 * UWORD8 pointer to the destination 989 * 990 * @param[in] src_strd 991 * integer source stride 992 * 993 * @param[in] dst_strd 994 * integer destination stride 995 * 996 * @param[in] u1_dx 997 * dx value where the sample is to be produced(refer sec 8.4.2.2.2 ) 998 * 999 * @param[in] u1_dy 1000 * dy value where the sample is to be produced(refer sec 8.4.2.2.2 ) 1001 * 1002 * @param[in] ht 1003 * integer height of the array 1004 * 1005 * @param[in] wd 1006 * integer width of the array 1007 * 1008 * @returns 1009 * 1010 * 
@remarks 1011 * None 1012 * 1013 ******************************************************************************* 1014 */ 1015 void ih264_inter_pred_chroma(UWORD8 *pu1_src, 1016 UWORD8 *pu1_dst, 1017 WORD32 src_strd, 1018 WORD32 dst_strd, 1019 WORD32 dx, 1020 WORD32 dy, 1021 WORD32 ht, 1022 WORD32 wd) 1023 { 1024 WORD32 row, col; 1025 WORD16 i2_tmp; 1026 1027 for(row = 0; row < ht; row++) 1028 { 1029 for(col = 0; col < 2 * wd; col++) 1030 { 1031 i2_tmp = 0; /* applies equation (8-266) in section 8.4.2.2.2 */ 1032 i2_tmp = (8 - dx) * (8 - dy) * pu1_src[col] 1033 + (dx) * (8 - dy) * pu1_src[col + 2] 1034 + (8 - dx) * (dy) * (pu1_src + src_strd)[col] 1035 + (dx) * (dy) * (pu1_src + src_strd)[col + 2]; 1036 i2_tmp = (i2_tmp + 32) >> 6; 1037 pu1_dst[col] = CLIP_U8(i2_tmp); 1038 } 1039 pu1_src += src_strd; 1040 pu1_dst += dst_strd; 1041 } 1042 } 1043