//******************************************************************************
//*
//* Copyright (C) 2015 The Android Open Source Project
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
///**
//******************************************************************************
//* @file
//*  ih264_intra_pred_luma_4x4_av8.s
//*
//* @brief
//*  Contains function definitions for intra 4x4 Luma prediction.
//*
//* @author
//*  Ittiam
//*
//* @par List of Functions:
//*
//*  -ih264_intra_pred_luma_4x4_mode_vert_av8
//*  -ih264_intra_pred_luma_4x4_mode_horz_av8
//*  -ih264_intra_pred_luma_4x4_mode_dc_av8
//*  -ih264_intra_pred_luma_4x4_mode_diag_dl_av8
//*  -ih264_intra_pred_luma_4x4_mode_diag_dr_av8
//*  -ih264_intra_pred_luma_4x4_mode_vert_r_av8
//*  -ih264_intra_pred_luma_4x4_mode_horz_d_av8
//*  -ih264_intra_pred_luma_4x4_mode_vert_l_av8
//*  -ih264_intra_pred_luma_4x4_mode_horz_u_av8
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/

// All the functions here are replicated from ih264_intra_pred_filters.c
//
// Syntax : GNU assembler, AArch64 ('//' starts a comment).
// ABI    : AAPCS64. Integer arguments arrive in x0-x4. The 32-bit arguments
//          (src_strd, dst_strd, ui_neighboravailability) are only defined in
//          the low 32 bits of their registers, so dst_strd is sign-extended
//          with "sxtw x3, w3" before x3 is used as a 64-bit address increment.
// Macros : push_v_regs / pop_v_regs come from ih264_neon_macros.s (not visible
//          in this file); presumably they save/restore the callee-saved SIMD
//          registers - confirm against that file.

.text
.p2align 2
.include "ih264_neon_macros.s"


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_vert
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:vertical
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:vertical, described in
//*  sec 8.3.1.2.1. The four bytes at pu1_src[5..8] are copied unchanged into
//*  each of the four destination rows.
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not used in this function)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//void ih264_intra_pred_luma_4x4_mode_vert(UWORD8 *pu1_src,
//                                         UWORD8 *pu1_dst,
//                                         WORD32 src_strd,
//                                         WORD32 dst_strd,
//                                         WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability

    .global ih264_intra_pred_luma_4x4_mode_vert_av8

ih264_intra_pred_luma_4x4_mode_vert_av8:

    push_v_regs
    sxtw      x3, w3                    // dst_strd is 32-bit: widen before 64-bit addressing

    add       x0, x0, #5                // x0 -> pu1_src[5]
    ld1       {v0.s}[0], [x0]           // v0.s[0] = pu1_src[5..8]

    st1       {v0.s}[0], [x1], x3       // row 0
    st1       {v0.s}[0], [x1], x3       // row 1
    st1       {v0.s}[0], [x1], x3       // row 2
    st1       {v0.s}[0], [x1], x3       // row 3

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_horz
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:horizontal
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:horizontal, described in
//*  sec 8.3.1.2.2. Each destination row is filled with one source byte:
//*  row 0 <- pu1_src[3], row 1 <- pu1_src[2], row 2 <- pu1_src[1],
//*  row 3 <- pu1_src[0].
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not used in this function)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//void ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src,
//                                         UWORD8 *pu1_dst,
//                                         WORD32 src_strd,
//                                         WORD32 dst_strd,
//                                         WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability

    .global ih264_intra_pred_luma_4x4_mode_horz_av8

ih264_intra_pred_luma_4x4_mode_horz_av8:

    push_v_regs
    sxtw      x3, w3                    // dst_strd is 32-bit: widen before 64-bit addressing

    ld1       {v1.s}[0], [x0]           // v1.b[0..3] = pu1_src[0..3]

    dup       v0.8b, v1.b[3]            // row 0 value
    dup       v2.8b, v1.b[2]            // row 1 value
    st1       {v0.s}[0], [x1], x3
    dup       v3.8b, v1.b[1]            // row 2 value
    st1       {v2.s}[0], [x1], x3
    dup       v4.8b, v1.b[0]            // row 3 value
    st1       {v3.s}[0], [x1], x3
    st1       {v4.s}[0], [x1], x3

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_dc
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:DC
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:DC, described in sec 8.3.1.2.3.
//*  One value is computed and written to all 16 destination bytes:
//*    left and top available : (sum(pu1_src[0..3]) + sum(pu1_src[5..8]) + 4) >> 3
//*    only left available    : (sum(pu1_src[0..3]) + 2) >> 2
//*    only top available     : (sum(pu1_src[5..8]) + 2) >> 2
//*    none available         : 128
//*  Source bytes belonging to an unavailable neighbour are never read.
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels: bit 0 (0x01) = left available,
//*  bit 2 (0x04) = top available
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src,
//                                       UWORD8 *pu1_dst,
//                                       WORD32 src_strd,
//                                       WORD32 dst_strd,
//                                       WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability
//    w5 => running sum, then the DC value
//    w6-w9 => scratch for the loaded neighbour bytes

    .global ih264_intra_pred_luma_4x4_mode_dc_av8

ih264_intra_pred_luma_4x4_mode_dc_av8:

    push_v_regs
    sxtw      x3, w3                    // dst_strd is 32-bit: widen before 64-bit addressing

    tbz       w4, #0, top_available     // LEFT NOT AVAILABLE

    ldrb      w5, [x0, #3]              // ldrb zero-extends, no further widening needed
    ldrb      w6, [x0, #2]
    ldrb      w7, [x0, #1]
    ldrb      w8, [x0]
    add       w5, w5, w6
    add       w7, w7, w8
    add       w5, w5, w7                // w5 = pu1_src[0] + pu1_src[1] + pu1_src[2] + pu1_src[3]
    tbz       w4, #2, left_available    // TOP NOT AVAILABLE: ONLY LEFT AVAILABLE

    // BOTH LEFT AND TOP AVAILABLE
    ldrb      w6, [x0, #5]
    ldrb      w7, [x0, #6]
    ldrb      w8, [x0, #7]
    ldrb      w9, [x0, #8]
    add       w6, w6, w7
    add       w8, w8, w9
    add       w5, w5, w6
    add       w5, w5, w8                // w5 = left sum + top sum
    add       w5, w5, #4                // rounding term for a divide by 8
    lsr       w5, w5, #3
    b         .Ldc_store_rows

top_available:                          // LEFT NOT AVAILABLE: CHECK TOP
    tbz       w4, #2, none_available    // TOP NOT AVAILABLE EITHER

    // ONLY TOP AVAILABLE
    ldrb      w6, [x0, #5]
    ldrb      w7, [x0, #6]
    ldrb      w8, [x0, #7]
    ldrb      w9, [x0, #8]
    add       w5, w6, w7
    add       w8, w8, w9
    add       w5, w5, w8                // w5 = pu1_src[5] + pu1_src[6] + pu1_src[7] + pu1_src[8]
    add       w5, w5, #2                // rounding term for a divide by 4
    lsr       w5, w5, #2
    b         .Ldc_store_rows

left_available:                         // ONLY LEFT AVAILABLE (w5 already holds the left sum)
    add       w5, w5, #2                // rounding term for a divide by 4
    lsr       w5, w5, #2
    b         .Ldc_store_rows

none_available:                         // NONE AVAILABLE
    mov       w5, #128

.Ldc_store_rows:                        // w5 = DC value; replicate it over the 4x4 block
    dup       v0.8b, w5
    st1       {v0.s}[0], [x1], x3       // row 0
    st1       {v0.s}[0], [x1], x3       // row 1
    st1       {v0.s}[0], [x1], x3       // row 2
    st1       {v0.s}[0], [x1], x3       // row 3

end_func:

    pop_v_regs
    ret



// Notes for the six directional predictors below.
//
// ABI: AAPCS64. dst_strd is a 32-bit argument, so only w3 is defined on entry;
// every function sign-extends it ("sxtw x3, w3") before x3 is used as a
// 64-bit post-index increment.
//
// push_v_regs / pop_v_regs are macros from ih264_neon_macros.s (not visible
// here); presumably they save/restore the callee-saved SIMD registers, which
// horz_d (v10), vert_l (v8, v9) and horz_u (v10) write - confirm against that
// file.
//
// Notation used in the comments: s[i] = pu1_src[i],
//     avg(a, b)     = (a + b + 1) >> 1
//     flt(a, b, c)  = (a + 2*b + c + 2) >> 2
// The 16-bit sums are at most 4*255, so the narrowing shifts never saturate.


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_diag_dl
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left, described in
//*  sec 8.3.1.2.4. With t[i] = pu1_src[5 + i] (i = 0..7):
//*    f[i] = flt(t[i], t[i+1], t[i+2])   for i = 0..5
//*    f[6] = flt(t[6], t[7], t[7])
//*  and destination row r (r = 0..3) receives f[r .. r+3].
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not used in this function)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src,
//                                            UWORD8 *pu1_dst,
//                                            WORD32 src_strd,
//                                            WORD32 dst_strd,
//                                            WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability

    .global ih264_intra_pred_luma_4x4_mode_diag_dl_av8

ih264_intra_pred_luma_4x4_mode_diag_dl_av8:

    push_v_regs
    sxtw      x3, w3                    // dst_strd is 32-bit: widen before 64-bit addressing

    add       x0, x0, #5                // x0 -> t[0]
    sub       x5, x3, #2                // step to the next row after a 2-byte store
    add       x6, x0, #7                // x6 -> t[7]
    ld1       {v0.8b}, [x0]             // v0 = t[0..7]
    ext       v1.8b, v0.8b, v0.8b, #1   // v1 = t[1..7], t[0]
    ext       v2.8b, v0.8b, v0.8b, #2   // v2 = t[2..7], t[0], t[1]
    ld1       {v2.b}[6], [x6]           // v2 lane 6 = t[7], so lane 6 filters t6, t7, t7
    uaddl     v20.8h, v0.8b, v1.8b
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h    // a + 2*b + c per lane
    sqrshrun  v3.8b, v24.8h, #2         // v3 lanes 0..6 = f[0..6]
    st1       {v3.s}[0], [x1], x3       // row 0 = f[0..3]
    ext       v4.8b, v3.8b, v3.8b, #1   // v4 = f[1..]
    st1       {v4.s}[0], [x1], x3       // row 1 = f[1..4]
    st1       {v3.h}[1], [x1], #2       // row 2 = f[2..3] ...
    st1       {v3.h}[2], [x1], x5       //         ... f[4..5]
    st1       {v4.h}[1], [x1], #2       // row 3 = f[3..4] ...
    st1       {v4.h}[2], [x1]           //         ... f[5..6]

end_func_diag_dl:

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_diag_dr
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right, described in
//*  sec 8.3.1.2.5. With f[i] = flt(s[i], s[i+1], s[i+2]) for i = 0..6,
//*  destination row r (r = 0..3) receives f[3-r .. 6-r].
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not used in this function)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src,
//                                            UWORD8 *pu1_dst,
//                                            WORD32 src_strd,
//                                            WORD32 dst_strd,
//                                            WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability

    .global ih264_intra_pred_luma_4x4_mode_diag_dr_av8

ih264_intra_pred_luma_4x4_mode_diag_dr_av8:

    push_v_regs
    sxtw      x3, w3                    // dst_strd is 32-bit: widen before 64-bit addressing

    ld1       {v0.8b}, [x0]             // v0 = s[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1 = s[1..8]
    ext       v2.8b, v1.8b, v1.8b, #1   // v2 = s[2..8], s[1] (lane 7 is not used)
    uaddl     v20.8h, v0.8b, v1.8b
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h    // a + 2*b + c per lane
    sqrshrun  v3.8b, v24.8h, #2         // v3 lanes 0..6 = f[0..6]

    ext       v4.8b, v3.8b, v3.8b, #1   // v4 = f[1..]
    sub       x5, x3, #2                // step to the next row after a 2-byte store
    st1       {v4.h}[1], [x1], #2       // row 0 = f[3..4] ...
    st1       {v4.h}[2], [x1], x5       //         ... f[5..6]
    st1       {v3.h}[1], [x1], #2       // row 1 = f[2..3] ...
    st1       {v3.h}[2], [x1], x5       //         ... f[4..5]
    st1       {v4.s}[0], [x1], x3       // row 2 = f[1..4]
    st1       {v3.s}[0], [x1], x3       // row 3 = f[0..3]

end_func_diag_dr:

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_vert_r
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Vertical_Right
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Vertical_Right, described in
//*  sec 8.3.1.2.6. With a[i] = avg(s[i], s[i+1]) and
//*  f[i] = flt(s[i], s[i+1], s[i+2]):
//*    row 0 = a[4], a[5], a[6], a[7]
//*    row 1 = f[3], f[4], f[5], f[6]
//*    row 2 = f[2], a[4], a[5], a[6]
//*    row 3 = f[1], f[3], f[4], f[5]
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not used in this function)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability

    .global ih264_intra_pred_luma_4x4_mode_vert_r_av8

ih264_intra_pred_luma_4x4_mode_vert_r_av8:

    push_v_regs
    sxtw      x3, w3                    // dst_strd is 32-bit: widen before 64-bit addressing

    ld1       {v0.8b}, [x0]             // v0 = s[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1 = s[1..8]
    ext       v2.8b, v1.8b, v1.8b, #1   // v2 = s[2..8], s[1] (lane 7 is not used)
    uaddl     v20.8h, v0.8b, v1.8b      // a + b per lane
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h    // a + 2*b + c per lane
    sqrshrun  v4.8b, v20.8h, #1         // v4 = a[0..7]
    sqrshrun  v3.8b, v24.8h, #2         // v3 lanes 0..6 = f[0..6]
    ext       v5.8b, v3.8b, v3.8b, #3   // v5 = f[3..7], f[0..2]
    st1       {v4.s}[1], [x1], x3       // row 0 = a[4..7]
    st1       {v5.s}[0], [x1], x3       // row 1 = f[3..6]
    sub       x8, x3, #3                // step to the next row after 1 + 2 bytes
    st1       {v3.b}[2], [x1], #1       // row 2 = f[2] ...
    st1       {v4.h}[2], [x1], #2       //         ... a[4], a[5] ...
    st1       {v4.b}[6], [x1], x8       //         ... a[6]
    st1       {v3.b}[1], [x1], #1       // row 3 = f[1] ...
    st1       {v5.h}[0], [x1], #2       //         ... f[3], f[4] ...
    st1       {v5.b}[2], [x1]           //         ... f[5]

end_func_vert_r:

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_horz_d
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Horizontal_Down
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Horizontal_Down, described in
//*  sec 8.3.1.2.7. With a[i] = avg(s[i], s[i+1]) and
//*  f[i] = flt(s[i], s[i+1], s[i+2]):
//*    row 0 = a[3], f[3], f[4], f[5]
//*    row 1 = a[2], f[2], a[3], f[3]
//*    row 2 = a[1], f[1], a[2], f[2]
//*    row 3 = a[0], f[0], a[1], f[1]
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not used in this function)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability

    .global ih264_intra_pred_luma_4x4_mode_horz_d_av8

ih264_intra_pred_luma_4x4_mode_horz_d_av8:

    push_v_regs
    sxtw      x3, w3                    // dst_strd is 32-bit: widen before 64-bit addressing

    ld1       {v0.8b}, [x0]             // v0 = s[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1 = s[1..8]
    ext       v2.8b, v1.8b, v0.8b, #1   // v2 = s[2..8], s[0] (lane 7 is not used)
    uaddl     v20.8h, v0.8b, v1.8b      // a + b per lane
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h    // a + 2*b + c per lane
    sqrshrun  v4.8b, v20.8h, #1         // v4 = a[0..7]
    sqrshrun  v5.8b, v24.8h, #2         // v5 lanes 0..6 = f[0..6]
    sub       x5, x3, #2                // step to the next row after a 2-byte store
    mov       v6.8b, v5.8b              // v6 = f[], kept un-interleaved for row 0
    trn1      v10.8b, v4.8b, v5.8b      // v10 = a0 f0 a2 f2 a4 f4 a6 f6
    trn2      v5.8b, v4.8b, v5.8b       // v5  = a1 f1 a3 f3 a5 f5 a7 f7
    mov       v4.8b, v10.8b             // v4  = a0 f0 a2 f2 ...
    st1       {v5.h}[1], [x1], #2       // row 0 = a[3], f[3] ...
    st1       {v6.h}[2], [x1], x5       //         ... f[4], f[5]
    st1       {v4.h}[1], [x1], #2       // row 1 = a[2], f[2] ...
    st1       {v5.h}[1], [x1], x5       //         ... a[3], f[3]
    st1       {v5.h}[0], [x1], #2       // row 2 = a[1], f[1] ...
    st1       {v4.h}[1], [x1], x5       //         ... a[2], f[2]
    st1       {v4.h}[0], [x1], #2       // row 3 = a[0], f[0] ...
    st1       {v5.h}[0], [x1], x5       //         ... a[1], f[1]

end_func_horz_d:

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_vert_l
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Vertical_Left
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Vertical_Left, described in
//*  sec 8.3.1.2.8. With u[i] = pu1_src[4 + i], a[i] = avg(u[i], u[i+1]) and
//*  f[i] = flt(u[i], u[i+1], u[i+2]):
//*    row 0 = a[1..4]
//*    row 1 = f[1..4]
//*    row 2 = a[2..5]
//*    row 3 = f[2..5]
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not used in this function)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability

    .global ih264_intra_pred_luma_4x4_mode_vert_l_av8

ih264_intra_pred_luma_4x4_mode_vert_l_av8:

    push_v_regs
    sxtw      x3, w3                    // dst_strd is 32-bit: widen before 64-bit addressing

    add       x0, x0, #4
    ld1       {v0.8b}, [x0]             // v0 = u[0..7]  (pu1_src[4..11])
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1 = u[1..8]  (pu1_src[5..12])
    ext       v2.8b, v1.8b, v0.8b, #1   // v2 = u[2..8], u[0] (lane 7 is not used)
    uaddl     v20.8h, v0.8b, v1.8b      // a + b per lane
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h    // a + 2*b + c per lane
    sqrshrun  v4.8b, v20.8h, #1         // v4 = a[0..7]
    sqrshrun  v5.8b, v24.8h, #2         // v5 lanes 0..6 = f[0..6]
    ext       v6.8b, v4.8b, v4.8b, #1   // v6 = a[1..]
    ext       v7.8b, v5.8b, v5.8b, #1   // v7 = f[1..]
    st1       {v6.s}[0], [x1], x3       // row 0 = a[1..4]
    ext       v8.8b, v4.8b, v4.8b, #2   // v8 = a[2..]
    ext       v9.8b, v5.8b, v5.8b, #2   // v9 = f[2..]
    st1       {v7.s}[0], [x1], x3       // row 1 = f[1..4]
    st1       {v8.s}[0], [x1], x3       // row 2 = a[2..5]
    st1       {v9.s}[0], [x1], x3       // row 3 = f[2..5]

end_func_vert_l:

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_horz_u
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Horizontal_Up
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Horizontal_Up, described in
//*  sec 8.3.1.2.9. Only s[0..3] contribute to the output:
//*    row 0 = avg(s2,s3), flt(s1,s2,s3), avg(s1,s2), flt(s0,s1,s2)
//*    row 1 = avg(s1,s2), flt(s0,s1,s2), avg(s0,s1), flt(s0,s0,s1)
//*    row 2 = avg(s0,s1), flt(s0,s0,s1), s0,         s0
//*    row 3 = s0,         s0,            s0,         s0
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not used in this function)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability

    .global ih264_intra_pred_luma_4x4_mode_horz_u_av8

ih264_intra_pred_luma_4x4_mode_horz_u_av8:

    push_v_regs
    sxtw      x3, w3                    // dst_strd is 32-bit: widen before 64-bit addressing

    ld1       {v0.8b}, [x0]             // v0 = s[0..7]
    ldrb      w9, [x0]                  // w9 = s[0] (zero-extended), replicated later
    ext       v1.8b, v0.8b, v0.8b, #1   // v1 = s[1..7], s[0]
    ld1       {v0.b}[7], [x0]           // v0 lane 7 = s[0], so lane 7 filters s0, s0, s1
    ext       v2.8b, v1.8b, v1.8b, #1   // v2 = s[2..7], s[0], s[1]
    uaddl     v20.8h, v0.8b, v1.8b      // a + b per lane
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h    // a + 2*b + c per lane
    sqrshrun  v4.8b, v20.8h, #1         // v4[i] = avg(s[i], s[i+1])
    sqrshrun  v5.8b, v24.8h, #2         // v5[i] = flt(s[i], s[i+1], s[i+2]); v5[7] = flt(s0, s0, s1)
    ext       v6.8b, v5.8b, v4.8b, #1   // v6 = v5[1..7], v4[0]
    st1       {v4.b}[2], [x1], #1       // row 0, col 0 = avg(s2, s3)
    st1       {v6.b}[0], [x1], #1       // row 0, col 1 = flt(s1, s2, s3)
    trn1      v10.8b, v6.8b, v5.8b      // v10 = v6[0] v5[0] v6[2] v5[2] ...
    trn2      v5.8b, v6.8b, v5.8b       // v5.h[3] = avg(s0, s1), flt(s0, s0, s1)
    mov       v6.8b, v10.8b
    sub       x5, x3, #2                // step to the next row after a 2-byte store
    trn2      v6.8b, v4.8b, v6.8b       // v6.h[0] = avg(s1, s2), flt(s0, s1, s2)
    dup       v7.8b, w9                 // v7 = s[0] in every lane
    st1       {v6.h}[0], [x1], x5       // row 0, cols 2..3
    st1       {v6.h}[0], [x1], #2       // row 1, cols 0..1
    st1       {v5.h}[3], [x1], x5       // row 1, cols 2..3
    st1       {v5.h}[3], [x1], #2       // row 2, cols 0..1
    st1       {v7.h}[3], [x1], x5       // row 2, cols 2..3 = s0, s0
    st1       {v7.s}[0], [x1], x3       // row 3 = s0 x 4

end_func_horz_u:

    pop_v_regs
    ret
