@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
@**
@******************************************************************************
@* @file
@*  ih264_intra_pred_luma_8x8_a9q.s
@*
@* @brief
@*  Contains function definitions for intra 8x8 Luma prediction.
@*
@* @author
@*  Ittiam
@*
@* @par List of Functions:
@*
@*  -ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q
@*  -ih264_intra_pred_luma_8x8_mode_vert_a9q
@*  -ih264_intra_pred_luma_8x8_mode_horz_a9q
@*  -ih264_intra_pred_luma_8x8_mode_dc_a9q
@*  -ih264_intra_pred_luma_8x8_mode_diag_dl_a9q
@*  -ih264_intra_pred_luma_8x8_mode_diag_dr_a9q
@*  -ih264_intra_pred_luma_8x8_mode_vert_r_a9q
@*  -ih264_intra_pred_luma_8x8_mode_horz_d_a9q
@*  -ih264_intra_pred_luma_8x8_mode_vert_l_a9q
@*  -ih264_intra_pred_luma_8x8_mode_horz_u_a9q
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@*

@* All the functions here are replicated from ih264_intra_pred_filters.c
@

    .text
    .balign       4                     @ 4-byte alignment for the literal and the code that follows

    @ Byte-index table consumed by the Horizontal_Up routine; it is defined in
    @ another object and kept out of the dynamic symbol table.
    .extern       ih264_gai1_intrapred_luma_8x8_horz_u
    .hidden       ih264_gai1_intrapred_luma_8x8_horz_u

@ Position-independent handle on that table: the word holds the distance from
@ (scrlb8x8l2 + 8) to the table.  The Horizontal_Up routine loads this word and
@ adds pc at label scrlb8x8l2 to obtain the table address.  The "- 8" matches
@ pc reading as "current instruction + 8", i.e. it assumes that routine is
@ assembled for ARM (not Thumb) state.
scratch_intrapred_addr_8x8:
    .word         ih264_gai1_intrapred_luma_8x8_horz_u - scrlb8x8l2 - 8

@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_ref_filtering
@*
@* @brief
@*  Reference sample filtering process for Intra_8x8 sample prediction
@*
@* @par Description:
@*  Perform Reference sample filtering process for Intra_8x8 sample prediction,
@*  described in sec 8.3.2.2.1
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source (unfiltered neighbouring samples)
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination (filtered neighbouring samples)
@*
@* @remarks
@*  Earlier revisions of this header also listed src_strd, dst_strd and
@*  ui_neighboravailability as "[Not used]".  The prototype below takes only
@*  the two pointers, so those entries have been folded into this remark.
@*
@* @returns
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_ref_filtering(UWORD8 *pu1_src,
@                                                   UWORD8 *pu1_dst)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@
@ Notation used below: S[i] = pu1_src[i].
@ Output: pu1_dst[0..24] (25 bytes)
@   dst[0]  = (3*S[0] + S[1] + 2) >> 2
@   dst[i]  = (S[i-1] + 2*S[i] + S[i+1] + 2) >> 2      for i = 1..23
@   dst[24] = (S[23] + 3*S[24] + 2) >> 2
@ The two 16-byte loads read S[0..31]; only S[0..24] influence the output.
@ NOTE(review): confirm every caller keeps S[25..31] readable.
@ Clobbers: r0, r1, q0-q3, q8-q13 (q4-q7 are saved and restored).
@ Leaf routine: no core callee-saved register is written, so nothing but
@ d8-d15 goes on the stack and the return is a plain "bx lr".


    .global ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q

ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q:

    vpush         {d8-d15}              @ q4-q7 are written below and are callee-saved

    vld1.u8       {q0}, [r0]!           @ q0 = S[0..15], r0 -> S[16]
    vld1.u8       {q1}, [r0]            @ q1 = S[16..31]
    add           r0, r0, #8            @ r0 -> S[24]
    vext.8        q2, q0, q1, #1        @ q2 = S[1..16]
    vext.8        q3, q1, q1, #1        @ q3 = S[17..31], S[16]
    vext.8        q4, q2, q3, #1        @ q4 = S[2..17]
    vext.8        q5, q3, q3, #1        @ d10 = S[18..25]
    vld1.8        {d10[7]}, [r0]        @ LOADING SRC[24] AGAIN TO THE END FOR p'[ 15, -1 ] = ( p[ 14, -1 ] + 3 * p[ 15, -1 ] + 2 ) >> 2
    vaddl.u8      q10, d0, d4           @ q10 = S[i] + S[i+1],   i = 0..7
    vaddl.u8      q7, d0, d0            @ SPECIAL CASE FOR p'[ -1 ,7 ] = ( p[ -1, 6 ] + 3 * p[ -1, 7 ] + 2 ) >> 2
    vadd.u16      q7, q10, q7           @ q7 = 3*S[i] + S[i+1] (only lane 0 is used)
    vaddl.u8      q11, d1, d5           @ q11 = S[i] + S[i+1],   i = 8..15
    vqrshrun.s16  d14, q7, #2           @ d14[0] = (3*S[0] + S[1] + 2) >> 2
    vaddl.u8      q12, d4, d8           @ q12 = S[i+1] + S[i+2], i = 0..7
    vaddl.u8      q13, d5, d9           @ q13 = S[i+1] + S[i+2], i = 8..15
    vst1.8        {d14[0]}, [r1]!       @ dst[0]
    vadd.u16      q12, q10, q12         @ S[i] + 2*S[i+1] + S[i+2], i = 0..7
    vadd.u16      q13, q11, q13         @ same, i = 8..15
    vaddl.u8      q9, d2, d6            @ q9 = S[i] + S[i+1],    i = 16..23
    vaddl.u8      q8, d6, d10           @ q8 = S[i+1] + S[i+2],  i = 16..23 (S[25] replaced by S[24])
    vqrshrun.s16  d4, q12, #2
    vqrshrun.s16  d5, q13, #2
    vadd.u16      q6, q8, q9
    vst1.8        {q2}, [r1]!           @ dst[1..16]
    vqrshrun.s16  d6, q6, #2
    vst1.8        {d6}, [r1]            @ dst[17..24]


end_func_ref_filt:

    vpop          {d8-d15}
    bx            lr                    @ nothing else was spilled






@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_vert
@*
@* @brief
@*  Perform Intra prediction for luma_8x8 mode:vertical
@*
@* @par Description:
@*  Perform Intra prediction for luma_8x8 mode:vertical ,described in sec 8.3.2.2.2
@*  The eight bytes pu1_src[9..16] are copied to each of the eight output rows.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read by this function)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels(Not used in this function)
@*
@* @returns
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_vert(UWORD8 *pu1_src,
@                                        UWORD8 *pu1_dst,
@                                        WORD32 src_strd,
@                                        WORD32 dst_strd,
@                                        WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r2 => src_strd                      (not read)
@   r3 => dst_strd
@   [sp] => ui_neighboravailability     (not read)
@
@ Leaf routine touching only r0, r1 and d0, so it needs no stack frame.


    .global ih264_intra_pred_luma_8x8_mode_vert_a9q

ih264_intra_pred_luma_8x8_mode_vert_a9q:

    add           r0, r0, #9            @ r0 -> pu1_src[9]
    vld1.8        {d0}, [r0]            @ d0 = pu1_src[9..16]

    .rept         8                     @ rows 0..7 all receive the same eight bytes
    vst1.8        {d0}, [r1], r3
    .endr

    bx            lr





@******************************************************************************


@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_horz
@*
@* @brief
@*  Perform Intra prediction for luma_8x8 mode:horizontal
@*
@* @par Description:
@*  Perform Intra prediction for luma_8x8 mode:horizontal ,described in sec 8.3.2.2.2
@*  Output row y (y = 0..7) is eight copies of pu1_src[7 - y].
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read; r2 is reused as the loop counter)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels(Not used in this function)
@*
@* @returns
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@*
@void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src,
@                                         UWORD8 *pu1_dst,
@                                         WORD32 src_strd,
@                                         WORD32 dst_strd,
@                                         WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r2 => src_strd                      (overwritten: loop counter)
@   r3 => dst_strd
@   [sp] => ui_neighboravailability     (not read)
@
@ Leaf routine touching only r1, r2 and d0-d2, so it needs no stack frame.


    .global ih264_intra_pred_luma_8x8_mode_horz_a9q

ih264_intra_pred_luma_8x8_mode_horz_a9q:

    vld1.u8       {d0}, [r0]            @ d0 = pu1_src[0..7]
    mov           r2, #6                @ rows 1..6 are emitted by the loop, two per pass

    vdup.u8       d1, d0[7]             @ row 0 value
    vdup.u8       d2, d0[6]             @ row 1 value
    vst1.8        {d1}, [r1], r3        @ row 0

loop_8x8_horz:
    vext.8        d0, d0, d0, #6        @ rotate: the next two source bytes move into lanes 7 and 6
    vst1.8        {d2}, [r1], r3        @ odd row prepared by the previous step
    vdup.u8       d1, d0[7]
    subs          r2, r2, #2
    vdup.u8       d2, d0[6]
    vst1.8        {d1}, [r1], r3        @ even row
    bne           loop_8x8_horz

    vst1.8        {d2}, [r1], r3        @ row 7 (d2 = pu1_src[0] after the last pass)

    bx            lr





@******************************************************************************


@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_dc
@*
@* @brief
@*  Perform Intra prediction for luma_8x8 mode:DC
@*
@* @par Description:
@*  Perform Intra prediction for luma_8x8 mode:DC ,described in sec 8.3.2.2.3
@*  Every output byte is the same value:
@*    bit0 and bit2 of ui_neighboravailability set:
@*        (sum(pu1_src[0..7]) + sum(pu1_src[9..16]) + 8) >> 4
@*    only bit2 set:  (sum(pu1_src[9..16]) + 4) >> 3
@*    only bit0 set:  (sum(pu1_src[0..7])  + 4) >> 3
@*    neither set:    128
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read; r2 is used as scratch)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (bit0: left, bit2: top)
@*
@* @returns
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src,
@                                       UWORD8 *pu1_dst,
@                                       WORD32 src_strd,
@                                       WORD32 dst_strd,
@                                       WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r2 => src_strd                      (scratch for the flag tests)
@   r3 => dst_strd
@   r4 => ui_neighboravailability       (loaded from the stack)


    .global ih264_intra_pred_luma_8x8_mode_dc_a9q

ih264_intra_pred_luma_8x8_mode_dc_a9q:

    push          {r4, lr}
    ldr           r4, [sp, #8]          @ fifth argument sits just above the two saved words

    ands          r2, r4, #0x01         @ left missing -> check top on its own
    beq           top_available
    ands          r2, r4, #0x04         @ left present, top missing -> left only
    beq           left_available

    @ left and top both present: average of 16 samples
    vld1.u8       {d0}, [r0]            @ d0 = pu1_src[0..7]
    add           r0, r0, #9
    vld1.u8       {d1}, [r0]            @ d1 = pu1_src[9..16]
    vpaddl.u8     q0, q0                @ eight 16-bit pair sums
    vadd.u16      d0, d0, d1
    vpaddl.u16    d0, d0
    vpaddl.u32    d0, d0                @ full sum in the low bits of d0
    vqrshrun.s16  d0, q0, #4            @ lane 0 = (sum + 8) >> 4
    vdup.u8       d0, d0[0]
    b             str_pred

top_available:                          @ left is missing
    ands          r2, r4, #0x04         @ top missing as well -> constant fill
    beq           none_available
    add           r0, r0, #9            @ point at pu1_src[9] and share the 8-sample sum below

left_available:                         @ average of the eight samples at r0
    vld1.u8       {d0}, [r0]
    vpaddl.u8     d0, d0
    vpaddl.u16    d0, d0
    vpaddl.u32    d0, d0
    vqrshrun.s16  d0, q0, #3            @ lane 0 = (sum + 4) >> 3; other lanes are never stored
    vdup.u8       d0, d0[0]
    b             str_pred

none_available:                         @ no neighbours: mid-grey
    vmov.u8       q0, #128

str_pred:
    .rept         8                     @ rows 0..7
    vst1.8        {d0}, [r1], r3
    .endr

    pop           {r4, pc}






@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_diag_dl
@*
@* @brief
@*  Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left
@*
@* @par Description:
@*  Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.4
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels
@*
@* @returns
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src,
@                                            UWORD8 *pu1_dst,
@                                            WORD32 src_strd,
@                                            WORD32 dst_strd,
@                                            WORD32 ui_neighboravailability)

@**************Variables Vs
@ Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r2 => src_strd
@   r3 => dst_strd
@   r4 => ui_neighboravailability
@
@ As written, the code reads neither r2 nor the stacked ui_neighboravailability
@ (r4 is never loaded).  r5 and r6 are scratch.
@ Notation: T[i] = pu1_src[9 + i];
@           F[i] = (T[i] + 2*T[i+1] + T[i+2] + 2) >> 2 for i = 0..13,
@           F[14] = (T[14] + 3*T[15] + 2) >> 2.
@ Output row y (y = 0..7) = F[y .. y+7].

    .global ih264_intra_pred_luma_8x8_mode_diag_dl_a9q

ih264_intra_pred_luma_8x8_mode_diag_dl_a9q:

    stmfd         sp!, {r4-r12, r14}    @store register values to stack

    add           r0, r0, #9            @ r0 -> T[0]
    sub           r5, r3, #4            @ row step to apply after a 4-byte post-incremented store
    add           r6, r0, #15           @ r6 -> T[15]
    vld1.8        {q0}, [r0]            @ q0 = T[0..15]
    vext.8        q2, q0, q0, #2
    vext.8        q1, q0, q0, #1
    vld1.8        {d5[6]}, [r6]         @ lane 14 of q2 = T[15], so the last filter tap repeats T[15]
    @ q1 = q0 shifted to left once
    @ q2 = q1 shifted to left once
    vaddl.u8      q10, d0, d2           @Adding for FILT121
    vaddl.u8      q11, d1, d3
    vaddl.u8      q12, d2, d4
    vaddl.u8      q13, d3, d5
    vadd.u16      q12, q10, q12
    vadd.u16      q13, q11, q13

    vqrshrun.s16  d4, q12, #2
    vqrshrun.s16  d5, q13, #2
    @Q2 has all FILT121 values (lane 15 is a wrap-around value and is never stored)
    vst1.8        {d4}, [r1], r3        @ row 0 = F[0..7]
    vext.8        q9, q2, q2, #1        @ q9  = F rotated left by 1
    vext.8        q8, q9, q9, #1        @ q8  = F rotated left by 2
    vst1.8        {d18}, [r1], r3       @ row 1 = F[1..8]
    vext.8        q15, q8, q8, #1       @ q15 = F rotated left by 3
    vst1.8        {d16}, [r1], r3       @ row 2 = F[2..9]
    vst1.8        {d30}, [r1], r3       @ row 3 = F[3..10]
    vst1.32       {d4[1]}, [r1]!        @ row 4 = F[4..7] ...
    vst1.32       {d5[0]}, [r1], r5     @         ... F[8..11]
    vst1.32       {d18[1]}, [r1]!       @ row 5 = F[5..8] ...
    vst1.32       {d19[0]}, [r1], r5    @         ... F[9..12]
    vst1.32       {d16[1]}, [r1]!       @ row 6 = F[6..9] ...
    vst1.32       {d17[0]}, [r1], r5    @         ... F[10..13]
    vst1.32       {d30[1]}, [r1]!       @ row 7 = F[7..10] ...
    vst1.32       {d31[0]}, [r1], r5    @         ... F[11..14]


end_func_diag_dl:
    ldmfd         sp!, {r4-r12, pc}     @Restoring registers from stack




@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_diag_dr
@*
@* @brief
@* Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right
@*
@* @par Description:
@*  Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right ,described in sec 8.3.2.2.5
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels
@*
@* @returns
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src,
@                                            UWORD8 *pu1_dst,
@                                            WORD32 src_strd,
@                                            WORD32 dst_strd,
@                                            WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r2 => src_strd
@   r3 => dst_strd
@   r4 => ui_neighboravailability
@
@ As written, the code reads neither r2 nor the stacked ui_neighboravailability
@ (r4 is never loaded).  r5 is scratch.
@ Notation: S[i] = pu1_src[i];
@           F[i] = (S[i] + 2*S[i+1] + S[i+2] + 2) >> 2 for i = 0..14.
@ Output row y (y = 0..7) = F[7-y .. 14-y].


    .global ih264_intra_pred_luma_8x8_mode_diag_dr_a9q

ih264_intra_pred_luma_8x8_mode_diag_dr_a9q:

    stmfd         sp!, {r4-r12, r14}    @store register values to stack


    vld1.u8       {q0}, [r0]            @ q0 = S[0..15]
    add           r0, r0, #1
    vld1.u8       {q1}, [r0]            @ q1 = S[1..16]
    vext.8        q2, q1, q1, #1        @ q2 = S[2..16], S[1]
    @ q1 = q0 shifted to left once
    @ q2 = q1 shifted to left once
    vaddl.u8      q10, d0, d2           @Adding for FILT121
    vaddl.u8      q11, d1, d3
    vaddl.u8      q12, d2, d4
    vaddl.u8      q13, d3, d5
    vadd.u16      q12, q10, q12
    vadd.u16      q13, q11, q13
    vqrshrun.s16  d4, q12, #2
    vqrshrun.s16  d5, q13, #2
    @Q2 has all FILT121 values (lane 15 is a wrap-around value and is never stored)
    sub           r5, r3, #4            @ row step to apply after a 4-byte post-incremented store
    vext.8        q9, q2, q2, #15       @ q9  = F rotated right by 1
    vst1.8        {d19}, [r1], r3       @ row 0 = F[7..14]
    vext.8        q8, q9, q9, #15       @ q8  = F rotated right by 2
    vst1.8        {d17}, [r1], r3       @ row 1 = F[6..13]
    vext.8        q15, q8, q8, #15      @ q15 = F rotated right by 3
    vst1.8        {d31}, [r1], r3       @ row 2 = F[5..12]
    vst1.32       {d4[1]}, [r1]!        @ row 3 = F[4..7] ...
    vst1.32       {d5[0]}, [r1], r5     @         ... F[8..11]
    vst1.32       {d18[1]}, [r1]!       @ row 4 = F[3..6] ...
    vst1.32       {d19[0]}, [r1], r5    @         ... F[7..10]
    vst1.32       {d16[1]}, [r1]!       @ row 5 = F[2..5] ...
    vst1.32       {d17[0]}, [r1], r5    @         ... F[6..9]
    vst1.32       {d30[1]}, [r1]!       @ row 6 = F[1..4] ...
    vst1.32       {d31[0]}, [r1], r5    @         ... F[5..8]
    vst1.8        {d4}, [r1], r3        @ row 7 = F[0..7]

end_func_diag_dr:
    ldmfd         sp!, {r4-r12, pc}     @Restoring registers from stack




@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_vert_r
@*
@* @brief
@* Perform Intra prediction for luma_8x8 mode:Vertical_Right
@*
@* @par Description:
@*   Perform Intra prediction for luma_8x8 mode:Vertical_Right ,described in sec 8.3.2.2.6
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels
@*
@* @returns
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src,
@                                            UWORD8 *pu1_dst,
@                                            WORD32 src_strd,
@                                            WORD32 dst_strd,
@                                            WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r2 => src_strd
@   r3 => dst_strd
@   r4 => ui_neighboravailability
@
@ As written, the code reads neither r2 nor the stacked ui_neighboravailability
@ (r4 is never loaded).  r5 and r6 are scratch.
@ Notation: S[i] = pu1_src[i];
@           A[i] = (S[i] + S[i+1] + 1) >> 1                 (FILT11,  q2)
@           B[i] = (S[i] + 2*S[i+1] + S[i+2] + 2) >> 2      (FILT121, q3)


    .global ih264_intra_pred_luma_8x8_mode_vert_r_a9q

ih264_intra_pred_luma_8x8_mode_vert_r_a9q:

    stmfd         sp!, {r4-r12, r14}    @store register values to stack

    vld1.u8       {q0}, [r0]            @ q0 = S[0..15]
    add           r0, r0, #1
    vld1.u8       {q1}, [r0]            @ q1 = S[1..16]
    vext.8        q2, q1, q1, #1        @ q2 = S[2..16], S[1]
    @ q1 = q0 shifted to left once
    @ q2 = q1 shifted to left once
    vaddl.u8      q10, d0, d2
    vaddl.u8      q11, d1, d3
    vaddl.u8      q12, d2, d4
    vaddl.u8      q13, d3, d5
    vadd.u16      q12, q10, q12
    vadd.u16      q13, q11, q13

    vqrshrun.s16  d4, q10, #1
    vqrshrun.s16  d5, q11, #1
    vqrshrun.s16  d6, q12, #2
    vqrshrun.s16  d7, q13, #2
    @Q2 has all FILT11 values
    @Q3 has all FILT121 values
    sub           r5, r3, #6            @ row step after 6 bytes of post-incremented stores
    sub           r6, r3, #4            @ row step after 4 bytes of post-incremented stores
    vst1.8        {d5}, [r1], r3        @ row 0 = A[8..15]
    vext.8        q9, q3, q3, #15       @ q9  = B[15], B[0..14]
    vmov.8        q11, q9               @ keep a copy: q9 is overwritten by vuzp below
    vext.8        q8, q2, q2, #1        @ q8  = A[1..15], A[0]
    vst1.8        {d19}, [r1], r3       @row 1 = B[7..14]

    vmov.8        q15, q8               @ keep a copy: q8 is overwritten by vuzp below
    vext.8        q10, q2, q2, #15      @ q10 = A[15], A[0..14]
    vuzp.8        q8, q9                @ q8 = A1,A3,..,A15 | B15,B1,B3,..,B13 ; q9 = A2,A4,..,A14,A0 | B0,B2,..,B14
    @row 2 = B[6], A[8..14]
    vext.8        q14, q8, q8, #1       @ d29 = B1,B3,B5,B7,B9,B11,B13,A1
    vst1.8        {d21}, [r1]           @ A[7..14] ...
    vst1.8        {d6[6]}, [r1], r3     @ ... then byte 0 is overwritten with B[6]
    @row 3 = B5,B7, B[8..13]

    vst1.16       {d29[1]}, [r1]!       @ B5,B7
    vst1.32       {d7[0]}, [r1]!        @ B[8..11]
    vst1.16       {d7[2]}, [r1], r5     @ B12,B13
    @row 4 = B4,B6, A[8..13]
    vst1.16       {d19[1]}, [r1]!       @ B4,B6
    vst1.32       {d5[0]}, [r1]!        @ A[8..11]
    vst1.16       {d5[2]}, [r1], r5     @ A12,A13

    @row 5 = B3,B5, B[7..12]
    vext.8        q13, q9, q9, #1       @ d27 = B2,B4,B6,B8,B10,B12,B14,A2
    vst1.16       {d17[1]}, [r1]!       @ B3,B5
    vst1.32       {d23[0]}, [r1]!       @ B[7..10]
    vst1.16       {d23[2]}, [r1], r5    @ B11,B12


    @row 6 = B2,B4,B6, A[8..12]
    vst1.16       {d27[0]}, [r1]!       @ B2,B4
    vst1.8        {d27[2]}, [r1]!       @ B6
    vst1.8        {d5[0]}, [r1]!        @ A8
    vst1.32       {d31[0]}, [r1], r6    @ A[9..12]
    @row 7 = B1,B3,B5,B7, B[8..11]
    vst1.32       {d29[0]}, [r1]!       @ B1,B3,B5,B7
    vst1.32       {d7[0]}, [r1]!        @ B[8..11]



end_func_vert_r:
    ldmfd         sp!, {r4-r12, pc}     @Restoring registers from stack




@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_horz_d
@*
@* @brief
@* Perform Intra prediction for luma_8x8 mode:Horizontal_Down
@*
@* @par Description:
@*   Perform Intra prediction for luma_8x8 mode:Horizontal_Down ,described in sec 8.3.2.2.7
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels
@*
@* @returns
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src,
@                                            UWORD8 *pu1_dst,
@                                            WORD32 src_strd,
@                                            WORD32 dst_strd,
@                                            WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r2 => src_strd
@   r3 => dst_strd
@   r4 => ui_neighboravailability
@
@ As written, the code reads neither r2 nor the stacked ui_neighboravailability
@ (r4 is never loaded).  r5 and r6 are scratch; q4-q7 are saved with vpush.
@ Notation: S[i] = pu1_src[i];
@           A[i] = (S[i] + S[i+1] + 1) >> 1                 (FILT11,  q2)
@           B[i] = (S[i] + 2*S[i+1] + S[i+2] + 2) >> 2      (FILT121, q3)

    .global ih264_intra_pred_luma_8x8_mode_horz_d_a9q

ih264_intra_pred_luma_8x8_mode_horz_d_a9q:

    stmfd         sp!, {r4-r12, r14}    @store register values to stack
    vpush         {d8-d15}

    vld1.u8       {q0}, [r0]            @ q0 = S[0..15]
    add           r0, r0, #1
    vld1.u8       {q1}, [r0]            @ q1 = S[1..16]
    vext.8        q2, q1, q1, #1        @ q2 = S[2..16], S[1]
    @ q1 = q0 shifted to left once
    @ q2 = q1 shifted to left once
    vaddl.u8      q10, d0, d2
    vaddl.u8      q11, d1, d3
    vaddl.u8      q12, d2, d4
    vaddl.u8      q13, d3, d5
    vadd.u16      q12, q10, q12
    vadd.u16      q13, q11, q13

    vqrshrun.s16  d4, q10, #1
    vqrshrun.s16  d5, q11, #1
    vqrshrun.s16  d6, q12, #2
    vqrshrun.s16  d7, q13, #2
    @Q2 has all FILT11 values
    @Q3 has all FILT121 values
    vmov.8        q4, q2
    vmov.8        q5, q3
    sub           r6, r3, #6            @ row step after 6 bytes of post-incremented stores
    vtrn.8        q4, q5                @ q4 = A0,B0,A2,B2,... ; q5 = A1,B1,A3,B3,...
    vmov.8        q6, q4
    vmov.8        q7, q5
    sub           r5, r3, #4            @ row step after 4 bytes of post-incremented stores
    vtrn.16       q6, q7                @ d12 = A0,B0,A1,B1,A4,B4,A5,B5 ; d14 = A2,B2,A3,B3,A6,B6,A7,B7
    vext.8        q8, q3, q3, #14       @ d17 = B[6..13]
    @ROW 0 = A7,B7, B[8..13]
    vst1.8        {d17}, [r1]           @ B[6..13] ...
    vst1.16       {d10[3]}, [r1], r3    @ ... then bytes 0,1 are overwritten with A7,B7

    @ROW 1 = A6,B6,A7,B7, B[8..11]
    vst1.32       {d14[1]}, [r1]!       @ A6,B6,A7,B7
    vst1.32       {d7[0]}, [r1], r5     @ B[8..11]
    @ROW 2 = A5,B5, A6,B6,A7,B7, B8,B9
    vst1.16       {d10[2]}, [r1]!       @ A5,B5
    vst1.32       {d14[1]}, [r1]!       @ A6,B6,A7,B7
    vst1.16       {d7[0]}, [r1], r6     @ B8,B9
    @ROW 3 = A4,B4,A5,B5, A6,B6,A7,B7
    vst1.32       {d12[1]}, [r1]!       @ A4,B4,A5,B5
    vst1.32       {d14[1]}, [r1], r5    @ A6,B6,A7,B7
    @ROW 4 = A3,B3, A4,B4,A5,B5, A6,B6
    vst1.16       {d14[1]}, [r1]!       @ A3,B3
    vst1.32       {d12[1]}, [r1]!       @ A4,B4,A5,B5
    vst1.16       {d14[2]}, [r1], r6    @ A6,B6
    @ROW 5 = A2,B2,A3,B3, A4,B4,A5,B5
    vst1.32       {d14[0]}, [r1]!       @ A2,B2,A3,B3
    vst1.32       {d12[1]}, [r1], r5    @ A4,B4,A5,B5
    @ROW 6 = A1,B1, A2,B2, A3,B3, A4,B4
    vst1.16       {d10[0]}, [r1]!       @ A1,B1
    vst1.16       {d8[1]}, [r1]!        @ A2,B2
    vst1.16       {d14[1]}, [r1]!       @ A3,B3
    vst1.16       {d12[2]}, [r1], r6    @ A4,B4
    @ROW 7 = A0,B0,A1,B1, A2,B2,A3,B3
    vst1.32       {d12[0]}, [r1]!       @ A0,B0,A1,B1
    vst1.32       {d14[0]}, [r1], r5    @ A2,B2,A3,B3

end_func_horz_d:
    vpop          {d8-d15}
    ldmfd         sp!, {r4-r12, pc}     @Restoring registers from stack





@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_vert_l
@*
@* @brief
@*  Perform Intra prediction for luma_8x8 mode:Vertical_Left
@*
@* @par Description:
@*   Perform Intra prediction for luma_8x8 mode:Vertical_Left ,described in sec 8.3.2.2.8
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels
@*
@* @returns
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src,
@                                           UWORD8 *pu1_dst,
@                                           WORD32
@                                           src_strd,
@                                           WORD32 dst_strd,
@                                           WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r2 => src_strd                      (not read)
@   r3 => dst_strd
@   [sp] => ui_neighboravailability     (not read)
@
@ Notation: T[i] = pu1_src[9 + i];
@           A[i] = (T[i] + T[i+1] + 1) >> 1                 (FILT11,  q2)
@           B[i] = (T[i] + 2*T[i+1] + T[i+2] + 2) >> 2      (FILT121, q3)
@ Output: row 2k = A[k .. k+7], row 2k+1 = B[k .. k+7], k = 0..3.
@ The second 16-byte load covers pu1_src[10..25]; no stored lane depends on
@ pu1_src[25].  NOTE(review): confirm callers keep that byte readable.
@ Leaf routine: no core callee-saved register is written, so only d8-d15
@ (q4-q7) are saved and the return is a plain "bx lr".


    .global ih264_intra_pred_luma_8x8_mode_vert_l_a9q

ih264_intra_pred_luma_8x8_mode_vert_l_a9q:

    vpush         {d8-d15}              @ save q4-q7 (callee-saved, written below)

    add           r0, r0, #9            @ r0 -> T[0]
    vld1.u8       {q0}, [r0]            @ q0 = T[0..15]
    add           r0, r0, #1
    vld1.u8       {q1}, [r0]            @ q1 = T[1..16]
    vext.8        q2, q1, q1, #1        @ q2 = T[2..16], T[1]
    vaddl.u8      q10, d0, d2           @ T[i] + T[i+1]
    vaddl.u8      q11, d1, d3
    vaddl.u8      q12, d2, d4           @ T[i+1] + T[i+2]
    vaddl.u8      q13, d3, d5
    vadd.u16      q12, q10, q12         @ T[i] + 2*T[i+1] + T[i+2]
    vadd.u16      q13, q11, q13

    vqrshrun.s16  d4, q10, #1
    vqrshrun.s16  d5, q11, #1
    vqrshrun.s16  d6, q12, #2
    vext.8        q4, q2, q2, #1        @ q4 = A rotated left by 1
    vqrshrun.s16  d7, q13, #2
    @Q2 has all FILT11 values
    @Q3 has all FILT121 values

    vext.8        q5, q3, q3, #1        @ q5 = B rotated left by 1
    @ROW 0,1
    vst1.8        {d4}, [r1], r3        @ A[0..7]
    vst1.8        {d6}, [r1], r3        @ B[0..7]

    vext.8        q6, q4, q4, #1        @ q6 = A rotated left by 2
    vext.8        q7, q5, q5, #1        @ q7 = B rotated left by 2
    @ROW 2,3
    vst1.8        {d8}, [r1], r3        @ A[1..8]
    vst1.8        {d10}, [r1], r3       @ B[1..8]

    vext.8        q8, q6, q6, #1        @ q8 = A rotated left by 3
    vext.8        q9, q7, q7, #1        @ q9 = B rotated left by 3
    @ROW 4,5
    vst1.8        {d12}, [r1], r3       @ A[2..9]
    vst1.8        {d14}, [r1], r3       @ B[2..9]
    @ROW 6,7
    vst1.8        {d16}, [r1], r3       @ A[3..10]
    vst1.8        {d18}, [r1], r3       @ B[3..10]

end_func_vert_l:
    vpop          {d8-d15}
    bx            lr





@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_8x8_mode_horz_u
@*
@* @brief
@*     Perform Intra prediction for luma_8x8 mode:Horizontal_Up
@*
@* @par Description:
@*      Perform Intra prediction for luma_8x8 mode:Horizontal_Up ,described in sec 8.3.2.2.9
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels
@*
@* @returns
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src,
@                                           UWORD8 *pu1_dst,
@                                           WORD32 src_strd,
@                                           WORD32 dst_strd,
@                                           WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r2 => src_strd
@   r3 => dst_strd
@   r4 => ui_neighboravailability
@
@ As written, the code reads neither r2 nor the stacked ui_neighboravailability
@ (r4 is never loaded).  r12 holds the address of the byte-index table
@ ih264_gai1_intrapred_luma_8x8_horz_u (defined outside this file).
@ Notation: S[i] = pu1_src[i];
@           A[i] = FILT11 result in q2, B[i] = FILT121 result in q3.

    .global ih264_intra_pred_luma_8x8_mode_horz_u_a9q

ih264_intra_pred_luma_8x8_mode_horz_u_a9q:

    stmfd         sp!, {r4-r12, r14}    @store register values to stack
    vpush         {d8-d15}

    vld1.u8       {q0}, [r0]            @ q0 = S[0..15]
    vld1.u8       {d1[7]}, [r0]         @ lane 15 replaced by S[0]
    vext.8        q1, q0, q0, #1        @ q1 = S[1..14], S[0], S[0]
    vext.8        q2, q1, q1, #1        @ q2 = S[2..14], S[0], S[0], S[1]
    @ LOADING V TABLE
    @ The literal holds (table - scrlb8x8l2 - 8); adding pc at scrlb8x8l2 yields
    @ the table address.  This relies on pc reading as "this instruction + 8"
    @ (ARM state), so the add must stay exactly at the label.
    ldr           r12, scratch_intrapred_addr_8x8
scrlb8x8l2:
    add           r12, r12, pc
    vaddl.u8      q10, d0, d2
    vaddl.u8      q11, d1, d3
    vaddl.u8      q12, d2, d4
    vaddl.u8      q13, d3, d5
    vadd.u16      q12, q10, q12
    vadd.u16      q13, q11, q13
    vld1.u8       {q5}, [r12]           @ q5 = 16 byte indices read from the table
    vqrshrun.s16  d4, q10, #1
    vqrshrun.s16  d5, q11, #1
    vqrshrun.s16  d6, q12, #2
    vqrshrun.s16  d7, q13, #2
    @Q2 has all FILT11 values
    @Q3 has all FILT121 values
    vtbl.u8       d12, {q2, q3}, d10    @ gather row 0 from the 32 bytes A[0..15],B[0..15]
    vdup.u8       q7, d5[7]             @ q7 = 16 copies of A[15], which equals S[0] (both of its inputs are S[0])
    vtbl.u8       d13, {q2, q3}, d11    @ gather row 4
    vext.8        q8, q6, q7, #2        @ rows 1 and 5: previous rows advanced by 2 bytes, tail filled from q7
    vext.8        q9, q8, q7, #2        @ rows 2 and 6
    vst1.8        {d12}, [r1], r3       @ row 0
    vext.8        q10, q9, q7, #2       @ rows 3 and 7
    vst1.8        {d16}, [r1], r3       @ row 1
    vst1.8        {d18}, [r1], r3       @ row 2
    vst1.8        {d20}, [r1], r3       @ row 3
    vst1.8        {d13}, [r1], r3       @ row 4
    vst1.8        {d17}, [r1], r3       @ row 5
    vst1.8        {d19}, [r1], r3       @ row 6
    vst1.8        {d21}, [r1], r3       @ row 7


end_func_horz_u:
    vpop          {d8-d15}
    ldmfd         sp!, {r4-r12, pc}     @Restoring registers from stack







