@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
@**
@******************************************************************************
@* @file
@*  ih264_intra_pred_chroma_a9q.s
@*
@* @brief
@*  Contains function definitions for intra chroma prediction.
@*
@* @author
@*  Ittiam
@*
@* @par List of Functions:
@*
@*  - ih264_intra_pred_chroma_8x8_mode_dc_a9q()
@*  - ih264_intra_pred_chroma_8x8_mode_horz_a9q()
@*  - ih264_intra_pred_chroma_8x8_mode_vert_a9q()
@*  - ih264_intra_pred_chroma_8x8_mode_plane_a9q()
@*
@* @remarks
@*  Syntax : GNU assembler, ARM (A32) instruction set with NEON.
@*  Every function takes its first four arguments in r0-r3 and the fifth
@*  (ui_neighboravailability) as the first word on the caller's stack.
@*
@*  Source bytes touched by the code in this file (offsets from pu1_src):
@*    [0..15]   "left" neighbours, interleaved U,V. The horizontal predictor
@*              writes output row 0 from bytes 14..15 and row 7 from bytes
@*              0..1, i.e. the left column is presumably stored bottom-to-top
@*              (confirm against the C caller).
@*    [16..17]  read only by the plane predictor as part of the gradient
@*              (presumably the top-left U,V pair).
@*    [18..33]  "top" neighbours, interleaved U,V.
@*
@*******************************************************************************
@*

@* All the functions here are replicated from ih264_chroma_intra_pred_filters.c
@

.text
@ The literal words below are built with "- <label> - 8", which is only valid
@ when pc reads as "current instruction + 8", i.e. in ARM state. Make that
@ assumption explicit instead of relying on the assembler's default mode.
.arm
.p2align 2

.extern ih264_gai1_intrapred_chroma_plane_coeffs1
.hidden ih264_gai1_intrapred_chroma_plane_coeffs1
.extern ih264_gai1_intrapred_chroma_plane_coeffs2
.hidden ih264_gai1_intrapred_chroma_plane_coeffs2

@ pc-relative offsets of the two coefficient tables; each is turned into an
@ absolute address by "add r12, r12, pc" at the matching scrlblcN label.
scratch_chroma_intrapred_addr1:
    .long ih264_gai1_intrapred_chroma_plane_coeffs1 - scrlblc1 - 8

scratch_intrapred_chroma_plane_addr1:
    .long ih264_gai1_intrapred_chroma_plane_coeffs2 - scrlblc2 - 8

@**
@*******************************************************************************
@*
@* ih264_intra_pred_chroma_8x8_mode_dc
@*
@* @brief
@*  Perform Intra prediction for chroma_8x8 mode:DC
@*
@* @par Description:
@*  Perform Intra prediction for chroma_8x8 mode:DC, described in sec 8.3.4.1
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source containing alternate U and V samples
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination with alternate U and V samples
@*
@* @param[in] src_strd
@*  integer source stride (not used by this function)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels; bit 0x01 is tested as "left
@*  available" and bit 0x04 as "top available"
@*
@* @returns
@*  None. Writes 8 rows of 16 bytes to pu1_dst.
@*
@* @remarks
@*  Clobbers r0, r1, q0-q3 and flags; r4, lr and d8-d15 are saved/restored.
@*
@*******************************************************************************
@void ih264_intra_pred_chroma_8x8_mode_dc(UWORD8 *pu1_src,
@                                         UWORD8 *pu1_dst,
@                                         WORD32 src_strd,
@                                         WORD32 dst_strd,
@                                         WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r2 => src_strd (unused)
@   r3 => dst_strd
@   r4 => ui_neighboravailability (loaded from the stack)
@   q4 => 16-byte pattern stored to output rows 0-3
@   q5 => 16-byte pattern stored to output rows 4-7

    .global ih264_intra_pred_chroma_8x8_mode_dc_a9q

ih264_intra_pred_chroma_8x8_mode_dc_a9q:

    stmfd         sp!, {r4, r14}        @ save r4 and the return address
    ldr           r4, [sp, #8]          @ r4 = ui_neighboravailability (first stack arg, above the 2 saved words)
    vpush         {d8-d15}              @ q4-q7 are used below

    tst           r4, #0x01             @ left available? if not, try top only
    beq           top_available
    tst           r4, #0x04             @ top available? if not, left only
    beq           left_available

    @ ---- both left and top available ----
    vld1.u8       {q0}, [r0]            @ q0 = left bytes  (d0 = src[0..7],   d1 = src[8..15])
    add           r0, r0, #18
    vld1.u8       {q1}, [r0]            @ q1 = top bytes   (d2 = src[18..25], d3 = src[26..33])
    vaddl.u8      q2, d1, d2            @ q2 = src[8..15] + src[18..25]  -> quadrant stored in d8
    vaddl.u8      q3, d0, d3            @ q3 = src[0..7]  + src[26..33]  -> quadrant stored in d11
    vmovl.u8      q1, d3                @ q1 = src[26..33] widened       -> quadrant stored in d9
    vmovl.u8      q0, d0                @ q0 = src[0..7] widened         -> quadrant stored in d10

    @ fold each 8-lane vector to 4 lanes (lane order stays U,V,U,V)
    vadd.u16      d12, d4, d5
    vadd.u16      d13, d2, d3
    vadd.u16      d15, d6, d7
    vadd.u16      d14, d0, d1

    @ adding adjacent 32-bit lanes sums the two U halves and the two V halves
    @ at once; the 16-bit sums are at most 8*255 so nothing carries across
    vpadd.u32     d12, d12, d15         @ d12 = {U,V sum of q2 ; U,V sum of q3}   (8 samples each)
    vpadd.u32     d14, d13, d14         @ d14 = {U,V sum of q1 ; U,V sum of q0}   (4 samples each)
    vqrshrun.s16  d12, q6, #3           @ rounded /8 -> bytes; low 4 bytes are the wanted U,V pairs
    vqrshrun.s16  d14, q7, #2           @ rounded /4 -> bytes
    vdup.u16      d8, d12[0]            @ rows 0-3, first 8 bytes
    vdup.u16      d9, d14[0]            @ rows 0-3, last 8 bytes
    vdup.u16      d10, d14[1]           @ rows 4-7, first 8 bytes
    vdup.u16      d11, d12[1]           @ rows 4-7, last 8 bytes
    b             str_pred

top_available:                          @ left is not available
    tst           r4, #0x04             @ top available? otherwise nothing is available
    beq           none_available

    add           r0, r0, #18
    vld1.u8       {q0}, [r0]            @ q0 = top bytes src[18..33]
    vmovl.u8      q1, d0
    vmovl.u8      q2, d1
    vadd.u16      d0, d2, d3            @ fold src[18..25] to 4 lanes
    vadd.u16      d1, d4, d5            @ fold src[26..33] to 4 lanes
    vpaddl.u32    q0, q0                @ each 64-bit half: bits 0-15 = U sum, bits 16-31 = V sum
    vqrshrun.s16  d0, q0, #2            @ rounded /4; bytes 0,1 and 4,5 hold the two U,V pairs
    vdup.u16      d8, d0[0]             @ first 8 bytes of every row
    vdup.u16      d9, d0[2]             @ last 8 bytes of every row
    vmov          q5, q4                @ rows 4-7 use the same pattern as rows 0-3
    b             str_pred

left_available:                         @ only left is available
    vld1.u8       {q0}, [r0]            @ q0 = left bytes src[0..15]
    vmovl.u8      q1, d0
    vmovl.u8      q2, d1
    vadd.u16      d0, d2, d3            @ fold src[0..7] to 4 lanes
    vadd.u16      d1, d4, d5            @ fold src[8..15] to 4 lanes
    vpaddl.u32    q0, q0                @ each 64-bit half: bits 0-15 = U sum, bits 16-31 = V sum
    vqrshrun.s16  d0, q0, #2            @ rounded /4; bytes 0,1 and 4,5 hold the two U,V pairs
    vdup.u16      q5, d0[0]             @ rows 4-7 <- average of src[0..7]
    vdup.u16      q4, d0[2]             @ rows 0-3 <- average of src[8..15]
    b             str_pred

none_available:                         @ neither left nor top available
    vmov.u8       q4, #128
    vmov.u8       q5, #128

str_pred:
    vst1.8        {q4}, [r1], r3        @ rows 0-3
    vst1.8        {q4}, [r1], r3
    vst1.8        {q4}, [r1], r3
    vst1.8        {q4}, [r1], r3
    vst1.8        {q5}, [r1], r3        @ rows 4-7
    vst1.8        {q5}, [r1], r3
    vst1.8        {q5}, [r1], r3
    vst1.8        {q5}, [r1], r3

    vpop          {d8-d15}
    ldmfd         sp!, {r4, pc}         @ restore r4 and return



@******************************************************************************


@**
@*******************************************************************************
@*
@* ih264_intra_pred_chroma_8x8_mode_horz
@*
@* @brief
@*  Perform Intra prediction for chroma_8x8 mode:Horizontal
@*
@* @par Description:
@*  Perform Intra prediction for chroma_8x8 mode:Horizontal, described in sec 8.3.4.2
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source containing alternate U and V samples
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination with alternate U and V samples
@*
@* @param[in] src_strd
@*  integer source stride (not used; r2 is reused as the loop counter)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not used in this function)
@*
@* @returns
@*  None. Writes 8 rows of 16 bytes to pu1_dst.
@*
@* @remarks
@*  Leaf function: touches only r1, r2, q0-q2 and flags, so nothing is
@*  saved on the stack.
@*
@*******************************************************************************
@*
@void ih264_intra_pred_chroma_8x8_mode_horz(UWORD8 *pu1_src,
@                                           UWORD8 *pu1_dst,
@                                           WORD32 src_strd,
@                                           WORD32 dst_strd,
@                                           WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r2 => loop counter (src_strd is not used)
@   r3 => dst_strd
@   ui_neighboravailability stays on the stack and is never read


    .global ih264_intra_pred_chroma_8x8_mode_horz_a9q

ih264_intra_pred_chroma_8x8_mode_horz_a9q:

    vld1.u8       {q0}, [r0]            @ q0 = left bytes src[0..15] (8 U,V pairs)
    mov           r2, #6                @ 3 loop passes, 2 rows each

    vdup.u16      q1, d1[3]             @ row 0 <- src[14..15]
    vdup.u16      q2, d1[2]             @ row 1 <- src[12..13]
    vst1.8        {q1}, [r1], r3

loop_8x8_horz:
    vext.8        q0, q0, q0, #12       @ rotate so the next two pairs land in d1[3], d1[2]
    vst1.8        {q2}, [r1], r3        @ odd row prepared before this pass
    vdup.u16      q1, d1[3]
    subs          r2, r2, #2
    vdup.u16      q2, d1[2]
    vst1.8        {q1}, [r1], r3        @ even row of this pass
    bne           loop_8x8_horz

    vst1.8        {q2}, [r1], r3        @ row 7 <- src[0..1]

    bx            lr




@**
@*******************************************************************************
@*
@* ih264_intra_pred_chroma_8x8_mode_vert
@*
@* @brief
@*  Perform Intra prediction for chroma_8x8 mode:vertical
@*
@* @par Description:
@*  Perform Intra prediction for chroma_8x8 mode:vertical, described in sec 8.3.4.3
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source containing alternate U and V samples
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination with alternate U and V samples
@*
@* @param[in] src_strd
@*  integer source stride (not used in this function)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not used in this function)
@*
@* @returns
@*  None. Writes 8 rows of 16 bytes to pu1_dst.
@*
@* @remarks
@*  Leaf function: touches only r0, r1 and q0, so nothing is saved on
@*  the stack.
@*
@*******************************************************************************
@void ih264_intra_pred_chroma_8x8_mode_vert(UWORD8 *pu1_src,
@                                           UWORD8 *pu1_dst,
@                                           WORD32 src_strd,
@                                           WORD32 dst_strd,
@                                           WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r2 => src_strd (unused)
@   r3 => dst_strd
@   ui_neighboravailability stays on the stack and is never read


    .global ih264_intra_pred_chroma_8x8_mode_vert_a9q

ih264_intra_pred_chroma_8x8_mode_vert_a9q:

    add           r0, r0, #18           @ top neighbours start at pu1_src + 18
    vld1.8        {q0}, [r0]            @ q0 = 16 top bytes

    vst1.8        {q0}, [r1], r3        @ replicate them into all 8 rows
    vst1.8        {q0}, [r1], r3
    vst1.8        {q0}, [r1], r3
    vst1.8        {q0}, [r1], r3
    vst1.8        {q0}, [r1], r3
    vst1.8        {q0}, [r1], r3
    vst1.8        {q0}, [r1], r3
    vst1.8        {q0}, [r1], r3

    bx            lr




@******************************************************************************


@**
@*******************************************************************************
@*
@* ih264_intra_pred_chroma_8x8_mode_plane
@*
@* @brief
@*  Perform Intra prediction for chroma_8x8 mode:PLANE
@*
@* @par Description:
@*  Perform Intra prediction for chroma_8x8 mode:PLANE, described in sec 8.3.4.4
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source containing alternate U and V samples
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination with alternate U and V samples
@*
@* @param[in] src_strd
@*  integer source stride (not used in this function)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this function)
@*
@* @returns
@*  None. Writes 8 rows of 16 bytes to pu1_dst.
@*
@* @remarks
@*  Reads two external tables of eight 16-bit values each:
@*  ih264_gai1_intrapred_chroma_plane_coeffs1 (gradient weights) and
@*  ih264_gai1_intrapred_chroma_plane_coeffs2 (per-column and per-row
@*  position factors). Their contents are defined outside this file.
@*  Clobbers r0, r1, r12, q0-q3, q8-q15; r6-r10, lr and d8-d15 are
@*  saved/restored.
@*
@*******************************************************************************
@void ih264_intra_pred_chroma_8x8_mode_plane(UWORD8 *pu1_src,
@                                            UWORD8 *pu1_dst,
@                                            WORD32 src_strd,
@                                            WORD32 dst_strd,
@                                            WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0  => *pu1_src
@   r1  => *pu1_dst
@   r2  => src_strd (unused)
@   r3  => dst_strd
@   r6-r9 => corner samples used for the constant term
@   r10 => scratch source pointer
@   r12 => address of the coefficient tables
@   ui_neighboravailability stays on the stack and is never read

    .global ih264_intra_pred_chroma_8x8_mode_plane_a9q
ih264_intra_pred_chroma_8x8_mode_plane_a9q:

    stmfd         sp!, {r6-r10, lr}     @ only the callee-saved registers actually used
    vpush         {d8-d15}              @ q4-q7 are used below

    @ ---- gather the neighbour samples for both gradients ----
    vld1.32       d0, [r0]              @ d0 = src[0..7]
    add           r10, r0, #10
    vld1.32       d1, [r10]             @ d1 = src[10..17]
    add           r10, r10, #6
    vrev64.16     d5, d0                @ d5 = src[0..7] with the four U,V pairs in reverse order
    vld1.32       d2, [r10]!            @ d2 = src[16..23]; r10 advances by 8
    add           r10, r10, #2
    vrev64.16     d7, d2                @ d7 = src[16..23] with the four U,V pairs in reverse order
    vld1.32       d3, [r10]             @ d3 = src[26..33]
    ldr           r12, scratch_chroma_intrapred_addr1
scrlblc1:
    add           r12, r12, pc          @ r12 = &ih264_gai1_intrapred_chroma_plane_coeffs1
    vsubl.u8      q5, d5, d1            @ pairwise differences along the left column
    vld1.64       {q4}, [r12]           @ q4 = eight 16-bit gradient weights from the coeffs1 table
    vsubl.u8      q6, d3, d7            @ pairwise differences along the top row
    vmul.s16      q7, q5, q4            @ weighted left-column differences
    vmul.s16      q8, q6, q4            @ weighted top-row differences
    vuzp.16       q7, q8                @ q7 = U terms {left, top}; q8 = V terms {left, top}

    @ two rounds of pairwise adds leave the 4-term sum in every lane
    vpadd.s16     d14, d14
    vpadd.s16     d15, d15
    vpadd.s16     d16, d16
    vpadd.s16     d17, d17
    vpadd.s16     d14, d14              @ d14 = U left-column gradient sum
    vpadd.s16     d15, d15              @ d15 = U top-row gradient sum
    vpadd.s16     d16, d16              @ d16 = V left-column gradient sum
    vpadd.s16     d17, d17              @ d17 = V top-row gradient sum

    mov           r6, #34
    vdup.16       q9, r6

    @ slope = (34 * sum + 32) >> 6
    vmull.s16     q11, d14, d18
    vmull.s16     q12, d15, d18
    vmull.s16     q13, d16, d18
    vmull.s16     q14, d17, d18

    vrshrn.s32    d10, q11, #6          @ d10 = U row slope    (from left-column sum)
    vrshrn.s32    d12, q12, #6          @ d12 = U column slope (from top-row sum)
    vrshrn.s32    d13, q13, #6          @ d13 = V row slope
    vrshrn.s32    d14, q14, #6          @ d14 = V column slope


    @ ---- constant term: 16 * (src[0] + src[32]) for U, 16 * (src[1] + src[33]) for V ----
    ldrb          r6, [r0], #1          @ r6 = src[0]
    add           r10, r0, #31          @ r10 = pu1_src + 32
    ldrb          r8, [r0], #1          @ r8 = src[1]
    ldrb          r7, [r10], #1         @ r7 = src[32]
    ldrb          r9, [r10], #1         @ r9 = src[33]

    add           r6, r6, r7
    add           r8, r8, r9
    lsl           r6, r6, #4
    lsl           r8, r8, #4

    vdup.16       q0, r6
    vdup.16       q1, r8
    vdup.16       q2, d12[0]
    vdup.16       q3, d10[0]

    vdup.16       q12, d14[0]
    vdup.16       q13, d13[0]
    vzip.16       q2, q12               @ q2 = column slopes interleaved U,V,U,V,...
    vzip.16       q3, q13               @ q3 = row slopes interleaved U,V,U,V,...
    vzip.16       q0, q1                @ q0 = constant terms interleaved U,V,U,V,...

    ldr           r12, scratch_intrapred_chroma_plane_addr1
scrlblc2:
    add           r12, r12, pc          @ r12 = &ih264_gai1_intrapred_chroma_plane_coeffs2
    vld1.64       {q4}, [r12]           @ q4 = eight 16-bit position factors
    vmov.16       q5, q4
    vmov          q11, q4               @ keep an untouched copy for the per-row factors
    vzip.16       q4, q5                @ q4 = factors 0..3 each duplicated, q5 = factors 4..7 each duplicated

    vmul.s16      q6, q2, q4
    vmul.s16      q8, q2, q5
    vadd.s16      q6, q0, q6            @ q6 = constant + column term, first 8 bytes of a row
    vadd.s16      q8, q0, q8            @ q8 = constant + column term, last 8 bytes of a row


    @ ---- 8 rows: out = sat_u8((q6|q8 + row_slope * factor[row] + 16) >> 5) ----
    @ The instruction order below is the original software-pipelined schedule.
    vdup.16       q10, d22[0]           @ row 0 factor
    vmul.s16      q2, q3, q10
    vdup.16       q15, d22[1]           @ row 1 factor
    vmul.s16      q9, q3, q10
    vmul.s16      q7, q3, q15
    vmul.s16      q4, q3, q15
    vadd.s16      q12, q6, q2
    vadd.s16      q0, q8, q9
    vadd.s16      q1, q6, q7
    vqrshrun.s16  d28, q12, #5
    vadd.s16      q13, q8, q4
    vqrshrun.s16  d29, q0, #5
    vdup.16       q10, d22[2]           @ row 2 factor
    vst1.8        {q14}, [r1], r3       @ row 0
    vqrshrun.s16  d28, q1, #5
    vqrshrun.s16  d29, q13, #5
    vmul.s16      q2, q3, q10
    vmul.s16      q9, q3, q10
    vst1.8        {q14}, [r1], r3       @ row 1
    vadd.s16      q12, q6, q2
    vadd.s16      q0, q8, q9
    vdup.16       q15, d22[3]           @ row 3 factor
    vqrshrun.s16  d28, q12, #5
    vqrshrun.s16  d29, q0, #5
    vmul.s16      q7, q3, q15
    vmul.s16      q4, q3, q15
    vst1.8        {q14}, [r1], r3       @ row 2
    vadd.s16      q1, q6, q7
    vadd.s16      q13, q8, q4
    vdup.16       q10, d23[0]           @ row 4 factor
    vqrshrun.s16  d28, q1, #5
    vqrshrun.s16  d29, q13, #5
    vmul.s16      q2, q3, q10
    vmul.s16      q9, q3, q10
    vst1.8        {q14}, [r1], r3       @ row 3
    vadd.s16      q12, q6, q2
    vadd.s16      q0, q8, q9
    vdup.16       q15, d23[1]           @ row 5 factor
    vqrshrun.s16  d28, q12, #5
    vqrshrun.s16  d29, q0, #5
    vmul.s16      q7, q3, q15
    vmul.s16      q4, q3, q15
    vst1.8        {q14}, [r1], r3       @ row 4
    vadd.s16      q1, q6, q7
    vadd.s16      q13, q8, q4
    vdup.16       q10, d23[2]           @ row 6 factor
    vqrshrun.s16  d28, q1, #5
    vqrshrun.s16  d29, q13, #5
    vmul.s16      q2, q3, q10
    vmul.s16      q9, q3, q10
    vst1.8        {q14}, [r1], r3       @ row 5
    vadd.s16      q12, q6, q2
    vadd.s16      q0, q8, q9
    vdup.16       q15, d23[3]           @ row 7 factor
    vqrshrun.s16  d28, q12, #5
    vqrshrun.s16  d29, q0, #5
    vmul.s16      q7, q3, q15
    vmul.s16      q4, q3, q15
    vst1.8        {q14}, [r1], r3       @ row 6
    vadd.s16      q1, q6, q7
    vadd.s16      q13, q8, q4
    vqrshrun.s16  d28, q1, #5
    vqrshrun.s16  d29, q13, #5
    vst1.8        {q14}, [r1], r3       @ row 7



end_func_plane:

    vpop          {d8-d15}
    ldmfd         sp!, {r6-r10, pc}     @ restore registers and return