@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
@**
@******************************************************************************
@* @file
@*  ih264_intra_pred_luma_16x16_a9q.s
@*
@* @brief
@*  Contains function definitions for intra 16x16 Luma prediction.
@*
@* @author
@*  Ittiam
@*
@* @par List of Functions:
@*
@*  - ih264_intra_pred_luma_16x16_mode_vert_a9q()
@*  - ih264_intra_pred_luma_16x16_mode_horz_a9q()
@*  - ih264_intra_pred_luma_16x16_mode_dc_a9q()
@*  - ih264_intra_pred_luma_16x16_mode_plane_a9q()
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@*

@* All the functions here are replicated from ih264_intra_pred_filters.c
@

@**
@**
@**
@

.text
.p2align 2


    .extern ih264_gai1_intrapred_luma_plane_coeffs
    .hidden ih264_gai1_intrapred_luma_plane_coeffs

@ Offset of the plane-coefficient table relative to (scrlbl1 + 8). The plane
@ routine loads this word and adds pc at label scrlbl1; in ARM state pc reads
@ as the address of that instruction plus 8, so the sum is the absolute address
@ of the table without needing a load-time relocation in the text section.
scratch_intrapred_addr1:
    .long ih264_gai1_intrapred_luma_plane_coeffs - scrlbl1 - 8

@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_16x16_mode_vert
@*
@* @brief
@*  Perform Intra prediction for luma_16x16 mode:vertical
@*
@* @par Description:
@*  Perform Intra prediction for luma_16x16 mode:Vertical, described in
@*  sec 8.3.3.1. The 16 bytes starting at pu1_src + 17 are loaded once and
@*  written unchanged to 16 consecutive destination rows.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read by this function)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not used in this function)
@*
@* @returns
@*
@* @remarks
@*  Leaf routine. Only r0, r1 and q0 are modified, so nothing is saved on the
@*  stack.
@*
@*******************************************************************************
@void ih264_intra_pred_luma_16x16_mode_vert(UWORD8 *pu1_src,
@                                        UWORD8 *pu1_dst,
@                                        WORD32 src_strd,
@                                        WORD32 dst_strd,
@                                        WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0   => *pu1_src                  (advanced by 17 before the load)
@   r1   => *pu1_dst                  (post-incremented by dst_strd per row)
@   r2   => src_strd                  (not read)
@   r3   => dst_strd
@   [sp] => ui_neighboravailability   (fifth argument, never loaded)


    .global ih264_intra_pred_luma_16x16_mode_vert_a9q

ih264_intra_pred_luma_16x16_mode_vert_a9q:

    add           r0, r0, #17           @ r0 = pu1_src + 17
    vld1.8        {q0}, [r0]            @ q0 = the 16 bytes replicated to every row

    @ 16 rows: store q0, then step the destination pointer by dst_strd.
    .rept         16
    vst1.8        {q0}, [r1], r3
    .endr

    bx            lr                    @ r4-r12 were never touched; no restore needed





@******************************************************************************


@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_16x16_mode_horz
@*
@* @brief
@*  Perform Intra prediction for luma_16x16 mode:horizontal
@*
@* @par Description:
@*  Perform Intra prediction for luma_16x16 mode:horizontal, described in
@*  sec 8.3.3.2. Each of the 16 bytes at pu1_src is replicated across one
@*  destination row: pu1_src[15] fills the first row and pu1_src[0] the last.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not used in this function)
@*
@* @returns
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@*
@void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src,
@                                         UWORD8 *pu1_dst,
@                                         WORD32 src_strd,
@                                         WORD32 dst_strd,
@                                         WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r2   => src_strd                  (value ignored; reused as the loop counter)
@   r3   => dst_strd
@   [sp] => ui_neighboravailability   (fifth argument, never loaded)
@
@   q0 holds pu1_src[0..15]; q1/q2 hold the two rows currently being built.
@   Leaf routine: only r0-r3 and q0-q2 are touched, so nothing is pushed.

    .global ih264_intra_pred_luma_16x16_mode_horz_a9q

ih264_intra_pred_luma_16x16_mode_horz_a9q:

    vld1.u8       {q0}, [r0]            @ q0 = pu1_src[0..15]
    mov           r2, #14               @ 14 rows are produced inside the loop, two per pass

    vdup.u8       q1, d1[7]             @ row 0 <- byte 15 of q0
    vdup.u8       q2, d1[6]             @ row 1 <- byte 14 of q0
    vst1.8        {q1}, [r1], r3        @ emit row 0

.Lhorz_row_pair:
    vext.8        q0, q0, q0, #14       @ rotate q0 so the next two source bytes sit in lanes 15 and 14
    vst1.8        {q2}, [r1], r3        @ emit the odd row prepared on the previous pass
    vdup.u8       q1, d1[7]             @ next even row
    subs          r2, r2, #2
    vdup.u8       q2, d1[6]             @ next odd row
    vst1.8        {q1}, [r1], r3        @ emit the even row
    bne           .Lhorz_row_pair

    vst1.8        {q2}, [r1], r3        @ emit row 15 (built from pu1_src[0])

    bx            lr




@******************************************************************************


@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_16x16_mode_dc
@*
@* @brief
@*  Perform Intra prediction for luma_16x16 mode:DC
@*
@* @par Description:
@*  Perform Intra prediction for luma_16x16 mode:DC, described in sec 8.3.3.3.
@*  Bit 0x01 of ui_neighboravailability selects the 16 bytes at pu1_src and
@*  bit 0x04 selects the 16 bytes at pu1_src + 17. The selected bytes are
@*  averaged with rounding (shift by 5 when both groups are used, by 4 when
@*  only one is used); when neither bit is set the value 128 is used. The
@*  resulting byte fills all 16 rows of 16 destination bytes.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels
@*
@* @returns
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src,
@                                       UWORD8 *pu1_dst,
@                                       WORD32 src_strd,
@                                       WORD32 dst_strd,
@                                       WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r2 => src_strd
@   r3 => dst_strd
@   r4 => ui_neighboravailability
@ ih264_intra_pred_luma_16x16_mode_dc_a9q
@
@ Working registers:
@   r0  source pointer (advanced by 17 to reach the second group of 16 bytes)
@   r1  destination pointer, post-incremented by r3 (dst_strd) after each row
@   r2  scratch result of the availability bit tests (src_strd is not needed)
@   r4  ui_neighboravailability, fetched from the caller's stack
@   q0  running sum, then the DC byte replicated into all 16 lanes
    .global ih264_intra_pred_luma_16x16_mode_dc_a9q

ih264_intra_pred_luma_16x16_mode_dc_a9q:

    push          {r4, lr}
    ldr           r4, [sp, #8]          @ fifth argument sits just above the two saved words

    ands          r2, r4, #0x01         @ bit 0x01 clear -> skip the bytes at pu1_src
    beq           .Ldc_first_group_missing
    ands          r2, r4, #0x04         @ bit 0x04 clear -> only the bytes at pu1_src are used
    beq           .Ldc_single_group

    @ Both groups available: sum 32 bytes, then (sum + 16) >> 5.
    vld1.u8       {q0}, [r0]            @ bytes 0..15
    add           r0, r0, #17
    vpaddl.u8     q0, q0                @ 16 x u8 -> 8 x u16 pair sums
    vld1.u8       {q1}, [r0]            @ bytes 17..32
    vpaddl.u8     q1, q1
    vadd.u16      q0, q0, q1
    vadd.u16      d0, d0, d1            @ fold the high half onto the low half
    vpaddl.u16    d0, d0
    vpaddl.u32    d0, d0                @ total now in the low lane of d0
    vqrshrun.s16  d0, q0, #5            @ rounded shift, narrowed to u8; only lane 0 is meaningful
    vdup.u8       q0, d0[0]
    b             .Ldc_fill_block

.Ldc_first_group_missing:
    ands          r2, r4, #0x04         @ neither bit set -> constant prediction
    beq           .Ldc_nothing_available

    add           r0, r0, #17           @ use the second group; falls through to the shared reduction

.Ldc_single_group:
    @ One group of 16 bytes at r0: (sum + 8) >> 4.
    vld1.u8       {q0}, [r0]
    vpaddl.u8     q0, q0
    vadd.u16      d0, d0, d1
    vpaddl.u16    d0, d0
    vpaddl.u32    d0, d0
    vqrshrun.s16  d0, q0, #4
    vdup.u8       q0, d0[0]
    b             .Ldc_fill_block

.Ldc_nothing_available:
    vmov.u8       q0, #128

.Ldc_fill_block:
    @ Write the replicated DC value to 16 rows of 16 bytes.
    .rept         16
    vst1.8        {q0}, [r1], r3
    .endr

    pop           {r4, pc}





@******************************************************************************


@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_16x16_mode_plane
@*
@* @brief
@*  Perform Intra prediction for luma_16x16 mode:PLANE
@*
@* @par Description:
@*  Perform Intra prediction for luma_16x16 mode:PLANE, described in
@*  sec 8.3.3.4. Two weighted gradients are computed, one from the bytes at
@*  pu1_src[16..32] (NEON) and one from the bytes at pu1_src[0..16] (core
@*  registers). Each is scaled as (5 * sum + 32) >> 6 to give i_b and i_c.
@*  i_a is (pu1_src[0] + pu1_src[32]) << 4. Every output byte is the
@*  saturated, rounded value of (i_a + i_b * (x - 7) + i_c * (y - 7)) >> 5.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read; r2 is reused as the destination pointer)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this function)
@*
@* @returns
@*
@* @remarks
@*  Reads ih264_gai1_intrapred_luma_plane_coeffs through the pc-relative
@*  offset stored at scratch_intrapred_addr1. The table is presumably the
@*  sixteen byte weights 1..16 - confirm against its definition.
@*
@*******************************************************************************
@void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src,
@                                        UWORD8 *pu1_dst,
@                                        WORD32 src_strd,
@                                        WORD32 dst_strd,
@                                        WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0      => *pu1_src, later the ascending byte pointer, later the row counter
@   r1      => *pu1_dst on entry, then pointer into pu1_src[16..32]
@   r2      => destination pointer (copy of pu1_dst)
@   r3      => dst_strd
@   r4, lr  => byte steps -1 and +1
@   r7      => table address, then the descending byte pointer
@   r10     => address of pu1_src[16]
@   r12     => vertical gradient accumulator, then i_c
@   r8      => scratch, finally i_a
@   q2      => i_b in every 16-bit lane;  q3 => i_c in every 16-bit lane
@   q13/q14 => 16-bit row accumulators for columns 0-7 / 8-15

    .global ih264_intra_pred_luma_16x16_mode_plane_a9q
ih264_intra_pred_luma_16x16_mode_plane_a9q:

    push          {r4-r10, r12, lr}

    mov           r2, r1                @ keep the destination pointer in r2
    add           r1, r0, #17
    add           r0, r0, #15           @ r0 = pu1_src + 15

    mov           r8, #9
    sub           r1, r1, #1            @ r1 = pu1_src + 16
    mov           r10, r1               @ remember the address of pu1_src[16]
    mov           r4, #-1               @ step for the descending pointer
    vld1.32       {d2}, [r1], r8        @ d2 = pu1_src[16..23]; r1 -> pu1_src + 25
    ldr           r7, scratch_intrapred_addr1
scrlbl1:
    add           r7, r7, pc            @ pc reads as scrlbl1 + 8, giving the table address

    vld1.32       {d0}, [r1]            @ d0 = pu1_src[25..32]
    vrev64.8      d2, d2                @ reverse so lane i pairs pu1_src[25+i] with pu1_src[23-i]
    vld1.32       {q3}, [r7]            @ 16 byte weights
    vsubl.u8      q0, d0, d2            @ widened differences
    vmovl.u8      q8, d6                @ first eight weights as 16-bit
    vmul.s16      q0, q0, q8            @ weighted differences
    vmovl.u8      q9, d7                @ last eight weights as 16-bit

    add           r7, r0, r4, lsl #3    @ r7 = pu1_src + 7  (walks downwards)
    sub           r0, r7, r4, lsl #1    @ r0 = pu1_src + 9  (walks upwards)
    rsb           lr, r4, #0            @ lr = +1

    vpadd.s16     d0, d0, d1            @ start the horizontal reduction

    ldrb          r8, [r7], r4
    ldrb          r9, [r0], lr

    vpaddl.s16    d0, d0
    sub           r12, r8, r9           @ 1 * (src[7] - src[9])

    ldrb          r8, [r7], r4

    vpaddl.s32    d0, d0                @ full weighted sum in the low lane of d0
    ldrb          r9, [r0], lr
    sub           r8, r8, r9
    vshl.s32      d2, d0, #2
    add           r12, r12, r8, lsl #1  @ + 2 * (src[6] - src[10])

    vadd.s32      d0, d0, d2            @ 5 * sum
    ldrb          r8, [r7], r4
    ldrb          r9, [r0], lr
    vrshr.s32     d0, d0, #6            @ i_b = (5 * sum + 32) >> 6, in d0[0]
    sub           r8, r8, r9
    ldrb          r5, [r7], r4
    add           r8, r8, r8, lsl #1    @ 3 * (src[5] - src[11])

    vdup.16       q2, d0[0]             @ q2 = i_b in all lanes
    add           r12, r12, r8
    ldrb          r9, [r0], lr
    vmul.s16      q0, q2, q8            @ i_b * weights, columns 0-7
    sub           r5, r5, r9
    vmul.s16      q1, q2, q9            @ i_b * weights, columns 8-15
    add           r12, r12, r5, lsl #2  @ + 4 * (src[4] - src[12])

    ldrb          r8, [r7], r4
    ldrb          r9, [r0], lr
    sub           r8, r8, r9
    ldrb          r5, [r7], r4
    add           r8, r8, r8, lsl #2    @ 5 * (src[3] - src[13])
    ldrb          r6, [r0], lr
    add           r12, r12, r8
    ldrb          r8, [r7], r4
    ldrb          r9, [r0], lr

    sub           r5, r5, r6            @ src[2] - src[14]
    sub           r8, r8, r9            @ src[1] - src[15]
    add           r5, r5, r5, lsl #1    @ times 3 ...
    rsb           r8, r8, r8, lsl #3    @ times 7
    add           r12, r12, r5, lsl #1  @ ... doubled: + 6 * (src[2] - src[14])
    ldrb          r5, [r7], r4          @ r5 = pu1_src[0]
    ldrb          r6, [r10]             @ r6 = pu1_src[16]
    add           r12, r12, r8          @ + 7 * (src[1] - src[15])
    sub           r9, r5, r6
    ldrb          r6, [r1, #7]          @ r6 = pu1_src[32]
    add           r12, r12, r9, lsl #3  @ + 8 * (src[0] - src[16]): vertical sum complete
    add           r8, r5, r6

    add           r12, r12, r12, lsl #2 @ 5 * sum
    lsl           r8, r8, #4            @ i_a = (src[0] + src[32]) << 4

    add           r12, r12, #0x20
    lsr           r12, r12, #6          @ i_c; a logical shift suffices because vdup.16 keeps only the low 16 bits

    vshl.s16      q14, q2, #3           @ 8 * i_b
    vdup.16       q3, r12               @ q3 = i_c in all lanes

    vdup.16       q15, r8               @ q15 = i_a in all lanes
    vshl.s16      q13, q3, #3           @ 8 * i_c
    vsub.s16      q15, q15, q14
    vsub.s16      q15, q15, q13         @ i_a - 8 * i_b - 8 * i_c
    vadd.s16      q14, q15, q3          @ i_a - 8 * i_b - 7 * i_c

    mov           r0, #14               @ 14 rows are stored inside the loop, two per pass
    vadd.s16      q13, q14, q0          @ row 0 accumulators, columns 0-7
    vadd.s16      q14, q14, q1          @ row 0 accumulators, columns 8-15
    vqrshrun.s16  d20, q13, #5          @ q10 = row 0, rounded and saturated to u8
    vqrshrun.s16  d21, q14, #5

.Lplane_row_pair:

    vadd.s16      q13, q13, q3          @ step one row down: add i_c
    vadd.s16      q14, q14, q3
    vqrshrun.s16  d22, q13, #5
    vst1.32       {q10}, [r2], r3       @ store the even row while the odd row is narrowed
    vqrshrun.s16  d23, q14, #5

    vadd.s16      q13, q13, q3
    subs          r0, r0, #2
    vadd.s16      q14, q14, q3
    vqrshrun.s16  d20, q13, #5
    vst1.32       {q11}, [r2], r3       @ store the odd row
    vqrshrun.s16  d21, q14, #5
    bne           .Lplane_row_pair

    vadd.s16      q13, q13, q3          @ last row
    vadd.s16      q14, q14, q3
    vqrshrun.s16  d22, q13, #5
    vst1.32       {q10}, [r2], r3       @ row 14
    vqrshrun.s16  d23, q14, #5
    vst1.32       {q11}, [r2], r3       @ row 15

    pop           {r4-r10, r12, pc}


