;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT |vp8_yv12_extend_frame_borders_neon|
    ARM
    REQUIRE8
    PRESERVE8

    INCLUDE asm_com_offsets.asm

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf);
;
; Fills the border area around the Y, U and V planes of ybf:
;   1. every row's first/last pixel is replicated into the left/right
;      border of that row;
;   2. the first/last row (including the left/right borders written in
;      step 1) is replicated into every row of the top/bottom border.
;
; Note: this is a VP8 function, which has border = 32 or 16. A border
;       of 16 takes the "_b16" path; any other value takes the
;       border = 32 path. Internal y_width and y_height are always
;       multiples of 16, so y_stride (= y_width + 2*border) is a
;       multiple of 16 as well.
;
; In:    r0 = ybf
; Out:   none
; Saved: r4 - r10, lr and d8 - d15 are pushed on entry and restored on
;        exit.
;
; Register roles:
;   r1  = srcptr1 (left column / top row source)
;   r2  = srcptr2 (right column / bottom row source)
;   r3  = border of the current plane (halved for U/V)
;   r4  = height of the current plane (halved for U/V)
;   r5  = destptr1 (left / top border)
;   r6  = destptr2 (right / bottom border)
;   r7  = saved plane base, then row counter, then leftover chunk count
;   r8  = border * stride of the current plane (bytes in the top border)
;   r9  = row-group counter inside the leftover-chunk loops
;   r10 = number of chroma planes left to process
;   r12 = loop counter (row groups, then full-width chunks)
;   lr  = stride of the current plane (halved for U/V)
;
; The top/bottom copy handles the stride in chunks of 128 bytes (Y) or
; 64 bytes (U/V); leftover 16-byte (Y) or 8-byte (U/V) chunks are handled
; by the "extra_top_bottom_*" loops. The chunk loops test their counter
; at the bottom, so they are skipped explicitly when the stride is
; smaller than one chunk; otherwise the counter would wrap around.
;-----------------------------------------------------------------------

|vp8_yv12_extend_frame_borders_neon| PROC
    push            {r4 - r10, lr}
    vpush           {d8 - d15}

    ;No need to load y_width, since: y_width = y_stride - 2*border
    ldr             r3, [r0, #yv12_buffer_config_border]
    ldr             r1, [r0, #yv12_buffer_config_y_buffer]      ;srcptr1
    ldr             r4, [r0, #yv12_buffer_config_y_height]
    ldr             lr, [r0, #yv12_buffer_config_y_stride]

    cmp             r3, #16
    beq             b16_extend_frame_borders

;=======================
b32_extend_frame_borders
;border = 32
;=======================
;Border copy for Y plane
;copy the left and right most columns out
    sub             r5, r1, r3              ;destptr1
    add             r6, r1, lr
    sub             r6, r6, r3, lsl #1      ;destptr2
    sub             r2, r6, #1              ;srcptr2

    ;Do four rows at one time
    mov             r12, r4, lsr #2

copy_left_right_y
    ;each load replicates one edge byte across a whole q register
    vld1.8          {d0[], d1[]}, [r1], lr
    vld1.8          {d4[], d5[]}, [r2], lr
    vld1.8          {d8[], d9[]}, [r1], lr
    vld1.8          {d12[], d13[]}, [r2], lr
    vld1.8          {d16[], d17[]}, [r1], lr
    vld1.8          {d20[], d21[]}, [r2], lr
    vld1.8          {d24[], d25[]}, [r1], lr
    vld1.8          {d28[], d29[]}, [r2], lr

    ;duplicate to get 32 identical bytes per row
    vmov            q1, q0
    vmov            q3, q2
    vmov            q5, q4
    vmov            q7, q6
    vmov            q9, q8
    vmov            q11, q10
    vmov            q13, q12
    vmov            q15, q14

    subs            r12, r12, #1

    vst1.8          {q0, q1}, [r5], lr
    vst1.8          {q2, q3}, [r6], lr
    vst1.8          {q4, q5}, [r5], lr
    vst1.8          {q6, q7}, [r6], lr
    vst1.8          {q8, q9}, [r5], lr
    vst1.8          {q10, q11}, [r6], lr
    vst1.8          {q12, q13}, [r5], lr
    vst1.8          {q14, q15}, [r6], lr

    bne             copy_left_right_y

;Now copy the top and bottom source lines into each line of the respective borders
    ldr             r7, [r0, #yv12_buffer_config_y_buffer]      ;srcptr1
    mul             r8, r3, lr              ;bytes in the top border

    movs            r12, lr, lsr #7         ;number of 128-byte chunks, Z set if none

    ;the pointer setup below does not alter the flags set by movs
    sub             r6, r1, r3              ;destptr2
    sub             r2, r6, lr              ;srcptr2
    sub             r1, r7, r3              ;srcptr1
    sub             r5, r1, r8              ;destptr1

    beq             check_extra_y           ;stride < 128: only leftover chunks

copy_top_bottom_y
    vld1.8          {q0, q1}, [r1]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q2, q3}, [r1]!
    vld1.8          {q10, q11}, [r2]!
    vld1.8          {q4, q5}, [r1]!
    vld1.8          {q12, q13}, [r2]!
    vld1.8          {q6, q7}, [r1]!
    vld1.8          {q14, q15}, [r2]!

    mov             r7, r3                  ;one pass per border row

top_bottom_32
    subs            r7, r7, #1

    vst1.8          {q0, q1}, [r5]!
    vst1.8          {q8, q9}, [r6]!
    vst1.8          {q2, q3}, [r5]!
    vst1.8          {q10, q11}, [r6]!
    vst1.8          {q4, q5}, [r5]!
    vst1.8          {q12, q13}, [r6]!
    vst1.8          {q6, q7}, [r5]!
    vst1.8          {q14, q15}, [r6]!

    ;step to the same chunk in the next border row
    add             r5, r5, lr
    sub             r5, r5, #128
    add             r6, r6, lr
    sub             r6, r6, #128

    bne             top_bottom_32

    ;back to the first border row, next chunk
    sub             r5, r1, r8
    add             r6, r2, lr

    subs            r12, r12, #1
    bne             copy_top_bottom_y

check_extra_y
    mov             r7, lr, lsr #4          ;check to see if extra copy is needed
    ands            r7, r7, #0x7
    bne             extra_top_bottom_y
end_of_border_copy_y

;Border copy for U, V planes
    ldr             r1, [r0, #yv12_buffer_config_u_buffer]      ;srcptr1
    mov             lr, lr, lsr #1          ;uv_stride
    mov             r3, r3, lsr #1          ;border
    mov             r4, r4, lsr #1          ;uv_height
    mov             r8, r8, lsr #2          ;uv border * uv_stride

    mov             r10, #2                 ;two chroma planes: U then V

;copy the left and right most columns out
border_copy_uv
    sub             r5, r1, r3              ;destptr1
    add             r6, r1, lr
    sub             r6, r6, r3, lsl #1      ;destptr2
    sub             r2, r6, #1              ;srcptr2

    mov             r7, r1                  ;keep the plane base for the top/bottom copy

    ;Do eight rows at one time
    mov             r12, r4, lsr #3

copy_left_right_uv
    vld1.8          {d0[], d1[]}, [r1], lr
    vld1.8          {d2[], d3[]}, [r2], lr
    vld1.8          {d4[], d5[]}, [r1], lr
    vld1.8          {d6[], d7[]}, [r2], lr
    vld1.8          {d8[], d9[]}, [r1], lr
    vld1.8          {d10[], d11[]}, [r2], lr
    vld1.8          {d12[], d13[]}, [r1], lr
    vld1.8          {d14[], d15[]}, [r2], lr
    vld1.8          {d16[], d17[]}, [r1], lr
    vld1.8          {d18[], d19[]}, [r2], lr
    vld1.8          {d20[], d21[]}, [r1], lr
    vld1.8          {d22[], d23[]}, [r2], lr
    vld1.8          {d24[], d25[]}, [r1], lr
    vld1.8          {d26[], d27[]}, [r2], lr
    vld1.8          {d28[], d29[]}, [r1], lr
    vld1.8          {d30[], d31[]}, [r2], lr

    subs            r12, r12, #1

    vst1.8          {q0}, [r5], lr
    vst1.8          {q1}, [r6], lr
    vst1.8          {q2}, [r5], lr
    vst1.8          {q3}, [r6], lr
    vst1.8          {q4}, [r5], lr
    vst1.8          {q5}, [r6], lr
    vst1.8          {q6}, [r5], lr
    vst1.8          {q7}, [r6], lr
    vst1.8          {q8}, [r5], lr
    vst1.8          {q9}, [r6], lr
    vst1.8          {q10}, [r5], lr
    vst1.8          {q11}, [r6], lr
    vst1.8          {q12}, [r5], lr
    vst1.8          {q13}, [r6], lr
    vst1.8          {q14}, [r5], lr
    vst1.8          {q15}, [r6], lr

    bne             copy_left_right_uv

;Now copy the top and bottom source lines into each line of the respective borders
    movs            r12, lr, lsr #6         ;number of 64-byte chunks, Z set if none

    ;the pointer setup below does not alter the flags set by movs
    sub             r6, r1, r3              ;destptr2
    sub             r2, r6, lr              ;srcptr2
    sub             r1, r7, r3              ;srcptr1
    sub             r5, r1, r8              ;destptr1

    beq             check_extra_uv          ;uv_stride < 64: only leftover chunks

copy_top_bottom_uv
    vld1.8          {q0, q1}, [r1]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q2, q3}, [r1]!
    vld1.8          {q10, q11}, [r2]!

    mov             r7, r3                  ;one pass per border row

top_bottom_16
    subs            r7, r7, #1

    vst1.8          {q0, q1}, [r5]!
    vst1.8          {q8, q9}, [r6]!
    vst1.8          {q2, q3}, [r5]!
    vst1.8          {q10, q11}, [r6]!

    ;step to the same chunk in the next border row
    add             r5, r5, lr
    sub             r5, r5, #64
    add             r6, r6, lr
    sub             r6, r6, #64

    bne             top_bottom_16

    ;back to the first border row, next chunk
    sub             r5, r1, r8
    add             r6, r2, lr

    subs            r12, r12, #1
    bne             copy_top_bottom_uv

check_extra_uv
    mov             r7, lr, lsr #3          ;check to see if extra copy is needed
    ands            r7, r7, #0x7
    bne             extra_top_bottom_uv

end_of_border_copy_uv
    subs            r10, r10, #1
    ldrne           r1, [r0, #yv12_buffer_config_v_buffer]      ;srcptr1
    bne             border_copy_uv

    vpop            {d8 - d15}
    pop             {r4 - r10, pc}

;;;;;;;;;;;;;;;;;;;;;;
;extra copy part for Y
;r7 = number of leftover 16-byte chunks
extra_top_bottom_y
    vld1.8          {q0}, [r1]!
    vld1.8          {q2}, [r2]!

    mov             r9, r3, lsr #3          ;eight border rows per pass

extra_top_bottom_32
    subs            r9, r9, #1

    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    bne             extra_top_bottom_32

    sub             r5, r1, r8
    add             r6, r2, lr
    subs            r7, r7, #1
    bne             extra_top_bottom_y

    b               end_of_border_copy_y

;extra copy part for UV
;r7 = number of leftover 8-byte chunks
extra_top_bottom_uv
    vld1.8          {d0}, [r1]!
    vld1.8          {d8}, [r2]!

    mov             r9, r3, lsr #3          ;eight border rows per pass

extra_top_bottom_16
    subs            r9, r9, #1

    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    bne             extra_top_bottom_16

    sub             r5, r1, r8
    add             r6, r2, lr
    subs            r7, r7, #1
    bne             extra_top_bottom_uv

    b               end_of_border_copy_uv


;=======================
b16_extend_frame_borders
;border = 16
;=======================
;Border copy for Y plane
;copy the left and right most columns out
    sub             r5, r1, r3              ;destptr1
    add             r6, r1, lr
    sub             r6, r6, r3, lsl #1      ;destptr2
    sub             r2, r6, #1              ;srcptr2

    ;Do four rows at one time
    mov             r12, r4, lsr #2

copy_left_right_y_b16
    ;each load replicates one edge byte across a whole q register
    vld1.8          {d0[], d1[]}, [r1], lr
    vld1.8          {d4[], d5[]}, [r2], lr
    vld1.8          {d8[], d9[]}, [r1], lr
    vld1.8          {d12[], d13[]}, [r2], lr
    vld1.8          {d16[], d17[]}, [r1], lr
    vld1.8          {d20[], d21[]}, [r2], lr
    vld1.8          {d24[], d25[]}, [r1], lr
    vld1.8          {d28[], d29[]}, [r2], lr

    subs            r12, r12, #1

    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q4}, [r5], lr
    vst1.8          {q6}, [r6], lr
    vst1.8          {q8}, [r5], lr
    vst1.8          {q10}, [r6], lr
    vst1.8          {q12}, [r5], lr
    vst1.8          {q14}, [r6], lr

    bne             copy_left_right_y_b16

;Now copy the top and bottom source lines into each line of the respective borders
    ldr             r7, [r0, #yv12_buffer_config_y_buffer]      ;srcptr1
    mul             r8, r3, lr              ;bytes in the top border

    movs            r12, lr, lsr #7         ;number of 128-byte chunks, Z set if none

    ;the pointer setup below does not alter the flags set by movs
    sub             r6, r1, r3              ;destptr2
    sub             r2, r6, lr              ;srcptr2
    sub             r1, r7, r3              ;srcptr1
    sub             r5, r1, r8              ;destptr1

    beq             check_extra_y_b16       ;stride < 128: only leftover chunks

copy_top_bottom_y_b16
    vld1.8          {q0, q1}, [r1]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q2, q3}, [r1]!
    vld1.8          {q10, q11}, [r2]!
    vld1.8          {q4, q5}, [r1]!
    vld1.8          {q12, q13}, [r2]!
    vld1.8          {q6, q7}, [r1]!
    vld1.8          {q14, q15}, [r2]!

    mov             r7, r3                  ;one pass per border row

top_bottom_16_b16
    subs            r7, r7, #1

    vst1.8          {q0, q1}, [r5]!
    vst1.8          {q8, q9}, [r6]!
    vst1.8          {q2, q3}, [r5]!
    vst1.8          {q10, q11}, [r6]!
    vst1.8          {q4, q5}, [r5]!
    vst1.8          {q12, q13}, [r6]!
    vst1.8          {q6, q7}, [r5]!
    vst1.8          {q14, q15}, [r6]!

    ;step to the same chunk in the next border row
    add             r5, r5, lr
    sub             r5, r5, #128
    add             r6, r6, lr
    sub             r6, r6, #128

    bne             top_bottom_16_b16

    ;back to the first border row, next chunk
    sub             r5, r1, r8
    add             r6, r2, lr

    subs            r12, r12, #1
    bne             copy_top_bottom_y_b16

check_extra_y_b16
    mov             r7, lr, lsr #4          ;check to see if extra copy is needed
    ands            r7, r7, #0x7
    bne             extra_top_bottom_y_b16
end_of_border_copy_y_b16

;Border copy for U, V planes
    ldr             r1, [r0, #yv12_buffer_config_u_buffer]      ;srcptr1
    mov             lr, lr, lsr #1          ;uv_stride
    mov             r3, r3, lsr #1          ;border
    mov             r4, r4, lsr #1          ;uv_height
    mov             r8, r8, lsr #2          ;uv border * uv_stride

    mov             r10, #2                 ;two chroma planes: U then V

;copy the left and right most columns out
border_copy_uv_b16
    sub             r5, r1, r3              ;destptr1
    add             r6, r1, lr
    sub             r6, r6, r3, lsl #1      ;destptr2
    sub             r2, r6, #1              ;srcptr2

    mov             r7, r1                  ;keep the plane base for the top/bottom copy

    ;Do eight rows at one time
    mov             r12, r4, lsr #3

copy_left_right_uv_b16
    ;each load replicates one edge byte across a d register (8 bytes)
    vld1.8          {d0[]}, [r1], lr
    vld1.8          {d2[]}, [r2], lr
    vld1.8          {d4[]}, [r1], lr
    vld1.8          {d6[]}, [r2], lr
    vld1.8          {d8[]}, [r1], lr
    vld1.8          {d10[]}, [r2], lr
    vld1.8          {d12[]}, [r1], lr
    vld1.8          {d14[]}, [r2], lr
    vld1.8          {d16[]}, [r1], lr
    vld1.8          {d18[]}, [r2], lr
    vld1.8          {d20[]}, [r1], lr
    vld1.8          {d22[]}, [r2], lr
    vld1.8          {d24[]}, [r1], lr
    vld1.8          {d26[]}, [r2], lr
    vld1.8          {d28[]}, [r1], lr
    vld1.8          {d30[]}, [r2], lr

    subs            r12, r12, #1

    vst1.8          {d0}, [r5], lr
    vst1.8          {d2}, [r6], lr
    vst1.8          {d4}, [r5], lr
    vst1.8          {d6}, [r6], lr
    vst1.8          {d8}, [r5], lr
    vst1.8          {d10}, [r6], lr
    vst1.8          {d12}, [r5], lr
    vst1.8          {d14}, [r6], lr
    vst1.8          {d16}, [r5], lr
    vst1.8          {d18}, [r6], lr
    vst1.8          {d20}, [r5], lr
    vst1.8          {d22}, [r6], lr
    vst1.8          {d24}, [r5], lr
    vst1.8          {d26}, [r6], lr
    vst1.8          {d28}, [r5], lr
    vst1.8          {d30}, [r6], lr

    bne             copy_left_right_uv_b16

;Now copy the top and bottom source lines into each line of the respective borders
    movs            r12, lr, lsr #6         ;number of 64-byte chunks, Z set if none

    ;the pointer setup below does not alter the flags set by movs
    sub             r6, r1, r3              ;destptr2
    sub             r2, r6, lr              ;srcptr2
    sub             r1, r7, r3              ;srcptr1
    sub             r5, r1, r8              ;destptr1

    beq             check_extra_uv_b16      ;uv_stride < 64: only leftover chunks

copy_top_bottom_uv_b16
    vld1.8          {q0, q1}, [r1]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q2, q3}, [r1]!
    vld1.8          {q10, q11}, [r2]!

    mov             r7, r3                  ;one pass per border row

top_bottom_8_b16
    subs            r7, r7, #1

    vst1.8          {q0, q1}, [r5]!
    vst1.8          {q8, q9}, [r6]!
    vst1.8          {q2, q3}, [r5]!
    vst1.8          {q10, q11}, [r6]!

    ;step to the same chunk in the next border row
    add             r5, r5, lr
    sub             r5, r5, #64
    add             r6, r6, lr
    sub             r6, r6, #64

    bne             top_bottom_8_b16

    ;back to the first border row, next chunk
    sub             r5, r1, r8
    add             r6, r2, lr

    subs            r12, r12, #1
    bne             copy_top_bottom_uv_b16

check_extra_uv_b16
    mov             r7, lr, lsr #3          ;check to see if extra copy is needed
    ands            r7, r7, #0x7
    bne             extra_top_bottom_uv_b16

end_of_border_copy_uv_b16
    subs            r10, r10, #1
    ldrne           r1, [r0, #yv12_buffer_config_v_buffer]      ;srcptr1
    bne             border_copy_uv_b16

    vpop            {d8-d15}
    pop             {r4 - r10, pc}

;;;;;;;;;;;;;;;;;;;;;;
;extra copy part for Y
;r7 = number of leftover 16-byte chunks
extra_top_bottom_y_b16
    vld1.8          {q0}, [r1]!
    vld1.8          {q2}, [r2]!

    mov             r9, r3, lsr #3          ;eight border rows per pass

extra_top_bottom_16_b16
    subs            r9, r9, #1

    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    bne             extra_top_bottom_16_b16

    sub             r5, r1, r8
    add             r6, r2, lr
    subs            r7, r7, #1
    bne             extra_top_bottom_y_b16

    b               end_of_border_copy_y_b16

;extra copy part for UV
;r7 = number of leftover 8-byte chunks
extra_top_bottom_uv_b16
    vld1.8          {d0}, [r1]!
    vld1.8          {d8}, [r2]!

    mov             r9, r3, lsr #3          ;eight border rows per pass

extra_top_bottom_8_b16
    subs            r9, r9, #1

    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    bne             extra_top_bottom_8_b16

    sub             r5, r1, r8
    add             r6, r2, lr
    subs            r7, r7, #1
    bne             extra_top_bottom_uv_b16

    b               end_of_border_copy_uv_b16

    ENDP
    END