;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_yv12_copy_frame_yonly_neon|
    EXPORT  |vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon|

    ARM
    REQUIRE8
    PRESERVE8

    INCLUDE asm_com_offsets.asm

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; void vp8_yv12_copy_frame_yonly_neon(YV12_BUFFER_CONFIG *src_ybc,
;                                     YV12_BUFFER_CONFIG *dst_ybc);
;
; Copies the Y plane of src_ybc into dst_ybc, then extends the borders
; of the destination Y plane (left/right columns first, then top/bottom
; rows including the corners).
;
; In:    r0 = src_ybc, r1 = dst_ybc
; Out:   none
; Saved: r4-r11, lr and d8-d15 are pushed on entry and restored on exit.
;
; Note: this is VP8 function, which has border=32 and 16. Internal
; y_width and y_height are always multiples of 16. The code relies on
; this: rows are copied two at a time, border columns four rows at a
; time, and every remainder loop moves 16 bytes per step.
;
; Register roles during the plane copy:
;   r2/r3   = source / destination pointer to the current row pair
;   r4      = y_height          r5 = y_width
;   r6/r7   = source / destination y_stride
;   r8/r9   = running source / destination pointer, first row of pair
;   r10/r11 = running source / destination pointer, second row of pair
;   r12     = inner (width) loop counter
;   lr      = outer (row pair) loop counter
;-----------------------------------------------------------------------
|vp8_yv12_copy_frame_yonly_neon| PROC
    push            {r4 - r11, lr}
    vpush           {d8 - d15}

    ldr             r4, [r0, #yv12_buffer_config_y_height]
    ldr             r5, [r0, #yv12_buffer_config_y_width]
    ldr             r6, [r0, #yv12_buffer_config_y_stride]
    ldr             r7, [r1, #yv12_buffer_config_y_stride]
    ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1

    ; The 128-byte loop below is bottom-tested. With y_width < 128 its
    ; counter would start at 0 and wrap on the first decrement, so narrow
    ; frames go straight to the 16-byte remainder path.
    cmp             r5, #128
    blo             cp_src_to_dst_remainder

    ; copy two rows at one time
    mov             lr, r4, lsr #1

cp_src_to_dst_height_loop
    mov             r8, r2
    mov             r9, r3
    add             r10, r2, r6
    add             r11, r3, r7
    mov             r12, r5, lsr #7         ; number of 128-byte chunks per row

cp_src_to_dst_width_loop
    vld1.8          {q0, q1}, [r8]!
    vld1.8          {q8, q9}, [r10]!
    vld1.8          {q2, q3}, [r8]!
    vld1.8          {q10, q11}, [r10]!
    vld1.8          {q4, q5}, [r8]!
    vld1.8          {q12, q13}, [r10]!
    vld1.8          {q6, q7}, [r8]!
    vld1.8          {q14, q15}, [r10]!

    subs            r12, r12, #1

    vst1.8          {q0, q1}, [r9]!
    vst1.8          {q8, q9}, [r11]!
    vst1.8          {q2, q3}, [r9]!
    vst1.8          {q10, q11}, [r11]!
    vst1.8          {q4, q5}, [r9]!
    vst1.8          {q12, q13}, [r11]!
    vst1.8          {q6, q7}, [r9]!
    vst1.8          {q14, q15}, [r11]!

    bne             cp_src_to_dst_width_loop

    subs            lr, lr, #1
    add             r2, r2, r6, lsl #1      ; advance both pointers by two rows
    add             r3, r3, r7, lsl #1

    bne             cp_src_to_dst_height_loop

cp_src_to_dst_remainder
    ands            r10, r5, #0x7f          ;check to see if extra copy is needed
    sub             r11, r5, r10            ; bytes per row already handled by the 128-byte loop
    ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1
    bne             extra_cp_src_to_dst_width   ; flags still come from the ands above
end_of_cp_src_to_dst


    ;vpxyv12_extend_frame_borders_yonly
    ; From here on only the destination buffer is touched:
    ;   r0 = dst_ybc, r1 = y_buffer, r3 = border, r4 = y_height, lr = y_stride
    mov             r0, r1
    ;Not need to load y_width, since: y_width = y_stride - 2*border
    ldr             r3, [r0, #yv12_buffer_config_border]
    ldr             r1, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    ldr             r4, [r0, #yv12_buffer_config_y_height]
    ldr             lr, [r0, #yv12_buffer_config_y_stride]

    cmp             r3, #16
    beq             b16_extend_frame_borders

;=======================
b32_extend_frame_borders
;border = 32
;=======================
    ;Border copy for Y plane
    ;copy the left and right most columns out
    sub             r5, r1, r3              ;destptr1
    add             r6, r1, lr
    sub             r6, r6, r3, lsl #1      ;destptr2
    sub             r2, r6, #1              ;srcptr2

    ;Do four rows at one time
    mov             r12, r4, lsr #2

copy_left_right_y
    ; Each load replicates one edge pixel across a whole q register.
    vld1.8          {d0[], d1[]}, [r1], lr
    vld1.8          {d4[], d5[]}, [r2], lr
    vld1.8          {d8[], d9[]}, [r1], lr
    vld1.8          {d12[], d13[]}, [r2], lr
    vld1.8          {d16[], d17[]}, [r1], lr
    vld1.8          {d20[], d21[]}, [r2], lr
    vld1.8          {d24[], d25[]}, [r1], lr
    vld1.8          {d28[], d29[]}, [r2], lr

    ; Duplicate into the paired register so each store writes 32 bytes.
    vmov            q1, q0
    vmov            q3, q2
    vmov            q5, q4
    vmov            q7, q6
    vmov            q9, q8
    vmov            q11, q10
    vmov            q13, q12
    vmov            q15, q14

    subs            r12, r12, #1

    vst1.8          {q0, q1}, [r5], lr
    vst1.8          {q2, q3}, [r6], lr
    vst1.8          {q4, q5}, [r5], lr
    vst1.8          {q6, q7}, [r6], lr
    vst1.8          {q8, q9}, [r5], lr
    vst1.8          {q10, q11}, [r6], lr
    vst1.8          {q12, q13}, [r5], lr
    vst1.8          {q14, q15}, [r6], lr

    bne             copy_left_right_y

    ;Now copy the top and bottom source lines into each line of the respective borders
    ldr             r7, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    mul             r8, r3, lr              ; r8 = border * y_stride

    mov             r12, lr, lsr #7         ; number of 128-byte column groups per row

    sub             r6, r1, r3              ;destptr2
    sub             r2, r6, lr              ;srcptr2
    sub             r1, r7, r3              ;srcptr1
    sub             r5, r1, r8              ;destptr1

    ; The 128-byte column loop is bottom-tested; with y_stride < 128 its
    ; counter would start at 0 and wrap, so go straight to the 16-byte
    ; column remainder.
    cmp             lr, #128
    blo             top_bottom_remainder_y

copy_top_bottom_y
    vld1.8          {q0, q1}, [r1]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q2, q3}, [r1]!
    vld1.8          {q10, q11}, [r2]!
    vld1.8          {q4, q5}, [r1]!
    vld1.8          {q12, q13}, [r2]!
    vld1.8          {q6, q7}, [r1]!
    vld1.8          {q14, q15}, [r2]!

    mov             r7, r3                  ; one iteration per border row

top_bottom_32
    subs            r7, r7, #1

    vst1.8          {q0, q1}, [r5]!
    vst1.8          {q8, q9}, [r6]!
    vst1.8          {q2, q3}, [r5]!
    vst1.8          {q10, q11}, [r6]!
    vst1.8          {q4, q5}, [r5]!
    vst1.8          {q12, q13}, [r6]!
    vst1.8          {q6, q7}, [r5]!
    vst1.8          {q14, q15}, [r6]!

    ; Step to the same 128-byte column group in the next border row.
    add             r5, r5, lr
    sub             r5, r5, #128
    add             r6, r6, lr
    sub             r6, r6, #128

    bne             top_bottom_32

    ; Rewind the destinations to the first border row, next column group.
    sub             r5, r1, r8
    add             r6, r2, lr

    subs            r12, r12, #1
    bne             copy_top_bottom_y

top_bottom_remainder_y
    mov             r7, lr, lsr #4          ;check to see if extra copy is needed
    ands            r7, r7, #0x7            ; r7 = remaining 16-byte columns
    bne             extra_top_bottom_y
end_of_border_copy_y

    vpop            {d8 - d15}
    pop             {r4 - r11, pc}

;=====================
;extra copy part for Y
extra_top_bottom_y
    vld1.8          {q0}, [r1]!
    vld1.8          {q2}, [r2]!

    mov             r9, r3, lsr #3          ; eight border rows per iteration

extra_top_bottom_32
    subs            r9, r9, #1

    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    bne             extra_top_bottom_32

    sub             r5, r1, r8
    add             r6, r2, lr
    subs            r7, r7, #1
    bne             extra_top_bottom_y

    b               end_of_border_copy_y


;=======================
b16_extend_frame_borders
;border = 16
;=======================
    ;Border copy for Y plane
    ;copy the left and right most columns out
    sub             r5, r1, r3              ;destptr1
    add             r6, r1, lr
    sub             r6, r6, r3, lsl #1      ;destptr2
    sub             r2, r6, #1              ;srcptr2

    ;Do four rows at one time
    mov             r12, r4, lsr #2

copy_left_right_y_b16
    ; Each load replicates one edge pixel across a whole q register;
    ; a single q register (16 bytes) fills the 16-pixel border.
    vld1.8          {d0[], d1[]}, [r1], lr
    vld1.8          {d4[], d5[]}, [r2], lr
    vld1.8          {d8[], d9[]}, [r1], lr
    vld1.8          {d12[], d13[]}, [r2], lr
    vld1.8          {d16[], d17[]}, [r1], lr
    vld1.8          {d20[], d21[]}, [r2], lr
    vld1.8          {d24[], d25[]}, [r1], lr
    vld1.8          {d28[], d29[]}, [r2], lr

    subs            r12, r12, #1

    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q4}, [r5], lr
    vst1.8          {q6}, [r6], lr
    vst1.8          {q8}, [r5], lr
    vst1.8          {q10}, [r6], lr
    vst1.8          {q12}, [r5], lr
    vst1.8          {q14}, [r6], lr

    bne             copy_left_right_y_b16

    ;Now copy the top and bottom source lines into each line of the respective borders
    ldr             r7, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    mul             r8, r3, lr              ; r8 = border * y_stride

    mov             r12, lr, lsr #7         ; number of 128-byte column groups per row

    sub             r6, r1, r3              ;destptr2
    sub             r2, r6, lr              ;srcptr2
    sub             r1, r7, r3              ;srcptr1
    sub             r5, r1, r8              ;destptr1

    ; Same guard as the border=32 path: skip the bottom-tested 128-byte
    ; column loop when y_stride < 128.
    cmp             lr, #128
    blo             top_bottom_remainder_y_b16

copy_top_bottom_y_b16
    vld1.8          {q0, q1}, [r1]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q2, q3}, [r1]!
    vld1.8          {q10, q11}, [r2]!
    vld1.8          {q4, q5}, [r1]!
    vld1.8          {q12, q13}, [r2]!
    vld1.8          {q6, q7}, [r1]!
    vld1.8          {q14, q15}, [r2]!

    mov             r7, r3                  ; one iteration per border row

top_bottom_16_b16
    subs            r7, r7, #1

    vst1.8          {q0, q1}, [r5]!
    vst1.8          {q8, q9}, [r6]!
    vst1.8          {q2, q3}, [r5]!
    vst1.8          {q10, q11}, [r6]!
    vst1.8          {q4, q5}, [r5]!
    vst1.8          {q12, q13}, [r6]!
    vst1.8          {q6, q7}, [r5]!
    vst1.8          {q14, q15}, [r6]!

    ; Step to the same 128-byte column group in the next border row.
    add             r5, r5, lr
    sub             r5, r5, #128
    add             r6, r6, lr
    sub             r6, r6, #128

    bne             top_bottom_16_b16

    ; Rewind the destinations to the first border row, next column group.
    sub             r5, r1, r8
    add             r6, r2, lr

    subs            r12, r12, #1
    bne             copy_top_bottom_y_b16

top_bottom_remainder_y_b16
    mov             r7, lr, lsr #4          ;check to see if extra copy is needed
    ands            r7, r7, #0x7            ; r7 = remaining 16-byte columns
    bne             extra_top_bottom_y_b16
end_of_border_copy_y_b16

    vpop            {d8 - d15}
    pop             {r4 - r11, pc}

;=====================
;extra copy part for Y
extra_top_bottom_y_b16
    vld1.8          {q0}, [r1]!
    vld1.8          {q2}, [r2]!

    mov             r9, r3, lsr #3          ; eight border rows per iteration

extra_top_bottom_16_b16
    subs            r9, r9, #1

    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    bne             extra_top_bottom_16_b16

    sub             r5, r1, r8
    add             r6, r2, lr
    subs            r7, r7, #1
    bne             extra_top_bottom_y_b16

    b               end_of_border_copy_y_b16

;=============================
; Copies the last (y_width & 0x7f) bytes of every row, 16 bytes at a time.
; On entry: r10 = y_width & 0x7f (non-zero), r11 = y_width - r10,
;           r2/r3 = source / destination y_buffer.
; r0 is reused as the second-row source pointer; src_ybc is no longer
; needed at this point.
extra_cp_src_to_dst_width
    add             r2, r2, r11
    add             r3, r3, r11

    mov             lr, r4, lsr #1
extra_cp_src_to_dst_height_loop
    mov             r8, r2
    mov             r9, r3
    add             r0, r8, r6
    add             r11, r9, r7

    mov             r12, r10

extra_cp_src_to_dst_width_loop
    vld1.8          {q0}, [r8]!
    vld1.8          {q1}, [r0]!

    subs            r12, r12, #16

    vst1.8          {q0}, [r9]!
    vst1.8          {q1}, [r11]!
    bne             extra_cp_src_to_dst_width_loop

    subs            lr, lr, #1

    add             r2, r2, r6, lsl #1
    add             r3, r3, r7, lsl #1

    bne             extra_cp_src_to_dst_height_loop

    b               end_of_cp_src_to_dst

    ENDP

;===========================================================
;In vp8cx_pick_filter_level(), call vp8_yv12_copy_frame_yonly
;without extend_frame_borders.
;
; void vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(
;          YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
;
; Copies the Y plane of src_ybc into dst_ybc and returns; the destination
; borders are left untouched.
;
; In:    r0 = src_ybc, r1 = dst_ybc
; Out:   none
; Saved: r4-r11, lr and d8-d15 are pushed on entry and restored on exit.
;
; Register roles are the same as in the plane copy of
; vp8_yv12_copy_frame_yonly_neon: r2/r3 row-pair pointers, r4 height,
; r5 width, r6/r7 strides, r8-r11 running pointers, r12/lr loop counters.
|vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon| PROC
    push            {r4 - r11, lr}
    vpush           {d8-d15}

    ldr             r4, [r0, #yv12_buffer_config_y_height]
    ldr             r5, [r0, #yv12_buffer_config_y_width]
    ldr             r6, [r0, #yv12_buffer_config_y_stride]
    ldr             r7, [r1, #yv12_buffer_config_y_stride]
    ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1

    ; The 128-byte loop below is bottom-tested. With y_width < 128 its
    ; counter would start at 0 and wrap on the first decrement, so narrow
    ; frames go straight to the 16-byte remainder path.
    cmp             r5, #128
    blo             cp_src_to_dst_remainder1

    ; copy two rows at one time
    mov             lr, r4, lsr #1

cp_src_to_dst_height_loop1
    mov             r8, r2
    mov             r9, r3
    add             r10, r2, r6
    add             r11, r3, r7
    mov             r12, r5, lsr #7         ; number of 128-byte chunks per row

cp_src_to_dst_width_loop1
    vld1.8          {q0, q1}, [r8]!
    vld1.8          {q8, q9}, [r10]!
    vld1.8          {q2, q3}, [r8]!
    vld1.8          {q10, q11}, [r10]!
    vld1.8          {q4, q5}, [r8]!
    vld1.8          {q12, q13}, [r10]!
    vld1.8          {q6, q7}, [r8]!
    vld1.8          {q14, q15}, [r10]!

    subs            r12, r12, #1

    vst1.8          {q0, q1}, [r9]!
    vst1.8          {q8, q9}, [r11]!
    vst1.8          {q2, q3}, [r9]!
    vst1.8          {q10, q11}, [r11]!
    vst1.8          {q4, q5}, [r9]!
    vst1.8          {q12, q13}, [r11]!
    vst1.8          {q6, q7}, [r9]!
    vst1.8          {q14, q15}, [r11]!

    bne             cp_src_to_dst_width_loop1

    subs            lr, lr, #1
    add             r2, r2, r6, lsl #1      ; advance both pointers by two rows
    add             r3, r3, r7, lsl #1

    bne             cp_src_to_dst_height_loop1

cp_src_to_dst_remainder1
    ands            r10, r5, #0x7f          ;check to see if extra copy is needed
    sub             r11, r5, r10            ; bytes per row already handled by the 128-byte loop
    ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1
    bne             extra_cp_src_to_dst_width1  ; flags still come from the ands above
end_of_cp_src_to_dst1

    vpop            {d8 - d15}
    pop             {r4-r11, pc}

;=============================
; Copies the last (y_width & 0x7f) bytes of every row, 16 bytes at a time.
; On entry: r10 = y_width & 0x7f (non-zero), r11 = y_width - r10,
;           r2/r3 = source / destination y_buffer.
; r0 is reused as the second-row source pointer.
extra_cp_src_to_dst_width1
    add             r2, r2, r11
    add             r3, r3, r11

    mov             lr, r4, lsr #1
extra_cp_src_to_dst_height_loop1
    mov             r8, r2
    mov             r9, r3
    add             r0, r8, r6
    add             r11, r9, r7

    mov             r12, r10

extra_cp_src_to_dst_width_loop1
    vld1.8          {q0}, [r8]!
    vld1.8          {q1}, [r0]!

    subs            r12, r12, #16

    vst1.8          {q0}, [r9]!
    vst1.8          {q1}, [r11]!
    bne             extra_cp_src_to_dst_width_loop1

    subs            lr, lr, #1

    add             r2, r2, r6, lsl #1
    add             r3, r3, r7, lsl #1

    bne             extra_cp_src_to_dst_height_loop1

    b               end_of_cp_src_to_dst1

    ENDP

    END