@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@ *******************************************************************************
@ * @file
@ *  ihevc_padding_neon.s
@ *
@ * @brief
@ *  contains function definitions for left/right picture padding
@ *
@ * @author
@ *  naveen sr
@ *
@ * @par list of functions:
@ *  - ihevc_pad_left_luma_a9q()
@ *  - ihevc_pad_left_chroma_a9q()
@ *  - ihevc_pad_right_luma_a9q()
@ *  - ihevc_pad_right_chroma_a9q()
@ *
@ * @remarks
@ *  syntax : GNU as, ARM (32-bit) state, '@' comments
@ *  target : NEON is required (vdup / vst1). the _a9q suffix suggests a
@ *           Cortex-A9 tuning - NOTE(review): confirm against the build files
@ *  abi    : arguments are taken from r0-r3 as listed per function. r4-r11
@ *           are saved and restored by every routine. r0, r2, q0-q3 and the
@ *           condition flags are modified.
@ *
@ *  every routine writes exactly 80 bytes per row (five 16-byte stores).
@ *  the byte count is NOT derived from pad_size, so callers are expected
@ *  to pass pad_size == 80.
@ *
@ *  rows are processed four at a time. a one-row tail loop handles the
@ *  1..3 rows left over when ht is not a multiple of 4, and ht <= 0
@ *  returns without touching memory.
@ *
@ *******************************************************************************
@*/

.text
.align 4

@/**
@*******************************************************************************
@* pad_store_row dlo, dhi, dst
@*
@* stores the 16-byte vector held in the d-register pair {dlo,dhi} five
@* times to consecutive addresses starting at dst (5 * 16 = 80 bytes).
@*
@* clobbers : dst is advanced by 64 (the last store has no writeback).
@*            condition flags are not affected, so the macro may be placed
@*            between a flag-setting instruction and the branch that uses it.
@* the stores carry no alignment qualifier.
@*******************************************************************************
@*/
.macro pad_store_row dlo, dhi, dst
    vst1.8      {\dlo,\dhi},[\dst]!         @ bytes  0..15
    vst1.8      {\dlo,\dhi},[\dst]!         @ bytes 16..31
    vst1.8      {\dlo,\dhi},[\dst]!         @ bytes 32..47
    vst1.8      {\dlo,\dhi},[\dst]!         @ bytes 48..63
    vst1.8      {\dlo,\dhi},[\dst]          @ bytes 64..79
.endm


@/**
@*******************************************************************************
@*
@* @brief
@*  padding (luma block) at the left of a 2d array
@*
@* @par description:
@*  for every row the byte at pu1_src is replicated into the 80 bytes that
@*  start at (pu1_src - pad_size). pu1_src then advances by src_strd.
@*
@* @param[in] pu1_src
@*  uword8 pointer to the first (left-most) sample of the first row
@*
@* @param[in] src_strd
@*  integer source stride in bytes
@*
@* @param[in] ht
@*  integer height of the array (rows to pad). ht <= 0 pads nothing
@*
@* @param[in] pad_size
@*  integer padding size. only used to locate the destination
@*  (pu1_src - pad_size). 80 bytes are written regardless - pass 80
@*
@* @returns
@*  none
@*
@* @remarks
@*  none
@*
@*******************************************************************************
@*/
@void ihevc_pad_left_luma(uword8 *pu1_src,
@                         word32 src_strd,
@                         word32 ht,
@                         word32 pad_size)
@**************variables vs registers*************************
@   r0 => *pu1_src
@   r1 => src_strd
@   r2 => ht
@   r3 => pad_size
@   r4-r7  => destination pointers of the rows in flight
@   r8-r11 => sample value of each row in flight

    .globl ihevc_pad_left_luma_a9q

    .type ihevc_pad_left_luma_a9q, %function

ihevc_pad_left_luma_a9q:

    stmfd       sp!, {r4-r11,lr}            @ save callee-saved registers and return address

    cmp         r2,#0
    ble         .Lpad_left_luma_done        @ ht <= 0 : nothing to pad

    subs        r2,r2,#4                    @ r2 = rows remaining - 4
    blt         .Lpad_left_luma_tail        @ fewer than 4 rows : tail loop only

.Lpad_left_luma_loop4:
    @ invariant : r2 >= 0 and (r2 + 4) rows are still to be padded
    sub         r4,r0,r3                    @ r4 = row 0 destination

    ldrb        r8,[r0]                     @ left-most sample of rows 0..3
    add         r0,r0,r1
    ldrb        r9,[r0]
    add         r0,r0,r1
    ldrb        r10,[r0]
    add         r0,r0,r1
    ldrb        r11,[r0]
    add         r0,r0,r1                    @ r0 = first row of the next group

    vdup.u8     q0,r8                       @ replicate each sample over 16 lanes
    vdup.u8     q1,r9
    vdup.u8     q2,r10
    vdup.u8     q3,r11

    add         r5,r4,r1                    @ row 1 destination (taken before r4 advances)
    pad_store_row d0,d1,r4

    add         r6,r5,r1                    @ row 2 destination
    pad_store_row d2,d3,r5

    add         r7,r6,r1                    @ row 3 destination
    pad_store_row d4,d5,r6

    subs        r2,r2,#4                    @ flags are consumed by the bge below
    pad_store_row d6,d7,r7

    @ total of 4rows*(16*5) = 4 * 80 bytes stored

    bge         .Lpad_left_luma_loop4       @ at least 4 more rows

.Lpad_left_luma_tail:
    adds        r2,r2,#4                    @ r2 = leftover rows (0..3)
    beq         .Lpad_left_luma_done

.Lpad_left_luma_loop1:
    ldrb        r8,[r0]
    sub         r4,r0,r3
    add         r0,r0,r1
    vdup.u8     q0,r8
    pad_store_row d0,d1,r4
    subs        r2,r2,#1
    bne         .Lpad_left_luma_loop1

.Lpad_left_luma_done:
    ldmfd       sp!,{r4-r11,pc}             @ restore registers and return



@/**
@*******************************************************************************
@*
@* @brief
@*  padding (chroma block) at the left of a 2d array
@*
@* @par description:
@*  for every row the 2-byte unit at pu1_src is replicated into the 80 bytes
@*  (40 units) that start at (pu1_src - pad_size). pu1_src then advances by
@*  src_strd. the byte at pu1_src lands on even offsets of the padding and
@*  the byte at pu1_src + 1 on odd offsets (little-endian data layout).
@*  NOTE(review): the 2-byte unit is presumably an interleaved chroma
@*  pair - confirm against the caller.
@*
@* @param[in] pu1_src
@*  uword8 pointer to the first (left-most) 2-byte unit of the first row
@*
@* @param[in] src_strd
@*  integer source stride in bytes
@*
@* @param[in] ht
@*  integer height of the array (rows to pad). ht <= 0 pads nothing
@*
@* @param[in] pad_size
@*  integer padding size in bytes. only used to locate the destination
@*  (pu1_src - pad_size). 80 bytes are written regardless - pass 80
@*
@* @returns
@*  none
@*
@* @remarks
@*  none
@*
@*******************************************************************************
@*/
@void ihevc_pad_left_chroma(uword8 *pu1_src,
@                           word32 src_strd,
@                           word32 ht,
@                           word32 pad_size)
@**************variables vs registers*************************
@   r0 => *pu1_src
@   r1 => src_strd
@   r2 => ht
@   r3 => pad_size
@   r4-r7  => destination pointers of the rows in flight
@   r8-r11 => 16-bit unit of each row in flight

    .globl ihevc_pad_left_chroma_a9q

    .type ihevc_pad_left_chroma_a9q, %function

ihevc_pad_left_chroma_a9q:

    stmfd       sp!, {r4-r11,lr}            @ save callee-saved registers and return address

    cmp         r2,#0
    ble         .Lpad_left_chroma_done      @ ht <= 0 : nothing to pad

    subs        r2,r2,#4                    @ r2 = rows remaining - 4
    blt         .Lpad_left_chroma_tail      @ fewer than 4 rows : tail loop only

.Lpad_left_chroma_loop4:
    @ invariant : r2 >= 0 and (r2 + 4) rows are still to be padded
    sub         r4,r0,r3                    @ r4 = row 0 destination

    ldrh        r8,[r0]                     @ left-most 2-byte unit of rows 0..3
    add         r0,r0,r1
    ldrh        r9,[r0]
    add         r0,r0,r1
    ldrh        r10,[r0]
    add         r0,r0,r1
    ldrh        r11,[r0]
    add         r0,r0,r1                    @ r0 = first row of the next group

    vdup.u16    q0,r8                       @ replicate each unit over 8 lanes
    vdup.u16    q1,r9
    vdup.u16    q2,r10
    vdup.u16    q3,r11

    add         r5,r4,r1                    @ row 1 destination (taken before r4 advances)
    pad_store_row d0,d1,r4

    add         r6,r5,r1                    @ row 2 destination
    pad_store_row d2,d3,r5

    add         r7,r6,r1                    @ row 3 destination
    pad_store_row d4,d5,r6

    subs        r2,r2,#4                    @ flags are consumed by the bge below
    pad_store_row d6,d7,r7

    @ total of 4rows*(16*5) = 4 * 80 bytes stored

    bge         .Lpad_left_chroma_loop4     @ at least 4 more rows

.Lpad_left_chroma_tail:
    adds        r2,r2,#4                    @ r2 = leftover rows (0..3)
    beq         .Lpad_left_chroma_done

.Lpad_left_chroma_loop1:
    ldrh        r8,[r0]
    sub         r4,r0,r3
    add         r0,r0,r1
    vdup.u16    q0,r8
    pad_store_row d0,d1,r4
    subs        r2,r2,#1
    bne         .Lpad_left_chroma_loop1

.Lpad_left_chroma_done:
    ldmfd       sp!,{r4-r11,pc}             @ restore registers and return



@/**
@*******************************************************************************
@*
@* @brief
@*  padding (luma block) at the right of a 2d array
@*
@* @par description:
@*  for every row the byte at (pu1_src - 1) is replicated into the 80 bytes
@*  that start at pu1_src. pu1_src then advances by src_strd.
@*  c reference, per row :
@*      memset(pu1_src, *(pu1_src - 1), pad_size), then pu1_src += src_strd
@*  the two agree only for pad_size == 80.
@*
@* @param[in] pu1_src
@*  uword8 pointer to the first byte to be written in the first row
@*  (one past the right-most sample)
@*
@* @param[in] src_strd
@*  integer source stride in bytes
@*
@* @param[in] ht
@*  integer height of the array (rows to pad). ht <= 0 pads nothing
@*
@* @param[in] pad_size
@*  integer padding size. NOT read by this routine, 80 bytes are always
@*  written - pass 80
@*
@* @returns
@*  none
@*
@* @remarks
@*  none
@*
@*******************************************************************************
@*/
@void ihevc_pad_right_luma(uword8 *pu1_src,
@                          word32 src_strd,
@                          word32 ht,
@                          word32 pad_size)
@**************variables vs registers*************************
@   r0 => *pu1_src
@   r1 => src_strd
@   r2 => ht
@   r3 => pad_size (unused)
@   r4-r7  => destination pointers of the rows in flight
@   r8-r11 => sample value of each row in flight

    .globl ihevc_pad_right_luma_a9q

    .type ihevc_pad_right_luma_a9q, %function

ihevc_pad_right_luma_a9q:

    stmfd       sp!, {r4-r11,lr}            @ save callee-saved registers and return address

    cmp         r2,#0
    ble         .Lpad_right_luma_done       @ ht <= 0 : nothing to pad

    subs        r2,r2,#4                    @ r2 = rows remaining - 4
    blt         .Lpad_right_luma_tail       @ fewer than 4 rows : tail loop only

.Lpad_right_luma_loop4:
    @ invariant : r2 >= 0 and (r2 + 4) rows are still to be padded
    mov         r4,r0                       @ r4 = row 0 destination

    ldrb        r8,[r0, #-1]                @ right-most sample of rows 0..3
    add         r0,r0,r1
    ldrb        r9,[r0, #-1]
    add         r0,r0,r1
    ldrb        r10,[r0, #-1]
    add         r0,r0,r1
    ldrb        r11,[r0, #-1]
    add         r0,r0,r1                    @ r0 = first row of the next group

    add         r5,r4,r1                    @ destinations of rows 1..3
    add         r6,r5,r1
    add         r7,r6,r1

    vdup.u8     q0,r8                       @ replicate each sample over 16 lanes
    vdup.u8     q1,r9
    vdup.u8     q2,r10
    vdup.u8     q3,r11

    pad_store_row d0,d1,r4
    pad_store_row d2,d3,r5

    subs        r2,r2,#4                    @ flags are consumed by the bge below

    pad_store_row d4,d5,r6
    pad_store_row d6,d7,r7

    @ total of 4rows*(16*5) = 4 * 80 bytes stored

    bge         .Lpad_right_luma_loop4      @ at least 4 more rows

.Lpad_right_luma_tail:
    adds        r2,r2,#4                    @ r2 = leftover rows (0..3)
    beq         .Lpad_right_luma_done

.Lpad_right_luma_loop1:
    ldrb        r8,[r0, #-1]
    mov         r4,r0
    add         r0,r0,r1
    vdup.u8     q0,r8
    pad_store_row d0,d1,r4
    subs        r2,r2,#1
    bne         .Lpad_right_luma_loop1

.Lpad_right_luma_done:
    ldmfd       sp!,{r4-r11,pc}             @ restore registers and return



@/**
@*******************************************************************************
@*
@* @brief
@*  padding (chroma block) at the right of a 2d array
@*
@* @par description:
@*  for every row the 2-byte unit at (pu1_src - 2) is replicated into the
@*  80 bytes (40 units) that start at pu1_src. pu1_src then advances by
@*  src_strd. the byte at pu1_src - 2 lands on even offsets of the padding
@*  and the byte at pu1_src - 1 on odd offsets (little-endian data layout).
@*  NOTE(review): the 2-byte unit is presumably an interleaved chroma
@*  pair - confirm against the caller.
@*
@* @param[in] pu1_src
@*  uword8 pointer to the first byte to be written in the first row
@*  (one past the right-most 2-byte unit)
@*
@* @param[in] src_strd
@*  integer source stride in bytes
@*
@* @param[in] ht
@*  integer height of the array (rows to pad). ht <= 0 pads nothing
@*
@* @param[in] pad_size
@*  integer padding size. NOT read by this routine, 80 bytes are always
@*  written - pass 80
@*
@* @returns
@*  none
@*
@* @remarks
@*  none
@*
@*******************************************************************************
@*/
@void ihevc_pad_right_chroma(uword8 *pu1_src,
@                            word32 src_strd,
@                            word32 ht,
@                            word32 pad_size)
@**************variables vs registers*************************
@   r0 => *pu1_src
@   r1 => src_strd
@   r2 => ht
@   r3 => pad_size (unused)
@   r4-r7  => destination pointers of the rows in flight
@   r8-r11 => 16-bit unit of each row in flight

    .globl ihevc_pad_right_chroma_a9q

    .type ihevc_pad_right_chroma_a9q, %function

ihevc_pad_right_chroma_a9q:

    stmfd       sp!, {r4-r11,lr}            @ save callee-saved registers and return address

    cmp         r2,#0
    ble         .Lpad_right_chroma_done     @ ht <= 0 : nothing to pad

    subs        r2,r2,#4                    @ r2 = rows remaining - 4
    blt         .Lpad_right_chroma_tail     @ fewer than 4 rows : tail loop only

.Lpad_right_chroma_loop4:
    @ invariant : r2 >= 0 and (r2 + 4) rows are still to be padded
    mov         r4,r0                       @ r4 = row 0 destination

    ldrh        r8,[r0, #-2]                @ right-most 2-byte unit of rows 0..3
    add         r0,r0,r1
    ldrh        r9,[r0, #-2]
    add         r0,r0,r1
    ldrh        r10,[r0, #-2]
    add         r0,r0,r1
    ldrh        r11,[r0, #-2]
    add         r0,r0,r1                    @ r0 = first row of the next group

    vdup.u16    q0,r8                       @ replicate each unit over 8 lanes
    vdup.u16    q1,r9
    vdup.u16    q2,r10
    vdup.u16    q3,r11

    add         r5,r4,r1                    @ row 1 destination (taken before r4 advances)
    pad_store_row d0,d1,r4

    add         r6,r5,r1                    @ row 2 destination
    pad_store_row d2,d3,r5

    add         r7,r6,r1                    @ row 3 destination
    pad_store_row d4,d5,r6

    subs        r2,r2,#4                    @ flags are consumed by the bge below
    pad_store_row d6,d7,r7

    @ total of 4rows*(16*5) = 4 * 80 bytes stored

    bge         .Lpad_right_chroma_loop4    @ at least 4 more rows

.Lpad_right_chroma_tail:
    adds        r2,r2,#4                    @ r2 = leftover rows (0..3)
    beq         .Lpad_right_chroma_done

.Lpad_right_chroma_loop1:
    ldrh        r8,[r0, #-2]
    mov         r4,r0
    add         r0,r0,r1
    vdup.u16    q0,r8
    pad_store_row d0,d1,r4
    subs        r2,r2,#1
    bne         .Lpad_right_chroma_loop1

.Lpad_right_chroma_done:
    ldmfd       sp!,{r4-r11,pc}             @ restore registers and return