;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  omxVCM4P10_InterpolateLuma_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;//
;//
;//

;// Function:
;//     omxVCM4P10_InterpolateLuma
;//
;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly.
;// Performs quarter pel interpolation of inter luma MB.
;// It's assumed that the frame is already padded when calling this function.
;// Parameters:
;//     [in]    pSrc        Pointer to the source reference frame buffer
;//     [in]    srcStep     Reference frame step in bytes
;//     [in]    dstStep     Destination frame step in bytes. Must be multiple of roi.width
;//     [in]    dx          Fractional part of horizontal motion vector
;//                         component in 1/4 pixel unit; valid in the range [0,3]
;//     [in]    dy          Fractional part of vertical motion vector
;//                         component in 1/4 pixel unit; valid in the range [0,3]
;//     [in]    roi         Dimension of the interpolation region; the parameters roi.width and roi.height must
;//                         be equal to either 4, 8, or 16.
;//     [out]   pDst        Pointer to the destination frame buffer.
;//                         if roi.width==4,  4-byte alignment required
;//                         if roi.width==8,  8-byte alignment required
;//                         if roi.width==16, 16-byte alignment required
;//
;// Return Value:
;//     If the function runs without error, it returns OMX_Sts_NoErr.
;//     It is assumed that the following conditions are satisfied before calling this function:
;//         pSrc or pDst is not NULL.
;//         srcStep or dstStep >= roi.width.
;//         dx or dy is in the range [0-3].
;//         roi.width or roi.height is not out of range {4, 8, 16}.
;//         If roi.width is equal to 4, pDst is 4 byte aligned.
;//         If roi.width is equal to 8, pDst is 8 byte aligned.
;//         If roi.width is equal to 16, pDst is 16 byte aligned.
;//         srcStep and dstStep is multiple of 8.
;//
;//


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        ;// Only the ARM1136JS variant is declared; every IF block in this file tests it.
        M_VARIANTS ARM1136JS

        EXPORT omxVCM4P10_InterpolateLuma

    ;// External helpers called with BL from the function body.
    ;// Their register contracts are not visible in this file.
    IF ARM1136JS
        IMPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        IMPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        IMPORT armVCM4P10_Average_4x4_Align0_unsafe
        IMPORT armVCM4P10_Average_4x4_Align2_unsafe
        IMPORT armVCM4P10_Average_4x4_Align3_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        IMPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
    ENDIF

    IF ARM1136JS
        IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    ENDIF



;// Declare input registers
;// r0-r3 carry the source/destination pointers and their steps.
;// r4/r5 hold the roi height and width; the function loads both from its
;// stack arguments after entry.
pSrc            RN 0
srcStep         RN 1
pDst            RN 2
dstStep         RN 3
iHeight         RN 4
iWidth          RN 5

;// Declare other intermediate registers
;// idx and index both name r6, so writing index overwrites idx.
;// Temp is not referenced by any instruction visible in this file.
idx             RN 6
idy             RN 7
index           RN 6
Temp            RN 12
pArgs           RN 11


;// End of CortexA8
;// NOTE(review): the marker above looks stale - M_VARIANTS lists only ARM1136JS
;// and no CortexA8-specific section is visible in this file.

;//-------------------------------------------------------------------------------------------------------------------------
;//-------------------------------------------------------------------------------------------------------------------------
    IF ARM1136JS


        ;// Local buffers. M_ALLOC4 / M_ALLOC8 are macros from armCOMM_s.h;
        ;// presumably they reserve stack locals of the given byte size with
        ;// 4- / 8-byte alignment - TODO confirm against the macro definitions.
        ;// ppDst and ppSrc are not referenced by any instruction in this function.
        M_ALLOC4    ppDst, 8
        M_ALLOC4    ppSrc, 8
        M_ALLOC4    ppArgs, 16                  ;// save area for {pSrc,srcStep,pDst,dstStep}
        M_ALLOC4    pBuffer, 120                ;// 120 = 12x10
        M_ALLOC8    pInterBuf, 120              ;// 120 = 12*5*2
        M_ALLOC8    pTempBuf, 32                ;// 32 = 8*4

        ;// Function header
        ;// Interpolation of luma is implemented by processing block of pixels, size 4x4 at a time.
        ;// Depending on the values of motion vector fractional parts (dx,dy), one out of 16 cases will be processed.
        ;// Registers r4, r5, r6 to be preserved by internal unsafe functions
        ;// r4 - iHeight
        ;// r5 - iWidth
        ;// r6 - index
        ;//
        ;// Flow: load idx/idy/width/height from the stack arguments, form
        ;// index = idx + 4*idy, then for every 4x4 block of the region dispatch
        ;// through the 16-entry switch and advance pSrc/pDst. Finishes with r0 = 0.
        ;// NOTE(review): the second M_START operand (r11) presumably names the
        ;// highest register the prologue saves - confirm in armCOMM_s.h.
        M_START omxVCM4P10_InterpolateLuma, r11

;// Declare other intermediate registers
;// idx and index alias the same register (r6); Height and bufStep alias r9.
;// Temp and bufStep are not referenced by any instruction in this function.
idx             RN 6
idy             RN 7
index           RN 6
Temp            RN 12
pArgs           RN 11

pBuf            RN 8
Height          RN 9
bufStep         RN 9

        ;// Define stack arguments (four 4-byte slots, in this order)
        M_ARG   ptridx, 4
        M_ARG   ptridy, 4
        M_ARG   ptrWidth, 4
        M_ARG   ptrHeight, 4

        ;// Load idx, idy and the roi width/height from the stack arguments
        M_LDR   idx, ptridx
        M_LDR   idy, ptridy
        M_LDR   iWidth, ptrWidth
        M_LDR   iHeight, ptrHeight

        M_PRINTF "roi.width %d\n", iWidth
        M_PRINTF "roi.height %d\n", iHeight

        ;// index = idx + 4*idy. This overwrites idx, which shares r6 with index.
        ADD     index, idx, idy, LSL #2         ;// [index] = [idy][idx]
        M_ADR   pArgs, ppArgs

;// The three labels below mark the same address: both loops re-enter here.
;// InterpolateLuma itself is not a branch target anywhere in this function.
InterpolateLuma
Block4x4WidthLoop
Block4x4HeightLoop

        ;// Save the current block's arguments so that the cases and the loop
        ;// tail can reload them after the helpers have run.
        STM     pArgs, {pSrc,srcStep,pDst,dstStep}
        M_ADR   pBuf, pBuffer

        ;// switch table using motion vector as index
        ;// Case_N handles index == N, i.e. idx = N & 3 and idy = N >> 2.
        ;// NOTE(review): the letters in the per-case comments (G, a, b, ...)
        ;// presumably name the fractional sample positions of the H.264 luma
        ;// interpolation figure - confirm against the standard.
        M_SWITCH index, L
        M_CASE  Case_0
        M_CASE  Case_1
        M_CASE  Case_2
        M_CASE  Case_3
        M_CASE  Case_4
        M_CASE  Case_5
        M_CASE  Case_6
        M_CASE  Case_7
        M_CASE  Case_8
        M_CASE  Case_9
        M_CASE  Case_a
        M_CASE  Case_b
        M_CASE  Case_c
        M_CASE  Case_d
        M_CASE  Case_e
        M_CASE  Case_f
        M_ENDSWITCH

Case_0
        ;// Case G  (idx=0, idy=0): single helper call, no pointer adjustment
        M_PRINTF "Case 0 \n"

        BL      armVCM4P10_InterpolateLuma_Copy4x4_unsafe
        B       Block4x4LoopEnd

Case_1
        ;// Case a  (idx=1, idy=0)
        M_PRINTF "Case 1 \n"

        SUB     pSrc, pSrc, #2                  ;// start 2 bytes left of the block
        MOV     Height, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        BL      armVCM4P10_Average_4x4_Align2_unsafe
        B       Block4x4LoopEnd
Case_2
        ;// Case b  (idx=2, idy=0): same as Case_1 without the averaging call
        M_PRINTF "Case 2 \n"

        SUB     pSrc, pSrc, #2
        MOV     Height, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        B       Block4x4LoopEnd
Case_3
        ;// Case c  (idx=3, idy=0): same as Case_1 but averages with the Align3 helper
        M_PRINTF "Case 3 \n"

        SUB     pSrc, pSrc, #2
        MOV     Height, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        BL      armVCM4P10_Average_4x4_Align3_unsafe
        B       Block4x4LoopEnd
Case_4
        ;// Case d  (idx=0, idy=1)
        M_PRINTF "Case 4 \n"

        SUB     pSrc, pSrc, srcStep, LSL #1     ;// start 2 rows above the block
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        BL      armVCM4P10_Average_4x4_Align0_unsafe

        B       Block4x4LoopEnd
Case_5
        ;// Case e  (idx=1, idy=1): two passes joined by the averaging helper
        M_PRINTF "Case 5 \n"

        ;// Pass 1: HorAlign9x + HalfHor4x4 with pDst/dstStep redirected to
        ;// pTempBuf (step 4) instead of the caller's destination.
        SUB     pSrc, pSrc, #2
        MOV     Height, #4
        M_ADR   pDst, pTempBuf
        MOV     dstStep, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        ;// Pass 2: reload the saved arguments, then VerAlign4x + HalfVer4x4
        ;// starting 2 rows above the block.
        M_ADR   pArgs, ppArgs
        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}
        SUB     pSrc, pSrc, srcStep, LSL #1
        M_ADR   pBuf, pBuffer
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        ;// Point pSrc/srcStep at the pass-1 result before averaging.
        M_ADR   pSrc, pTempBuf
        MOV     srcStep, #4
        BL      armVCM4P10_Average_4x4_Align0_unsafe


        B       Block4x4LoopEnd
Case_6
        ;// Case f  (idx=2, idy=1)
        M_PRINTF "Case 6 \n"

        SUB     pSrc, pSrc, #2                  ;// 2 bytes left ...
        SUB     pSrc, pSrc, srcStep, LSL #1     ;// ... and 2 rows up
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        M_ADR   pBuf, pInterBuf
        BL      armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
        ;// r7 (idy) is reused to carry the pTempBuf address into the helper;
        ;// the idy value was already folded into index.
        M_ADR   idy, pTempBuf
        BL      armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
        BL      armVCM4P10_Average_4x4_Align0_unsafe
        B       Block4x4LoopEnd
Case_7
        ;// Case g  (idx=3, idy=1): as Case_5, but pass 2 starts one byte to the right
        M_PRINTF "Case 7 \n"

        SUB     pSrc, pSrc, #2
        MOV     Height, #4
        M_ADR   pDst, pTempBuf
        MOV     dstStep, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        M_ADR   pArgs, ppArgs
        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}
        SUB     pSrc, pSrc, srcStep, LSL #1
        ADD     pSrc, pSrc, #1                  ;// one column to the right
        M_ADR   pBuf, pBuffer
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        M_ADR   pSrc, pTempBuf
        MOV     srcStep, #4
        BL      armVCM4P10_Average_4x4_Align0_unsafe

        B       Block4x4LoopEnd
Case_8
        ;// Case h  (idx=0, idy=2): same as Case_4 without the averaging call
        M_PRINTF "Case 8 \n"

        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        B       Block4x4LoopEnd
Case_9
        ;// Case i  (idx=1, idy=2)
        M_PRINTF "Case 9 \n"

        SUB     pSrc, pSrc, #2
        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        ADD     pSrc, pSrc, srcStep, LSL #1     ;// advance pSrc by 2 rows for the next helper
        M_ADR   pBuf, pInterBuf
        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        M_ADR   idy, pTempBuf                   ;// r7 reused as the pTempBuf pointer
        BL      armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        BL      armVCM4P10_Average_4x4_Align2_unsafe
        B       Block4x4LoopEnd
Case_a
        ;// Case j  (idx=2, idy=2): as Case_9, minus the copy and averaging calls
        M_PRINTF "Case a \n"

        SUB     pSrc, pSrc, #2
        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        ADD     pSrc, pSrc, srcStep, LSL #1
        M_ADR   pBuf, pInterBuf
        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        B       Block4x4LoopEnd
Case_b
        ;// Case k  (idx=3, idy=2): as Case_9 but averages with the Align3 helper
        M_PRINTF "Case b \n"
        SUB     pSrc, pSrc, #2
        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        ADD     pSrc, pSrc, srcStep, LSL #1
        M_ADR   pBuf, pInterBuf
        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        M_ADR   idy, pTempBuf                   ;// r7 reused as the pTempBuf pointer
        BL      armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        BL      armVCM4P10_Average_4x4_Align3_unsafe
        B       Block4x4LoopEnd
Case_c
        ;// Case n  (idx=0, idy=3): as Case_4, with pSrc moved one row down before averaging
        M_PRINTF "Case c \n"

        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        ADD     pSrc, pSrc, srcStep             ;// Update pSrc to one row down
        BL      armVCM4P10_Average_4x4_Align0_unsafe
        B       Block4x4LoopEnd
Case_d
        ;// Case p  (idx=1, idy=3): as Case_5, but pass 1 starts one row down
        M_PRINTF "Case d \n"
        SUB     pSrc, pSrc, #2
        ADD     pSrc, pSrc, srcStep             ;// one row down for pass 1
        MOV     Height, #4
        M_ADR   pDst, pTempBuf
        MOV     dstStep, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        M_ADR   pArgs, ppArgs
        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}
        SUB     pSrc, pSrc, srcStep, LSL #1
        M_ADR   pBuf, pBuffer
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        M_ADR   pSrc, pTempBuf
        MOV     srcStep, #4
        BL      armVCM4P10_Average_4x4_Align0_unsafe
        B       Block4x4LoopEnd
Case_e
        ;// Case q  (idx=2, idy=3): as Case_6, with pSrc advanced by 4 before averaging
        M_PRINTF "Case e \n"

        SUB     pSrc, pSrc, #2
        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        M_ADR   pBuf, pInterBuf
        BL      armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
        M_ADR   idy, pTempBuf                   ;// r7 reused as the pTempBuf pointer
        BL      armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
        ;// NOTE(review): what pSrc addresses at this point depends on the
        ;// helper above; the +4 is taken as-is from the original code.
        ADD     pSrc, pSrc, #4
        BL      armVCM4P10_Average_4x4_Align0_unsafe

        B       Block4x4LoopEnd
Case_f
        ;// Case r  (idx=3, idy=3): Case_d's pass 1 combined with Case_7's pass 2
        M_PRINTF "Case f \n"
        SUB     pSrc, pSrc, #2
        ADD     pSrc, pSrc, srcStep             ;// one row down for pass 1
        MOV     Height, #4
        M_ADR   pDst, pTempBuf
        MOV     dstStep, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        M_ADR   pArgs, ppArgs
        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}
        SUB     pSrc, pSrc, srcStep, LSL #1
        ADD     pSrc, pSrc, #1                  ;// one column to the right
        M_ADR   pBuf, pBuffer
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        M_ADR   pSrc, pTempBuf
        MOV     srcStep, #4
        BL      armVCM4P10_Average_4x4_Align0_unsafe
        ;// Last case: falls through into Block4x4LoopEnd.

Block4x4LoopEnd

        ;// Width Loop
        ;// iWidth -= 4; BGT below uses the flags from this SUBS. LDM and the
        ;// ADDs do not set flags.
        ;// NOTE(review): M_ADR is a macro whose expansion is not visible here -
        ;// confirm it leaves the flags untouched.
        SUBS    iWidth, iWidth, #4
        M_ADR   pArgs, ppArgs
        LDM     pArgs, {pSrc,srcStep,pDst,dstStep}  ;// Load arguments
        ADD     pSrc, pSrc, #4                  ;// next 4x4 block to the right
        ADD     pDst, pDst, #4
        BGT     Block4x4WidthLoop

        ;// Height Loop
        ;// iHeight -= 4; BGT below uses the flags from this SUBS.
        ;// NOTE(review): this relies on M_LDR and M_ADR not altering the
        ;// flags - confirm against the macro definitions.
        SUBS    iHeight, iHeight, #4
        M_LDR   iWidth, ptrWidth                ;// restore the full width for the next band
        M_ADR   pArgs, ppArgs
        ;// pSrc/pDst have advanced by iWidth bytes across the band, so
        ;// "+ 4*step - iWidth" lands on the first block of the next 4-row band.
        ADD     pSrc, pSrc, srcStep, LSL #2
        ADD     pDst, pDst, dstStep, LSL #2
        SUB     pSrc, pSrc, iWidth
        SUB     pDst, pDst, iWidth
        BGT     Block4x4HeightLoop

;// EndOfInterpolation is not a branch target anywhere in this function.
EndOfInterpolation
        MOV     r0, #0                          ;// return 0 (documented success value: OMX_Sts_NoErr)
        M_END

    ENDIF


        END
