;//
;//
;// File Name:  omxVCM4P10_InterpolateLuma_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

;// Function:
;//     omxVCM4P10_InterpolateLuma
;//
;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly.
;// Performs quarter pel interpolation of inter luma MB.
;// It's assumed that the frame is already padded when calling this function.
;// Parameters:
;// [in]    pSrc        Pointer to the source reference frame buffer
;// [in]    srcStep     Reference frame step in byte
;// [in]    dstStep     Destination frame step in byte. Must be multiple of roi.width
;// [in]    dx          Fractional part of horizontal motion vector
;//                         component in 1/4 pixel unit; valid in the range [0,3]
;// [in]    dy          Fractional part of vertical motion vector
;//                         component in 1/4 pixel unit; valid in the range [0,3]
;// [in]    roi         Dimension of the interpolation region; the parameters roi.width and roi.height must
;//                         be equal to either 4, 8, or 16.
;// [out]   pDst        Pointer to the destination frame buffer.
;//                   if roi.width==4,  4-byte alignment required
;//                   if roi.width==8,  8-byte alignment required
;//                   if roi.width==16, 16-byte alignment required
;//
;// Return Value:
;// If the function runs without error, it returns OMX_Sts_NoErr.
;// It is assumed that the following cases are satisfied before calling this function:
;//  pSrc or pDst is not NULL.
;//  srcStep or dstStep >= roi.width.
;//     dx or dy is in the range [0-3].
;//     roi.width or roi.height is not out of range {4, 8, 16}.
;//     If roi.width is equal to 4, pDst is 4 byte aligned.
;//     If roi.width is equal to 8, pDst is 8 byte aligned.
;//     If roi.width is equal to 16, pDst is 16 byte aligned.
;//     srcStep and dstStep are multiples of 8.
;//
;//


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT omxVCM4P10_InterpolateLuma

    IF ARM1136JS
        IMPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        IMPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        IMPORT armVCM4P10_Average_4x4_Align0_unsafe
        IMPORT armVCM4P10_Average_4x4_Align2_unsafe
        IMPORT armVCM4P10_Average_4x4_Align3_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        IMPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
    ENDIF

    IF ARM1136JS
        IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    ENDIF



;// Declare input registers
pSrc            RN 0
srcStep         RN 1
pDst            RN 2
dstStep         RN 3
iHeight         RN 4
iWidth          RN 5

;// Declare other intermediate registers
;// NOTE: idx and index are both aliases of r6; once index has been computed
;//       the original idx value is no longer available in a register.
idx             RN 6
idy             RN 7
index           RN 6
Temp            RN 12
pArgs           RN 11


    ;// End of CortexA8

;//-------------------------------------------------------------------------------------------------------------------------
;//-------------------------------------------------------------------------------------------------------------------------
    IF ARM1136JS


        ;// Local (stack) storage used by this function
        M_ALLOC4 ppDst, 8
        M_ALLOC4 ppSrc, 8
        M_ALLOC4 ppArgs, 16             ;// Saved {pSrc, srcStep, pDst, dstStep} of the current 4x4 block
        M_ALLOC4 pBuffer, 120           ;// 120 = 12x10
        M_ALLOC8 pInterBuf, 120         ;// 120 = 12*5*2
        M_ALLOC8 pTempBuf, 32           ;// 32 = 8*4

        ;// Function header
        ;// Interpolation of luma is implemented by processing block of pixels, size 4x4 at a time.
        ;// Depending on the values of motion vector fractional parts (dx,dy), one out of 16 cases will be processed.
        ;// Registers r4, r5, r6 to be preserved by internal unsafe functions
        ;// r4 - iHeight
        ;// r5 - iWidth
        ;// r6 - index
        ;//
        ;// Register arguments : r0 = pSrc, r1 = srcStep, r2 = pDst, r3 = dstStep
        ;// Stack arguments    : dx, dy, roi.width, roi.height (see the M_ARG list below)
        ;// Return value       : r0 = 0 (set unconditionally at EndOfInterpolation)
        M_START omxVCM4P10_InterpolateLuma, r11

;// Declare other intermediate registers
idx             RN 6
idy             RN 7
index           RN 6
Temp            RN 12
pArgs           RN 11

;// Height and bufStep are both aliases of r9
pBuf            RN 8
Height          RN 9
bufStep         RN 9

        ;// Define stack arguments
        M_ARG   ptridx, 4
        M_ARG   ptridy, 4
        M_ARG   ptrWidth, 4
        M_ARG   ptrHeight, 4

        ;// Load the fractional motion vector (dx, dy) and the structure elements of roi
        M_LDR   idx, ptridx
        M_LDR   idy, ptridy
        M_LDR   iWidth, ptrWidth
        M_LDR   iHeight, ptrHeight

        M_PRINTF "roi.width %d\n", iWidth
        M_PRINTF "roi.height %d\n", iHeight

        ADD     index, idx, idy, LSL #2                 ;// [index] = [idy][idx]  (index = idx + 4*idy)
        M_ADR   pArgs, ppArgs

        ;// The three labels below share one address: both the width loop and the
        ;// height loop re-enter here with pSrc/pDst pointing at the next 4x4 block.
InterpolateLuma
Block4x4WidthLoop
Block4x4HeightLoop

        ;// Save the block's arguments so they can be reloaded after the helpers run
        STM     pArgs, {pSrc,srcStep,pDst,dstStep}
        M_ADR   pBuf, pBuffer

        ;// switch table using motion vector as index
        M_SWITCH index, L
        M_CASE  Case_0
        M_CASE  Case_1
        M_CASE  Case_2
        M_CASE  Case_3
        M_CASE  Case_4
        M_CASE  Case_5
        M_CASE  Case_6
        M_CASE  Case_7
        M_CASE  Case_8
        M_CASE  Case_9
        M_CASE  Case_a
        M_CASE  Case_b
        M_CASE  Case_c
        M_CASE  Case_d
        M_CASE  Case_e
        M_CASE  Case_f
        M_ENDSWITCH

Case_0
        ;// Case G
        M_PRINTF "Case 0 \n"

        BL      armVCM4P10_InterpolateLuma_Copy4x4_unsafe
        B       Block4x4LoopEnd

Case_1
        ;// Case a
        M_PRINTF "Case 1 \n"

        SUB     pSrc, pSrc, #2                          ;// Start 2 bytes left of the block
        MOV     Height, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        BL      armVCM4P10_Average_4x4_Align2_unsafe
        B       Block4x4LoopEnd
Case_2
        ;// Case b
        M_PRINTF "Case 2 \n"

        SUB     pSrc, pSrc, #2                          ;// Start 2 bytes left of the block
        MOV     Height, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        B       Block4x4LoopEnd
Case_3
        ;// Case c
        M_PRINTF "Case 3 \n"

        SUB     pSrc, pSrc, #2                          ;// Start 2 bytes left of the block
        MOV     Height, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        BL      armVCM4P10_Average_4x4_Align3_unsafe
        B       Block4x4LoopEnd
Case_4
        ;// Case d
        M_PRINTF "Case 4 \n"

        SUB     pSrc, pSrc, srcStep, LSL #1             ;// Start 2 rows above the block
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        BL      armVCM4P10_Average_4x4_Align0_unsafe

        B       Block4x4LoopEnd
Case_5
        ;// Case e
        M_PRINTF "Case 5 \n"

        ;// First pass: horizontal half-pel, with pDst/dstStep redirected to pTempBuf
        SUB     pSrc, pSrc, #2
        MOV     Height, #4
        M_ADR   pDst, pTempBuf
        MOV     dstStep, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        ;// Second pass: reload the saved arguments, vertical half-pel
        M_ADR   pArgs, ppArgs
        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}
        SUB     pSrc, pSrc, srcStep, LSL #1
        M_ADR   pBuf, pBuffer
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        ;// Average with pSrc/srcStep pointing at the first-pass result in pTempBuf
        M_ADR   pSrc, pTempBuf
        MOV     srcStep, #4
        BL      armVCM4P10_Average_4x4_Align0_unsafe


        B       Block4x4LoopEnd
Case_6
        ;// Case f
        M_PRINTF "Case 6 \n"

        SUB     pSrc, pSrc, #2
        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        M_ADR   pBuf, pInterBuf
        BL      armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
        M_ADR   idy, pTempBuf                           ;// r7 (idy) is reused to hold the address of pTempBuf
        BL      armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
        BL      armVCM4P10_Average_4x4_Align0_unsafe
        B       Block4x4LoopEnd
Case_7
        ;// Case g
        M_PRINTF "Case 7 \n"

        ;// First pass: horizontal half-pel, with pDst/dstStep redirected to pTempBuf
        SUB     pSrc, pSrc, #2
        MOV     Height, #4
        M_ADR   pDst, pTempBuf
        MOV     dstStep, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        ;// Second pass: reload the saved arguments, vertical half-pel one column to the right
        M_ADR   pArgs, ppArgs
        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}
        SUB     pSrc, pSrc, srcStep, LSL #1
        ADD     pSrc, pSrc, #1
        M_ADR   pBuf, pBuffer
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        ;// Average with pSrc/srcStep pointing at the first-pass result in pTempBuf
        M_ADR   pSrc, pTempBuf
        MOV     srcStep, #4
        BL      armVCM4P10_Average_4x4_Align0_unsafe

        B       Block4x4LoopEnd
Case_8
        ;// Case h
        M_PRINTF "Case 8 \n"

        SUB     pSrc, pSrc, srcStep, LSL #1             ;// Start 2 rows above the block
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        B       Block4x4LoopEnd
Case_9
        ;// Case i
        M_PRINTF "Case 9 \n"

        SUB     pSrc, pSrc, #2
        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        ADD     pSrc, pSrc, srcStep, LSL #1
        M_ADR   pBuf, pInterBuf
        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        M_ADR   idy, pTempBuf                           ;// r7 (idy) is reused to hold the address of pTempBuf
        BL      armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        BL      armVCM4P10_Average_4x4_Align2_unsafe
        B       Block4x4LoopEnd
Case_a
        ;// Case j
        M_PRINTF "Case a \n"

        SUB     pSrc, pSrc, #2
        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        ADD     pSrc, pSrc, srcStep, LSL #1
        M_ADR   pBuf, pInterBuf
        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        B       Block4x4LoopEnd
Case_b
        ;// Case k
        M_PRINTF "Case b \n"
        SUB     pSrc, pSrc, #2
        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        ADD     pSrc, pSrc, srcStep, LSL #1
        M_ADR   pBuf, pInterBuf
        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        M_ADR   idy, pTempBuf                           ;// r7 (idy) is reused to hold the address of pTempBuf
        BL      armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        BL      armVCM4P10_Average_4x4_Align3_unsafe
        B       Block4x4LoopEnd
Case_c
        ;// Case n
        M_PRINTF "Case c \n"

        SUB     pSrc, pSrc, srcStep, LSL #1             ;// Start 2 rows above the block
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        ADD     pSrc, pSrc, srcStep                     ;// Update pSrc to one row down
        BL      armVCM4P10_Average_4x4_Align0_unsafe
        B       Block4x4LoopEnd
Case_d
        ;// Case p
        M_PRINTF "Case d \n"
        ;// First pass: horizontal half-pel one row down, with pDst/dstStep redirected to pTempBuf
        SUB     pSrc, pSrc, #2
        ADD     pSrc, pSrc, srcStep
        MOV     Height, #4
        M_ADR   pDst, pTempBuf
        MOV     dstStep, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        ;// Second pass: reload the saved arguments, vertical half-pel
        M_ADR   pArgs, ppArgs
        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}
        SUB     pSrc, pSrc, srcStep, LSL #1
        M_ADR   pBuf, pBuffer
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        ;// Average with pSrc/srcStep pointing at the first-pass result in pTempBuf
        M_ADR   pSrc, pTempBuf
        MOV     srcStep, #4
        BL      armVCM4P10_Average_4x4_Align0_unsafe
        B       Block4x4LoopEnd
Case_e
        ;// Case q
        M_PRINTF "Case e \n"

        SUB     pSrc, pSrc, #2
        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        M_ADR   pBuf, pInterBuf
        BL      armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
        M_ADR   idy, pTempBuf                           ;// r7 (idy) is reused to hold the address of pTempBuf
        BL      armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
        ADD     pSrc, pSrc, #4
        BL      armVCM4P10_Average_4x4_Align0_unsafe

        B       Block4x4LoopEnd
Case_f
        ;// Case r
        M_PRINTF "Case f \n"
        ;// First pass: horizontal half-pel one row down, with pDst/dstStep redirected to pTempBuf
        SUB     pSrc, pSrc, #2
        ADD     pSrc, pSrc, srcStep
        MOV     Height, #4
        M_ADR   pDst, pTempBuf
        MOV     dstStep, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        ;// Second pass: reload the saved arguments, vertical half-pel one column to the right
        M_ADR   pArgs, ppArgs
        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}
        SUB     pSrc, pSrc, srcStep, LSL #1
        ADD     pSrc, pSrc, #1
        M_ADR   pBuf, pBuffer
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        ;// Average with pSrc/srcStep pointing at the first-pass result in pTempBuf
        M_ADR   pSrc, pTempBuf
        MOV     srcStep, #4
        BL      armVCM4P10_Average_4x4_Align0_unsafe
        ;// Last case: falls through to Block4x4LoopEnd

Block4x4LoopEnd

        ;// Width Loop
        ;// The flags set by SUBS must survive until BGT: the ADDs and the LDM
        ;// below are non-flag-setting forms.
        ;// NOTE(review): this also relies on M_ADR not altering the flags - confirm in armCOMM_s.h.
        SUBS    iWidth, iWidth, #4
        M_ADR   pArgs, ppArgs
        LDM     pArgs, {pSrc,srcStep,pDst,dstStep}      ;// Load arguments
        ADD     pSrc, pSrc, #4                          ;// Advance to the next 4x4 block in this row
        ADD     pDst, pDst, #4
        BGT     Block4x4WidthLoop

        ;// Height Loop
        ;// pSrc/pDst have advanced by roi.width in total; step down 4 rows and
        ;// subtract the reloaded width to return to the start of the row.
        SUBS    iHeight, iHeight, #4
        M_LDR   iWidth, ptrWidth
        M_ADR   pArgs, ppArgs
        ADD     pSrc, pSrc, srcStep, LSL #2
        ADD     pDst, pDst, dstStep, LSL #2
        SUB     pSrc, pSrc, iWidth
        SUB     pDst, pDst, iWidth
        BGT     Block4x4HeightLoop

EndOfInterpolation
        MOV     r0, #0
        M_END

    ENDIF


        END