Home | History | Annotate | Download | only in src
      1 ;//
      2 ;//
      3 ;// File Name:  omxVCM4P10_InterpolateLuma_s.s
      4 ;// OpenMAX DL: v1.0.2
      5 ;// Revision:   9641
      6 ;// Date:       Thursday, February 7, 2008
      7 ;//
      8 ;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ;//
     10 ;//
     11 ;//
     12 
     13 ;// Function:
     14 ;//     omxVCM4P10_InterpolateLuma
     15 ;//
     16 ;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly.
     17 ;// Performs quarter pel interpolation of inter luma MB.
     18 ;// It's assumed that the frame is already padded when calling this function.
     19 ;// Parameters:
     20 ;// [in]    pSrc        Pointer to the source reference frame buffer
     21 ;// [in]    srcStep     Reference frame step in bytes
     22 ;// [in]    dstStep     Destination frame step in bytes. Must be a multiple of roi.width
     23 ;// [in]    dx          Fractional part of horizontal motion vector
     24 ;//                         component in 1/4 pixel unit; valid in the range [0,3]
     25 ;// [in]    dy          Fractional part of vertical motion vector
     26 ;//                         component in 1/4 pixel unit; valid in the range [0,3]
     27 ;// [in]    roi         Dimension of the interpolation region;the parameters roi.width and roi.height must
     28 ;//                         be equal to either 4, 8, or 16.
     29 ;// [out]   pDst        Pointer to the destination frame buffer.
     30 ;//                   if roi.width==4,  4-byte alignment required
     31 ;//                   if roi.width==8,  8-byte alignment required
     32 ;//                   if roi.width==16, 16-byte alignment required
     33 ;//
     34 ;// Return Value:
     35 ;// If the function runs without error, it returns OMX_Sts_NoErr.
     36 ;// It is assumed that the following conditions are satisfied before calling this function:
     37 ;//     pSrc and pDst are not NULL.
     38 ;//     srcStep and dstStep are >= roi.width.
     39 ;//     dx and dy are in the range [0-3].
     40 ;//     roi.width and roi.height are each one of {4, 8, 16}.
     41 ;//     If roi.width is equal to 4, pDst is 4 byte aligned.
     42 ;//     If roi.width is equal to 8, pDst is 8 byte aligned.
     43 ;//     If roi.width is equal to 16, pDst is 16 byte aligned.
     44 ;//     srcStep and dstStep are multiples of 8.
     45 ;//
     46 ;//
     47 
     48 
     49         INCLUDE omxtypes_s.h
     50         INCLUDE armCOMM_s.h
     51 
     52         M_VARIANTS ARM1136JS
     53 
     54         EXPORT omxVCM4P10_InterpolateLuma
     55 
     56     IF ARM1136JS
     57         IMPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
     58         IMPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
     59         IMPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
     60         IMPORT armVCM4P10_Average_4x4_Align0_unsafe
     61         IMPORT armVCM4P10_Average_4x4_Align2_unsafe
     62         IMPORT armVCM4P10_Average_4x4_Align3_unsafe
     63         IMPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
     64         IMPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
     65     ENDIF
     66 
     67     IF ARM1136JS
     68         IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
     69         IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
     70         IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
     71         IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
     72     ENDIF
     73 
     74 
     75 
     76 ;// Declare input registers (aliases used by omxVCM4P10_InterpolateLuma)
     77 pSrc            RN 0                                    ;// r0: source pointer for the current 4x4 block
     78 srcStep         RN 1                                    ;// r1: source step
     79 pDst            RN 2                                    ;// r2: destination pointer for the current 4x4 block
     80 dstStep         RN 3                                    ;// r3: destination step
     81 iHeight         RN 4                                    ;// r4: remaining height; loaded from the stack argument ptrHeight, not passed in a register
     82 iWidth          RN 5                                    ;// r5: remaining width in the current block row; loaded from the stack argument ptrWidth
     83 
     84 ;// Declare other intermediate registers
     85 idx             RN 6                                    ;// r6: dx as loaded from the stack argument ptridx
     86 idy             RN 7                                    ;// r7: dy as loaded from ptridy; later reused to hold the pTempBuf address in some cases
     87 index           RN 6                                    ;// r6: case selector idx + 4*idy (same register as idx, so idx is overwritten)
     88 Temp            RN 12                                   ;// r12: not referenced by name in the code visible here
     89 pArgs           RN 11                                   ;// r11: address of the ppArgs save area
     90 
     91 
     92         ;// End of CortexA8
     93 
     94 ;//-------------------------------------------------------------------------------------------------------------------------
     95 ;//-------------------------------------------------------------------------------------------------------------------------
     96     IF ARM1136JS                                        ;// Everything up to the matching ENDIF is assembled only for the ARM1136JS variant
     97 ;// Local storage for the function. M_ALLOC4/M_ALLOC8 are external macros (not defined in this file);
     98 ;// presumably "name, size in bytes" with 4-/8-byte alignment -- confirm in armCOMM_s.h.
     99         M_ALLOC4 ppDst, 8                               ;// Not referenced in the code visible here
    100         M_ALLOC4 ppSrc, 8                               ;// Not referenced in the code visible here
    101         M_ALLOC4 ppArgs, 16                             ;// Save area for {pSrc, srcStep, pDst, dstStep} of the current 4x4 block
    102         M_ALLOC4 pBuffer, 120                           ;// 120 = 12x10; default pBuf for the helper calls
    103         M_ALLOC8 pInterBuf, 120                         ;// 120 = 12*5*2; pBuf for the HalfDiag* helper calls
    104         M_ALLOC8 pTempBuf, 32                           ;// 32 =  8*4; temporary block, addressed with a 4-byte step where used as pSrc/pDst
    105 ;// omxVCM4P10_InterpolateLuma: walks the roi in 4x4 blocks (width loop nested inside a height loop).
    106 ;// For each block, one of 16 code paths is selected by index = dx + 4*dy, where (dx,dy) are the
    107 ;// fractional parts of the motion vector. On entry r0 = pSrc, r1 = srcStep, r2 = pDst, r3 = dstStep;
    108 ;// dx, dy, roi.width and roi.height are read from the stack arguments. Returns r0 = 0.
    109 ;// Registers r4, r5, r6 to be preserved by internal unsafe functions
    110 ;// r4 - iHeight
    111 ;// r5 - iWidth
    112 ;// r6 - index
    113         M_START omxVCM4P10_InterpolateLuma, r11         ;// External macro; r11 is presumably the highest register saved on entry -- confirm in armCOMM_s.h
    114 
    115 ;// Declare other intermediate registers (same assignments as the declarations earlier in the file)
    116 idx             RN 6                                    ;// r6: dx as loaded from the stack
    117 idy             RN 7                                    ;// r7: dy as loaded; reused below to hold the pTempBuf address
    118 index           RN 6                                    ;// r6: idx + 4*idy (shares r6 with idx)
    119 Temp            RN 12                                   ;// r12: not referenced by name in the code visible here
    120 pArgs           RN 11                                   ;// r11: address of ppArgs
    121 
    122 pBuf            RN 8                                    ;// r8: set to pBuffer or pInterBuf before helper calls
    123 Height          RN 9                                    ;// r9: set to 4 or 9 before the HorAlign9x/VerAlign4x helper calls
    124 bufStep         RN 9                                    ;// r9 alias; not referenced by name in the code visible here
    125 
    126         ;// Define stack arguments
    127         M_ARG   ptridx, 4                               ;// dx
    128         M_ARG   ptridy, 4                               ;// dy
    129         M_ARG   ptrWidth, 4                             ;// roi.width
    130         M_ARG   ptrHeight, 4                            ;// roi.height
    131 
    132         ;// Load dx, dy and the roi dimensions from the stack arguments
    133         M_LDR   idx, ptridx
    134         M_LDR   idy, ptridy
    135         M_LDR   iWidth, ptrWidth
    136         M_LDR   iHeight, ptrHeight
    137 ;// Trace of the roi dimensions. M_PRINTF is an external macro -- presumably inactive in normal builds; confirm.
    138         M_PRINTF "roi.width %d\n", iWidth
    139         M_PRINTF "roi.height %d\n", iHeight
    140 
    141         ADD     index, idx, idy, LSL #2                 ;//  index = idx + 4*idy, i.e. [index] = [idy][idx]; overwrites idx (same register)
    142         M_ADR   pArgs, ppArgs                           ;// pArgs = address of the ppArgs save area
    143 
    144 InterpolateLuma                                         ;// The three labels below mark the same address: the start of one 4x4 block
    145 Block4x4WidthLoop
    146 Block4x4HeightLoop
    147 
    148         STM     pArgs, {pSrc,srcStep,pDst,dstStep}      ;// Save this block's arguments; they are reloaded from ppArgs inside some cases and at Block4x4LoopEnd
    149         M_ADR   pBuf, pBuffer                           ;// Default pBuf; the diagonal cases repoint it to pInterBuf
    150 
    151         ;// switch table using motion vector as index (index = dx + 4*dy)
    152         M_SWITCH index, L                               ;// External macro; presumably the n-th M_CASE below is taken when index == n -- confirm
    153         M_CASE  Case_0                                  ;// dx=0, dy=0
    154         M_CASE  Case_1                                  ;// dx=1, dy=0
    155         M_CASE  Case_2                                  ;// dx=2, dy=0
    156         M_CASE  Case_3                                  ;// dx=3, dy=0
    157         M_CASE  Case_4                                  ;// dx=0, dy=1
    158         M_CASE  Case_5                                  ;// dx=1, dy=1
    159         M_CASE  Case_6                                  ;// dx=2, dy=1
    160         M_CASE  Case_7                                  ;// dx=3, dy=1
    161         M_CASE  Case_8                                  ;// dx=0, dy=2
    162         M_CASE  Case_9                                  ;// dx=1, dy=2
    163         M_CASE  Case_a                                  ;// dx=2, dy=2
    164         M_CASE  Case_b                                  ;// dx=3, dy=2
    165         M_CASE  Case_c                                  ;// dx=0, dy=3
    166         M_CASE  Case_d                                  ;// dx=1, dy=3
    167         M_CASE  Case_e                                  ;// dx=2, dy=3
    168         M_CASE  Case_f                                  ;// dx=3, dy=3
    169         M_ENDSWITCH
    170 ;// NOTE(review): every arm*_unsafe routine called below is IMPORTed; which registers each one reads, writes or advances is not visible here.
    171 Case_0
    172         ;// Case G (the letter tags G, a, b, ... presumably follow the H.264 sample-position names -- confirm); dx=0, dy=0
    173         M_PRINTF "Case 0 \n"
    174 
    175         BL      armVCM4P10_InterpolateLuma_Copy4x4_unsafe ;// Called with the block's arguments unmodified
    176         B       Block4x4LoopEnd
    177 
    178 Case_1
    179         ;// Case a: dx=1, dy=0
    180         M_PRINTF "Case 1 \n"
    181 
    182         SUB     pSrc, pSrc, #2                          ;// pSrc -= 2 (two bytes to the left)
    183         MOV     Height, #4                              ;// Height = 4 for the HorAlign9x call
    184         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    185         BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    186         BL      armVCM4P10_Average_4x4_Align2_unsafe    ;// Same sequence as Case_2 plus this Align2 averaging call
    187         B       Block4x4LoopEnd
    188 Case_2
    189         ;// Case b: dx=2, dy=0
    190         M_PRINTF "Case 2 \n"
    191 
    192         SUB     pSrc, pSrc, #2                          ;// pSrc -= 2 (two bytes to the left)
    193         MOV     Height, #4                              ;// Height = 4 for the HorAlign9x call
    194         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    195         BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe ;// No averaging call follows in this case
    196         B       Block4x4LoopEnd
    197 Case_3
    198         ;// Case c: dx=3, dy=0
    199         M_PRINTF "Case 3 \n"
    200 
    201         SUB     pSrc, pSrc, #2                          ;// pSrc -= 2 (two bytes to the left)
    202         MOV     Height, #4                              ;// Height = 4 for the HorAlign9x call
    203         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    204         BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    205         BL      armVCM4P10_Average_4x4_Align3_unsafe    ;// Same sequence as Case_2 plus this Align3 averaging call
    206         B       Block4x4LoopEnd
    207 Case_4
    208         ;// Case d: dx=0, dy=1
    209         M_PRINTF "Case 4 \n"
    210 
    211         SUB     pSrc, pSrc, srcStep, LSL #1             ;// pSrc -= 2*srcStep (two rows up)
    212         MOV     Height, #9                              ;// Height = 9 for the VerAlign4x call
    213         BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    214         BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    215         BL      armVCM4P10_Average_4x4_Align0_unsafe    ;// Same sequence as Case_8 plus this Align0 averaging call
    216 
    217         B       Block4x4LoopEnd
    218 Case_5
    219         ;// Case e: dx=1, dy=1 -- a horizontal pass into pTempBuf, then a vertical pass, then an averaging call
    220         M_PRINTF "Case 5 \n"
    221 
    222         SUB     pSrc, pSrc, #2                          ;// pSrc -= 2 (two bytes to the left)
    223         MOV     Height, #4
    224         M_ADR   pDst, pTempBuf                          ;// Redirect pDst to pTempBuf for the first pass
    225         MOV     dstStep, #4                             ;// with a destination step of 4
    226         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    227         BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    228         M_ADR   pArgs, ppArgs                           ;// Re-derive the save-area address
    229         LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// Restore the block's original arguments
    230         SUB     pSrc, pSrc, srcStep, LSL #1             ;// pSrc -= 2*srcStep (two rows up)
    231         M_ADR   pBuf, pBuffer
    232         MOV     Height, #9
    233         BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    234         BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    235         M_ADR   pSrc, pTempBuf                          ;// Point pSrc at pTempBuf for the averaging call
    236         MOV     srcStep, #4                             ;// with a source step of 4
    237         BL      armVCM4P10_Average_4x4_Align0_unsafe
    238 
    239 
    240         B       Block4x4LoopEnd
    241 Case_6
    242         ;// Case f: dx=2, dy=1
    243         M_PRINTF "Case 6 \n"
    244 
    245         SUB     pSrc, pSrc, #2                          ;// pSrc -= 2 (two bytes to the left)
    246         SUB     pSrc, pSrc, srcStep, LSL #1             ;// pSrc -= 2*srcStep (two rows up)
    247         MOV     Height, #9
    248         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    249         M_ADR   pBuf, pInterBuf                         ;// Switch pBuf to pInterBuf for the diagonal helper
    250         BL      armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    251         M_ADR   idy, pTempBuf                           ;// r7 = pTempBuf address for VerDiagCopy (dy is no longer needed)
    252         BL      armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
    253         BL      armVCM4P10_Average_4x4_Align0_unsafe
    254         B       Block4x4LoopEnd
    255 Case_7
    256         ;// Case g: dx=3, dy=1 -- same sequence as Case_5 except for the extra pSrc += 1 before the vertical pass
    257         M_PRINTF "Case 7 \n"
    258 
    259         SUB     pSrc, pSrc, #2                          ;// pSrc -= 2 (two bytes to the left)
    260         MOV     Height, #4
    261         M_ADR   pDst, pTempBuf                          ;// Redirect pDst to pTempBuf for the first pass
    262         MOV     dstStep, #4                             ;// with a destination step of 4
    263         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    264         BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    265         M_ADR   pArgs, ppArgs                           ;// Re-derive the save-area address
    266         LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// Restore the block's original arguments
    267         SUB     pSrc, pSrc, srcStep, LSL #1             ;// pSrc -= 2*srcStep (two rows up)
    268         ADD     pSrc, pSrc, #1                          ;// pSrc += 1 (one byte to the right)
    269         M_ADR   pBuf, pBuffer
    270         MOV     Height, #9
    271         BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    272         BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    273         M_ADR   pSrc, pTempBuf                          ;// Point pSrc at pTempBuf for the averaging call
    274         MOV     srcStep, #4                             ;// with a source step of 4
    275         BL      armVCM4P10_Average_4x4_Align0_unsafe
    276 
    277         B       Block4x4LoopEnd
    278 Case_8
    279         ;// Case h: dx=0, dy=2
    280         M_PRINTF "Case 8 \n"
    281 
    282         SUB     pSrc, pSrc, srcStep, LSL #1             ;// pSrc -= 2*srcStep (two rows up)
    283         MOV     Height, #9                              ;// Height = 9 for the VerAlign4x call
    284         BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    285         BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe ;// No averaging call follows in this case
    286         B       Block4x4LoopEnd
    287 Case_9
    288         ;// Case i: dx=1, dy=2 -- same sequence as Case_a plus HorDiagCopy and an Align2 averaging call
    289         M_PRINTF "Case 9 \n"
    290 
    291         SUB     pSrc, pSrc, #2                          ;// pSrc -= 2 (two bytes to the left)
    292         SUB     pSrc, pSrc, srcStep, LSL #1             ;// pSrc -= 2*srcStep (two rows up)
    293         MOV     Height, #9
    294         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    295         ADD     pSrc, pSrc, srcStep, LSL #1             ;// pSrc += 2*srcStep
    296         M_ADR   pBuf, pInterBuf                         ;// Switch pBuf to pInterBuf for the diagonal helper
    297         BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    298         M_ADR   idy, pTempBuf                           ;// r7 = pTempBuf address for HorDiagCopy (dy is no longer needed)
    299         BL      armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
    300         BL      armVCM4P10_Average_4x4_Align2_unsafe
    301         B       Block4x4LoopEnd
    302 Case_a
    303         ;// Case j: dx=2, dy=2
    304         M_PRINTF "Case a \n"
    305 
    306         SUB     pSrc, pSrc, #2                          ;// pSrc -= 2 (two bytes to the left)
    307         SUB     pSrc, pSrc, srcStep, LSL #1             ;// pSrc -= 2*srcStep (two rows up)
    308         MOV     Height, #9
    309         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    310         ADD     pSrc, pSrc, srcStep, LSL #1             ;// pSrc += 2*srcStep
    311         M_ADR   pBuf, pInterBuf                         ;// Switch pBuf to pInterBuf for the diagonal helper
    312         BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe ;// No copy or averaging call follows in this case
    313         B       Block4x4LoopEnd
    314 Case_b
    315         ;// Case k: dx=3, dy=2 -- same sequence as Case_9 but with the Align3 averaging call
    316         M_PRINTF "Case b \n"
    317         SUB     pSrc, pSrc, #2                          ;// pSrc -= 2 (two bytes to the left)
    318         SUB     pSrc, pSrc, srcStep, LSL #1             ;// pSrc -= 2*srcStep (two rows up)
    319         MOV     Height, #9
    320         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    321         ADD     pSrc, pSrc, srcStep, LSL #1             ;// pSrc += 2*srcStep
    322         M_ADR   pBuf, pInterBuf                         ;// Switch pBuf to pInterBuf for the diagonal helper
    323         BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    324         M_ADR   idy, pTempBuf                           ;// r7 = pTempBuf address for HorDiagCopy (dy is no longer needed)
    325         BL      armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
    326         BL      armVCM4P10_Average_4x4_Align3_unsafe
    327         B       Block4x4LoopEnd
    328 Case_c
    329         ;// Case n: dx=0, dy=3 -- same sequence as Case_4 except pSrc is moved one row down before averaging
    330         M_PRINTF "Case c \n"
    331 
    332         SUB     pSrc, pSrc, srcStep, LSL #1             ;// pSrc -= 2*srcStep (two rows up)
    333         MOV     Height, #9                              ;// Height = 9 for the VerAlign4x call
    334         BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    335         BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    336         ADD     pSrc, pSrc, srcStep                     ;// Update pSrc to one row down
    337         BL      armVCM4P10_Average_4x4_Align0_unsafe
    338         B       Block4x4LoopEnd
    339 Case_d
    340         ;// Case p: dx=1, dy=3 -- same sequence as Case_5 except the horizontal pass starts one row down
    341         M_PRINTF "Case d \n"
    342         SUB     pSrc, pSrc, #2                          ;// pSrc -= 2 (two bytes to the left)
    343         ADD     pSrc, pSrc, srcStep                     ;// pSrc += srcStep (one row down)
    344         MOV     Height, #4
    345         M_ADR   pDst, pTempBuf                          ;// Redirect pDst to pTempBuf for the first pass
    346         MOV     dstStep, #4                             ;// with a destination step of 4
    347         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    348         BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    349         M_ADR   pArgs, ppArgs                           ;// Re-derive the save-area address
    350         LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// Restore the block's original arguments
    351         SUB     pSrc, pSrc, srcStep, LSL #1             ;// pSrc -= 2*srcStep (two rows up)
    352         M_ADR   pBuf, pBuffer
    353         MOV     Height, #9
    354         BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    355         BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    356         M_ADR   pSrc, pTempBuf                          ;// Point pSrc at pTempBuf for the averaging call
    357         MOV     srcStep, #4                             ;// with a source step of 4
    358         BL      armVCM4P10_Average_4x4_Align0_unsafe
    359         B       Block4x4LoopEnd
    360 Case_e
    361         ;// Case q: dx=2, dy=3 -- same sequence as Case_6 except for the pSrc += 4 before averaging
    362         M_PRINTF "Case e \n"
    363 
    364         SUB     pSrc, pSrc, #2                          ;// pSrc -= 2 (two bytes to the left)
    365         SUB     pSrc, pSrc, srcStep, LSL #1             ;// pSrc -= 2*srcStep (two rows up)
    366         MOV     Height, #9
    367         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    368         M_ADR   pBuf, pInterBuf                         ;// Switch pBuf to pInterBuf for the diagonal helper
    369         BL      armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    370         M_ADR   idy, pTempBuf                           ;// r7 = pTempBuf address for VerDiagCopy (dy is no longer needed)
    371         BL      armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
    372         ADD     pSrc, pSrc, #4                          ;// pSrc += 4 before the averaging call
    373         BL      armVCM4P10_Average_4x4_Align0_unsafe
    374 
    375         B       Block4x4LoopEnd
    376 Case_f
    377         ;// Case r: dx=3, dy=3 -- combines the one-row-down start of Case_d with the pSrc += 1 of Case_7
    378         M_PRINTF "Case f \n"
    379         SUB     pSrc, pSrc, #2                          ;// pSrc -= 2 (two bytes to the left)
    380         ADD     pSrc, pSrc, srcStep                     ;// pSrc += srcStep (one row down)
    381         MOV     Height, #4
    382         M_ADR   pDst, pTempBuf                          ;// Redirect pDst to pTempBuf for the first pass
    383         MOV     dstStep, #4                             ;// with a destination step of 4
    384         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    385         BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    386         M_ADR   pArgs, ppArgs                           ;// Re-derive the save-area address
    387         LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// Restore the block's original arguments
    388         SUB     pSrc, pSrc, srcStep, LSL #1             ;// pSrc -= 2*srcStep (two rows up)
    389         ADD     pSrc, pSrc, #1                          ;// pSrc += 1 (one byte to the right)
    390         M_ADR   pBuf, pBuffer
    391         MOV     Height, #9
    392         BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    393         BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    394         M_ADR   pSrc, pTempBuf                          ;// Point pSrc at pTempBuf for the averaging call
    395         MOV     srcStep, #4                             ;// with a source step of 4
    396         BL      armVCM4P10_Average_4x4_Align0_unsafe
    397 ;// Case_f has no branch: it falls through into Block4x4LoopEnd.
    398 Block4x4LoopEnd
    399 
    400         ;// Width Loop: advance to the next 4x4 block in the same block row
    401         SUBS    iWidth, iWidth, #4                      ;// iWidth -= 4; the flags set here are tested by BGT below (relies on M_ADR/LDM/ADD leaving them unchanged)
    402         M_ADR   pArgs, ppArgs
    403         LDM     pArgs, {pSrc,srcStep,pDst,dstStep}  ;// Load arguments saved at the top of this block
    404         ADD     pSrc, pSrc, #4                          ;// Move 4 bytes right in the source
    405         ADD     pDst, pDst, #4                          ;// Move 4 bytes right in the destination
    406         BGT     Block4x4WidthLoop                       ;// Loop while width remains (iWidth > 0)
    407 
    408         ;// Height Loop: here pSrc/pDst = start of the finished block row + roi.width
    409         SUBS    iHeight, iHeight, #4                    ;// iHeight -= 4; the flags set here are tested by BGT below (relies on M_LDR/M_ADR leaving them unchanged)
    410         M_LDR   iWidth, ptrWidth                        ;// Reload the full roi.width for the next block row
    411         M_ADR   pArgs, ppArgs
    412         ADD     pSrc, pSrc, srcStep, LSL #2             ;// pSrc += 4*srcStep (four rows down)
    413         ADD     pDst, pDst, dstStep, LSL #2             ;// pDst += 4*dstStep (four rows down)
    414         SUB     pSrc, pSrc, iWidth                      ;// Back to the left edge of the roi
    415         SUB     pDst, pDst, iWidth                      ;// Back to the left edge of the roi
    416         BGT     Block4x4HeightLoop                      ;// Loop while height remains (iHeight > 0)
    417 
    418 EndOfInterpolation                                      ;// Not branched to in the code visible here; reached by fall-through
    419         MOV     r0, #0                                  ;// Return value 0 (the file header states OMX_Sts_NoErr is returned)
    420         M_END
    421 
    422     ENDIF
    423 
    424 
    425     END
    426