Home | History | Annotate | Download | only in src
      1 ;//
      2 ;// Copyright (C) 2007-2008 ARM Limited
      3 ;//
      4 ;// Licensed under the Apache License, Version 2.0 (the "License");
      5 ;// you may not use this file except in compliance with the License.
      6 ;// You may obtain a copy of the License at
      7 ;//
      8 ;//      http://www.apache.org/licenses/LICENSE-2.0
      9 ;//
     10 ;// Unless required by applicable law or agreed to in writing, software
     11 ;// distributed under the License is distributed on an "AS IS" BASIS,
     12 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ;// See the License for the specific language governing permissions and
     14 ;// limitations under the License.
     15 ;//
     16 ;//
     17 ;//
     18 ;// File Name:  omxVCM4P10_InterpolateLuma_s.s
     19 ;// OpenMAX DL: v1.0.2
     20 ;// Revision:   9641
     21 ;// Date:       Thursday, February 7, 2008
     22 ;//
     23 ;//
     24 ;//
     25 ;//
     26 
     27 ;// Function:
     28 ;//     omxVCM4P10_InterpolateLuma
     29 ;//
     30 ;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly.
     31 ;// Performs quarter pel interpolation of inter luma MB.
     32 ;// It's assumed that the frame is already padded when calling this function.
     33 ;// Parameters:
     34 ;// [in]    pSrc        Pointer to the source reference frame buffer
     35 ;// [in]    srcStep     Reference frame step in byte
     36 ;// [in]    dstStep     Destination frame step in byte. Must be multiple of roi.width
     37 ;// [in]    dx          Fractional part of horizontal motion vector
     38 ;//                         component in 1/4 pixel unit; valid in the range [0,3]
     39 ;// [in]    dy          Fractional part of vertical motion vector
     40 ;//                         component in 1/4 pixel unit; valid in the range [0,3]
     41 ;// [in]    roi         Dimension of the interpolation region;the parameters roi.width and roi.height must
     42 ;//                         be equal to either 4, 8, or 16.
     43 ;// [out]   pDst        Pointer to the destination frame buffer.
     44 ;//                   if roi.width==4,  4-byte alignment required
     45 ;//                   if roi.width==8,  8-byte alignment required
     46 ;//                   if roi.width==16, 16-byte alignment required
     47 ;//
     48 ;// Return Value:
     49 ;// If the function runs without error, it returns OMX_Sts_NoErr.
     50 ;// It is assumed that the following conditions are satisfied before calling this function:
     51 ;//  pSrc and pDst are not NULL.
     52 ;//  srcStep and dstStep are >= roi.width.
     53 ;//     dx and dy are in the range [0,3].
     54 ;//     roi.width and roi.height are each one of {4, 8, 16}.
     55 ;//     If roi.width is equal to 4, pDst is 4 byte aligned.
     56 ;//     If roi.width is equal to 8, pDst is 8 byte aligned.
     57 ;//     If roi.width is equal to 16, pDst is 16 byte aligned.
     58 ;//     srcStep and dstStep are multiples of 8.
     59 ;//
     60 ;//
     61 
     62 
     63         INCLUDE omxtypes_s.h
     64         INCLUDE armCOMM_s.h
     65 ;// NOTE(review): the M_* macros used throughout this file are presumably defined by the includes above - confirm.
     66         M_VARIANTS ARM1136JS                            ;// ARM1136JS is the only variant named in this file; it guards every IF block below
     67 
     68         EXPORT omxVCM4P10_InterpolateLuma               ;// public entry point, defined further down in this file
     69 ;// Helper routines called from the 16 dispatch cases; imported only when the ARM1136JS condition holds.
     70     IF ARM1136JS
     71         IMPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
     72         IMPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
     73         IMPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
     74         IMPORT armVCM4P10_Average_4x4_Align0_unsafe
     75         IMPORT armVCM4P10_Average_4x4_Align2_unsafe
     76         IMPORT armVCM4P10_Average_4x4_Align3_unsafe
     77         IMPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
     78         IMPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
     79     ENDIF
     80 ;// Second import group under the same ARM1136JS condition: the half-sample helper routines.
     81     IF ARM1136JS
     82         IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
     83         IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
     84         IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
     85         IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
     86     ENDIF
     87 
     88 
     89 
     90 ;// Declare input registers (r4/r5 are not passed in registers: the function loads them from stack arguments)
     91 pSrc            RN 0
     92 srcStep         RN 1
     93 pDst            RN 2
     94 dstStep         RN 3
     95 iHeight         RN 4                                    ;// loaded from ptrHeight inside the function
     96 iWidth          RN 5                                    ;// loaded from ptrWidth inside the function
     97 
     98 ;// Declare other intermediate registers (the same five aliases are declared again after M_START)
     99 idx             RN 6
    100 idy             RN 7
    101 index           RN 6                                    ;// same register as idx
    102 Temp            RN 12                                   ;// not used in the visible code
    103 pArgs           RN 11
    104 
    105 
    106         ;// End of CortexA8 -- NOTE(review): no CortexA8 section is visible in this file; this marker looks stale
    107 
    108 ;//-------------------------------------------------------------------------------------------------------------------------
    109 ;//-------------------------------------------------------------------------------------------------------------------------
    110     IF ARM1136JS
    111 ;// Scratch areas for omxVCM4P10_InterpolateLuma, declared with M_ALLOC4/M_ALLOC8 ahead of M_START.
    112 ;// NOTE(review): the macros come from an include that is not shown; presumably 4-/8-byte aligned stack locals - confirm.
    113         M_ALLOC4 ppDst, 8                               ;// not referenced anywhere in the visible code
    114         M_ALLOC4 ppSrc, 8                               ;// not referenced anywhere in the visible code
    115         M_ALLOC4 ppArgs, 16                             ;// save area for {pSrc, srcStep, pDst, dstStep} of the current 4x4 block
    116         M_ALLOC4 pBuffer, 120                           ;// 120 = 12x10
    117         M_ALLOC8 pInterBuf, 120                         ;// 120 = 12*5*2
    118         M_ALLOC8 pTempBuf, 32                           ;// 32 =  8*4
    119 
    120         ;// Function header
    121         ;// Interpolation of luma is implemented by processing blocks of pixels, 4x4 at a time.
    122         ;// Depending on the fractional parts of the motion vector (dx,dy), one of 16 cases is processed.
    123         ;// Registers r4, r5, r6 must be preserved by the internal unsafe functions:
    124         ;// r4 - iHeight (decremented by 4 after each row of 4x4 blocks)
    125         ;// r5 - iWidth  (decremented by 4 after each 4x4 block)
    126         ;// r6 - index   (switch selector, idx + 4*idy)
    127         M_START omxVCM4P10_InterpolateLuma, r11         ;// NOTE(review): "r11" presumably names the highest register the prologue saves - confirm against the macro
    128 
    129 ;// Declare other intermediate registers
    130 idx             RN 6                                    ;// dx as loaded from the stack
    131 idy             RN 7                                    ;// dy as loaded from the stack; r7 is reused for a pTempBuf address in cases 6, 9, b, e
    132 index           RN 6                                    ;// same register as idx: computing index overwrites idx
    133 Temp            RN 12                                   ;// not used in the visible code
    134 pArgs           RN 11                                   ;// address of the ppArgs save area
    135 
    136 pBuf            RN 8                                    ;// address of pBuffer or pInterBuf, set before the helper calls
    137 Height          RN 9                                    ;// set to 4 or 9 before the HorAlign9x/VerAlign4x helper calls
    138 bufStep         RN 9                                    ;// shares r9 with Height; not used in the visible code
    139 
    140         ;// Define stack arguments
    141         M_ARG   ptridx, 4                               ;// loaded into idx below
    142         M_ARG   ptridy, 4                               ;// loaded into idy below
    143         M_ARG   ptrWidth, 4                             ;// loaded into iWidth below (printed as roi.width)
    144         M_ARG   ptrHeight, 4                            ;// loaded into iHeight below (printed as roi.height)
    145 
    146         ;// Load dx, dy and the structure elements of roi
    147         M_LDR   idx, ptridx
    148         M_LDR   idy, ptridy
    149         M_LDR   iWidth, ptrWidth
    150         M_LDR   iHeight, ptrHeight
    151 ;// NOTE(review): M_PRINTF is presumably a debug-only trace macro - confirm in the include.
    152         M_PRINTF "roi.width %d\n", iWidth
    153         M_PRINTF "roi.height %d\n", iHeight
    154 
    155         ADD     index, idx, idy, LSL #2                 ;//  index = idx + 4*idy, i.e. [index] = [idy][idx]; idy is not read again after this
    156         M_ADR   pArgs, ppArgs                           ;// pArgs = address of ppArgs
    157 ;// The three labels below mark the same address: the width loop and the height loop both re-enter at the per-block setup.
    158 InterpolateLuma
    159 Block4x4WidthLoop
    160 Block4x4HeightLoop
    161 
    162         STM     pArgs, {pSrc,srcStep,pDst,dstStep}      ;// save this block's arguments; several cases and the loop tail reload them
    163         M_ADR   pBuf, pBuffer                           ;// default pBuf; cases 6, 9, a, b, e switch to pInterBuf
    164 
    165         ;// switch table using motion vector as index (index = idx + 4*idy; entries assumed to be selected in listed order)
    166         M_SWITCH index, L
    167         M_CASE  Case_0                                  ;// dx=0, dy=0
    168         M_CASE  Case_1                                  ;// dx=1, dy=0
    169         M_CASE  Case_2                                  ;// dx=2, dy=0
    170         M_CASE  Case_3                                  ;// dx=3, dy=0
    171         M_CASE  Case_4                                  ;// dx=0, dy=1
    172         M_CASE  Case_5                                  ;// dx=1, dy=1
    173         M_CASE  Case_6                                  ;// dx=2, dy=1
    174         M_CASE  Case_7                                  ;// dx=3, dy=1
    175         M_CASE  Case_8                                  ;// dx=0, dy=2
    176         M_CASE  Case_9                                  ;// dx=1, dy=2
    177         M_CASE  Case_a                                  ;// dx=2, dy=2
    178         M_CASE  Case_b                                  ;// dx=3, dy=2
    179         M_CASE  Case_c                                  ;// dx=0, dy=3
    180         M_CASE  Case_d                                  ;// dx=1, dy=3
    181         M_CASE  Case_e                                  ;// dx=2, dy=3
    182         M_CASE  Case_f                                  ;// dx=3, dy=3
    183         M_ENDSWITCH
    184 
    185 Case_0
    186         ;// Case G: index 0 (dx=0, dy=0)
    187         M_PRINTF "Case 0 \n"
    188 ;// Single helper call using the block arguments exactly as set up above.
    189         BL      armVCM4P10_InterpolateLuma_Copy4x4_unsafe
    190         B       Block4x4LoopEnd
    191 
    192 Case_1
    193         ;// Case a: index 1 (dx=1, dy=0)
    194         M_PRINTF "Case 1 \n"
    195 ;// Horizontal path: start 2 bytes left of the block, Height = 4, then HorAlign9x, HalfHor4x4 and Average (Align2).
    196         SUB     pSrc, pSrc, #2
    197         MOV     Height, #4
    198         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    199         BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    200         BL      armVCM4P10_Average_4x4_Align2_unsafe
    201         B       Block4x4LoopEnd
    202 Case_2
    203         ;// Case b: index 2 (dx=2, dy=0)
    204         M_PRINTF "Case 2 \n"
    205 ;// Same horizontal path as case 1 but without the trailing Average call.
    206         SUB     pSrc, pSrc, #2
    207         MOV     Height, #4
    208         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    209         BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    210         B       Block4x4LoopEnd
    211 Case_3
    212         ;// Case c: index 3 (dx=3, dy=0)
    213         M_PRINTF "Case 3 \n"
    214 ;// Same horizontal path as case 1 but finishing with the Align3 variant of Average.
    215         SUB     pSrc, pSrc, #2
    216         MOV     Height, #4
    217         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    218         BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    219         BL      armVCM4P10_Average_4x4_Align3_unsafe
    220         B       Block4x4LoopEnd
    221 Case_4
    222         ;// Case d: index 4 (dx=0, dy=1)
    223         M_PRINTF "Case 4 \n"
    224 ;// Vertical path: start 2 rows above the block, Height = 9, then VerAlign4x, HalfVer4x4 and Average (Align0).
    225         SUB     pSrc, pSrc, srcStep, LSL #1
    226         MOV     Height, #9
    227         BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    228         BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    229         BL      armVCM4P10_Average_4x4_Align0_unsafe
    230 
    231         B       Block4x4LoopEnd
    232 Case_5
    233         ;// Case e: index 5 (dx=1, dy=1)
    234         M_PRINTF "Case 5 \n"
    235 ;// Two passes: horizontal helpers are pointed at pTempBuf (step 4); vertical helpers then run on the reloaded block arguments.
    236         SUB     pSrc, pSrc, #2
    237         MOV     Height, #4
    238         M_ADR   pDst, pTempBuf                          ;// redirect pDst/dstStep to the scratch buffer for the horizontal pass
    239         MOV     dstStep, #4
    240         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    241         BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    242         M_ADR   pArgs, ppArgs                           ;// re-derive pArgs (r11 is not among the r4-r6 registers the helpers must preserve)
    243         LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// restore the block arguments saved at the loop head
    244         SUB     pSrc, pSrc, srcStep, LSL #1             ;// 2 rows above the block
    245         M_ADR   pBuf, pBuffer
    246         MOV     Height, #9
    247         BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    248         BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    249         M_ADR   pSrc, pTempBuf                          ;// pSrc/srcStep now describe the scratch buffer for the Average helper
    250         MOV     srcStep, #4
    251         BL      armVCM4P10_Average_4x4_Align0_unsafe
    252 
    253 
    254         B       Block4x4LoopEnd
    255 Case_6
    256         ;// Case f: index 6 (dx=2, dy=1)
    257         M_PRINTF "Case 6 \n"
    258 ;// Start 2 bytes left of and 2 rows above the block, Height = 9; the diagonal helper is given pInterBuf in pBuf.
    259         SUB     pSrc, pSrc, #2
    260         SUB     pSrc, pSrc, srcStep, LSL #1
    261         MOV     Height, #9
    262         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    263         M_ADR   pBuf, pInterBuf
    264         BL      armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    265         M_ADR   idy, pTempBuf                           ;// r7 is free: idy was consumed when index was computed; presumably an argument of the DiagCopy helper
    266         BL      armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
    267         BL      armVCM4P10_Average_4x4_Align0_unsafe
    268         B       Block4x4LoopEnd
    269 Case_7
    270         ;// Case g: index 7 (dx=3, dy=1)
    271         M_PRINTF "Case 7 \n"
    272 ;// Same two-pass sequence as case 5, except that the vertical pass starts one byte further right (ADD #1 below).
    273         SUB     pSrc, pSrc, #2
    274         MOV     Height, #4
    275         M_ADR   pDst, pTempBuf                          ;// redirect pDst/dstStep to the scratch buffer for the horizontal pass
    276         MOV     dstStep, #4
    277         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    278         BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    279         M_ADR   pArgs, ppArgs
    280         LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// restore the block arguments saved at the loop head
    281         SUB     pSrc, pSrc, srcStep, LSL #1             ;// 2 rows above the block
    282         ADD     pSrc, pSrc, #1                          ;// and one byte to the right
    283         M_ADR   pBuf, pBuffer
    284         MOV     Height, #9
    285         BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    286         BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    287         M_ADR   pSrc, pTempBuf                          ;// pSrc/srcStep now describe the scratch buffer for the Average helper
    288         MOV     srcStep, #4
    289         BL      armVCM4P10_Average_4x4_Align0_unsafe
    290 
    291         B       Block4x4LoopEnd
    292 Case_8
    293         ;// Case h: index 8 (dx=0, dy=2)
    294         M_PRINTF "Case 8 \n"
    295 ;// Same vertical path as case 4 but without the trailing Average call.
    296         SUB     pSrc, pSrc, srcStep, LSL #1
    297         MOV     Height, #9
    298         BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    299         BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    300         B       Block4x4LoopEnd
    301 Case_9
    302         ;// Case i: index 9 (dx=1, dy=2)
    303         M_PRINTF "Case 9 \n"
    304 ;// Diagonal (VerHor) path through pInterBuf, then HorDiagCopy and Average (Align2).
    305         SUB     pSrc, pSrc, #2
    306         SUB     pSrc, pSrc, srcStep, LSL #1
    307         MOV     Height, #9
    308         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    309         ADD     pSrc, pSrc, srcStep, LSL #1             ;// add 2*srcStep to pSrc as left by HorAlign9x (helper contract not visible here)
    310         M_ADR   pBuf, pInterBuf
    311         BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    312         M_ADR   idy, pTempBuf                           ;// r7 reused for the pTempBuf address (idy is no longer needed)
    313         BL      armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
    314         BL      armVCM4P10_Average_4x4_Align2_unsafe
    315         B       Block4x4LoopEnd
    316 Case_a
    317         ;// Case j: index 10 (dx=2, dy=2)
    318         M_PRINTF "Case a \n"
    319 ;// Same diagonal (VerHor) path as case 9 but without the DiagCopy and Average calls.
    320         SUB     pSrc, pSrc, #2
    321         SUB     pSrc, pSrc, srcStep, LSL #1
    322         MOV     Height, #9
    323         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    324         ADD     pSrc, pSrc, srcStep, LSL #1             ;// add 2*srcStep to pSrc as left by HorAlign9x (helper contract not visible here)
    325         M_ADR   pBuf, pInterBuf
    326         BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    327         B       Block4x4LoopEnd
    328 Case_b
    329         ;// Case k: index 11 (dx=3, dy=2); same sequence as case 9 but finishing with the Align3 variant of Average
    330         M_PRINTF "Case b \n"
    331         SUB     pSrc, pSrc, #2
    332         SUB     pSrc, pSrc, srcStep, LSL #1
    333         MOV     Height, #9
    334         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    335         ADD     pSrc, pSrc, srcStep, LSL #1             ;// add 2*srcStep to pSrc as left by HorAlign9x (helper contract not visible here)
    336         M_ADR   pBuf, pInterBuf
    337         BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    338         M_ADR   idy, pTempBuf                           ;// r7 reused for the pTempBuf address (idy is no longer needed)
    339         BL      armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
    340         BL      armVCM4P10_Average_4x4_Align3_unsafe
    341         B       Block4x4LoopEnd
    342 Case_c
    343         ;// Case n: index 12 (dx=0, dy=3)
    344         M_PRINTF "Case c \n"
    345 ;// Same vertical path as case 4, except pSrc is moved one row down before the Average call.
    346         SUB     pSrc, pSrc, srcStep, LSL #1
    347         MOV     Height, #9
    348         BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    349         BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    350         ADD     pSrc, pSrc, srcStep                     ;// Update pSrc to one row down
    351         BL      armVCM4P10_Average_4x4_Align0_unsafe
    352         B       Block4x4LoopEnd
    353 Case_d
    354         ;// Case p: index 13 (dx=1, dy=3); same two-pass sequence as case 5, with the horizontal pass one row down
    355         M_PRINTF "Case d \n"
    356         SUB     pSrc, pSrc, #2
    357         ADD     pSrc, pSrc, srcStep                     ;// horizontal pass starts one row below the block origin
    358         MOV     Height, #4
    359         M_ADR   pDst, pTempBuf                          ;// redirect pDst/dstStep to the scratch buffer for the horizontal pass
    360         MOV     dstStep, #4
    361         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    362         BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    363         M_ADR   pArgs, ppArgs
    364         LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// restore the block arguments saved at the loop head
    365         SUB     pSrc, pSrc, srcStep, LSL #1             ;// 2 rows above the block
    366         M_ADR   pBuf, pBuffer
    367         MOV     Height, #9
    368         BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    369         BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    370         M_ADR   pSrc, pTempBuf                          ;// pSrc/srcStep now describe the scratch buffer for the Average helper
    371         MOV     srcStep, #4
    372         BL      armVCM4P10_Average_4x4_Align0_unsafe
    373         B       Block4x4LoopEnd
    374 Case_e
    375         ;// Case q: index 14 (dx=2, dy=3)
    376         M_PRINTF "Case e \n"
    377 ;// Same diagonal (HorVer) path as case 6, except pSrc is advanced by 4 bytes before the Average call.
    378         SUB     pSrc, pSrc, #2
    379         SUB     pSrc, pSrc, srcStep, LSL #1
    380         MOV     Height, #9
    381         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    382         M_ADR   pBuf, pInterBuf
    383         BL      armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    384         M_ADR   idy, pTempBuf                           ;// r7 reused for the pTempBuf address (idy is no longer needed)
    385         BL      armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
    386         ADD     pSrc, pSrc, #4                          ;// pSrc += 4 bytes as left by VerDiagCopy (helper contract not visible here)
    387         BL      armVCM4P10_Average_4x4_Align0_unsafe
    388 
    389         B       Block4x4LoopEnd
    390 Case_f
    391         ;// Case r: index 15 (dx=3, dy=3); combines the offsets of cases 7 and d, then falls through to Block4x4LoopEnd
    392         M_PRINTF "Case f \n"
    393         SUB     pSrc, pSrc, #2
    394         ADD     pSrc, pSrc, srcStep                     ;// horizontal pass starts one row below the block origin
    395         MOV     Height, #4
    396         M_ADR   pDst, pTempBuf                          ;// redirect pDst/dstStep to the scratch buffer for the horizontal pass
    397         MOV     dstStep, #4
    398         BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
    399         BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    400         M_ADR   pArgs, ppArgs
    401         LDM     pArgs, {pSrc, srcStep, pDst, dstStep}   ;// restore the block arguments saved at the loop head
    402         SUB     pSrc, pSrc, srcStep, LSL #1             ;// 2 rows above the block
    403         ADD     pSrc, pSrc, #1                          ;// and one byte to the right
    404         M_ADR   pBuf, pBuffer
    405         MOV     Height, #9
    406         BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    407         BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    408         M_ADR   pSrc, pTempBuf                          ;// pSrc/srcStep now describe the scratch buffer for the Average helper
    409         MOV     srcStep, #4
    410         BL      armVCM4P10_Average_4x4_Align0_unsafe
    411 ;// No branch needed here: the last case falls straight through into the loop tail.
    412 Block4x4LoopEnd
    413 
    414         ;// Width Loop
    415         SUBS    iWidth, iWidth, #4                      ;// one 4x4 block done; these flags are what the BGT below tests
    416         M_ADR   pArgs, ppArgs
    417         LDM     pArgs, {pSrc,srcStep,pDst,dstStep}  ;// Load arguments
    418         ADD     pSrc, pSrc, #4                          ;// next block: 4 bytes to the right in the source
    419         ADD     pDst, pDst, #4                          ;// and 4 bytes to the right in the destination
    420         BGT     Block4x4WidthLoop                       ;// repeat while iWidth is still positive (relies on M_ADR not altering flags - TODO confirm)
    421 
    422         ;// Height Loop
    423         SUBS    iHeight, iHeight, #4                    ;// one row of 4x4 blocks done; these flags are what the BGT below tests
    424         M_LDR   iWidth, ptrWidth                        ;// reload roi.width for the next row of blocks
    425         M_ADR   pArgs, ppArgs
    426         ADD     pSrc, pSrc, srcStep, LSL #2             ;// down 4 source rows
    427         ADD     pDst, pDst, dstStep, LSL #2             ;// down 4 destination rows
    428         SUB     pSrc, pSrc, iWidth                      ;// back to the left edge: the width loop advanced pSrc by roi.width in total
    429         SUB     pDst, pDst, iWidth                      ;// likewise for pDst
    430         BGT     Block4x4HeightLoop                      ;// repeat while iHeight is still positive (relies on M_LDR/M_ADR not altering flags - TODO confirm)
    431 
    432 EndOfInterpolation
    433         MOV     r0, #0                                  ;// return value 0 (presumably OMX_Sts_NoErr - confirm in the include)
    434         M_END
    435 
    436     ENDIF
    437 
    438 
    439     END
    440 
    441