Home | History | Annotate | Download | only in src
      1 ;//
      2 ;//
      3 ;// File Name:  omxVCM4P10_InterpolateLuma_s.s
      4 ;// OpenMAX DL: v1.0.2
      5 ;// Revision:   12290
      6 ;// Date:       Wednesday, April 9, 2008
      7 ;//
      8 ;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ;//
     10 ;//
     11 ;//
     12 
     13 ;// Function:
     14 ;//     omxVCM4P10_InterpolateLuma
     15 ;//
     16 ;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly.
     17 ;// Performs quarter pel interpolation of inter luma MB.
     18 ;// It's assumed that the frame is already padded when calling this function.
     19 ;// Parameters:
     20 ;// [in]    pSrc        Pointer to the source reference frame buffer
     21 ;// [in]    srcStep     Reference frame step in byte
     22 ;// [in]    dstStep     Destination frame step in byte. Must be multiple of roi.width
     23 ;// [in]    dx          Fractional part of horizontal motion vector
     24 ;//                         component in 1/4 pixel unit; valid in the range [0,3]
     25 ;// [in]    dy          Fractional part of vertical motion vector
     26 ;//                         component in 1/4 pixel unit; valid in the range [0,3]
     27 ;// [in]    roi         Dimension of the interpolation region; the parameters roi.width and roi.height must
     28 ;//                         be equal to either 4, 8, or 16.
     29 ;// [out]   pDst        Pointer to the destination frame buffer.
     30 ;//                   if roi.width==4,  4-byte alignment required
     31 ;//                   if roi.width==8,  8-byte alignment required
     32 ;//                   if roi.width==16, 16-byte alignment required
     33 ;//
     34 ;// Return Value:
     35 ;// If the function runs without error, it returns OMX_Sts_NoErr.
     36 ;// It is assumed that the following conditions are satisfied before calling this function:
     37 ;//  Neither pSrc nor pDst is NULL.
     38 ;//  srcStep and dstStep are both >= roi.width.
     39 ;//     dx and dy are both in the range [0-3].
     40 ;//     roi.width and roi.height are both in the set {4, 8, 16}.
     41 ;//     If roi.width is equal to 4, pDst is 4 byte aligned.
     42 ;//     If roi.width is equal to 8, pDst is 8 byte aligned.
     43 ;//     If roi.width is equal to 16, pDst is 16 byte aligned.
     44 ;//     srcStep and dstStep are multiples of 8.
     45 ;//
     46 ;//
     47 
     48 
     49         INCLUDE omxtypes_s.h
     50         INCLUDE armCOMM_s.h
     51 
     52         M_VARIANTS CortexA8
     53 
     54         EXPORT omxVCM4P10_InterpolateLuma
     55 
     56 
     57     IF CortexA8
     58         IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
     59         IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
     60         IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
     61         IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
     62     ENDIF
     63 
     64 
     65 
     66 ;// Declare input registers (aliases for core registers r0-r5 used by the whole function)
     67 pSrc            RN 0                                    ;// r0: source pointer of the current 4x4 block
     68 srcStep         RN 1                                    ;// r1: source row step
     69 pDst            RN 2                                    ;// r2: destination pointer of the current 4x4 block
     70 dstStep         RN 3                                    ;// r3: destination row step
     71 iHeight         RN 4                                    ;// r4: remaining height, loaded from stack argument ptrHeight
     72 iWidth          RN 5                                    ;// r5: remaining width in the current row of blocks, loaded from ptrWidth
     73 
     74 ;// Declare other intermediate registers
     75 idx             RN 6                                    ;// r6: loaded from stack argument ptridx
     76 idy             RN 7                                    ;// r7: loaded from stack argument ptridy
     77 index           RN 6                                    ;// r6 again: index = idx + 4*idy overwrites idx, which is not read afterwards
     78 Temp            RN 12                                   ;// r12: scratch pointer to rows 2 and 3 of the block being loaded or stored
     79 pArgs           RN 11                                   ;// r11: address of ppArgs, where {pSrc,srcStep,pDst,dstStep} are saved per block
     80 
     81 
     82     IF CortexA8
     83 
     84         ;//
     85         ;// Interpolation of luma is implemented by processing block of pixels, size 4x4 at a time.
     86         ;// ppArgs is the 16-byte local used to save {pSrc,srcStep,pDst,dstStep} (four words) for each block.
     87         M_ALLOC4    ppArgs, 16
     88         ;// NOTE(review): M_ALLOC4 and M_START come from armCOMM_s.h; r11/d15 presumably name the highest registers to preserve - confirm.
     89         ;// Function header
     90         M_START omxVCM4P10_InterpolateLuma, r11, d15
     91 
     92 pSrcBK          RN 8                                    ;// r8: copy of the block's pSrc, read back after the first helper call in two-helper cases
     93 
     94 ;// Declare Neon registers
     95 dCoeff5         DN 30.S16                               ;// D30: set to 5 by VMOV before the block loop
     96 dCoeff20        DN 31.S16                               ;// D31: set to 20 by VMOV before the block loop
     97 
     98 ;// Registers used for implementing Horizontal interpolation
     99 dSrc0c          DN 14.U8                                ;// D14/D16/D18/D20: averaged with dAccH0-3 in Case_1 after the HalfHor helper
    100 dSrc1c          DN 16.U8
    101 dSrc2c          DN 18.U8
    102 dSrc3c          DN 20.U8
    103 dSrc0d          DN 15.U8                                ;// D15/D17/D19/D21: averaged with dAccH0-3 in Case_3 after the HalfHor helper
    104 dSrc1d          DN 17.U8
    105 dSrc2d          DN 19.U8
    106 dSrc3d          DN 21.U8
    107 dAccH0          DN 22.U8                                ;// D22/D24/D26/D28 viewed as bytes: operands of VRHADD
    108 dAccH1          DN 24.U8
    109 dAccH2          DN 26.U8
    110 dAccH3          DN 28.U8
    111 dResultH0       DN 22.U32                               ;// same registers as dAccH0-3 viewed as words: lane 0 is the 4-pixel row that is stored
    112 dResultH1       DN 24.U32
    113 dResultH2       DN 26.U32
    114 dResultH3       DN 28.U32
    115 
    116 ;// Registers used for implementing Vertical interpolation
    117 dSrc0           DN 9.U8                                 ;// D9-D13: loaded directly in Case_0; read after the HalfVer helper in Case_4 and Case_c
    118 dSrc1           DN 10.U8
    119 dSrc2           DN 11.U8
    120 dSrc3           DN 12.U8
    121 dSrc4           DN 13.U8
    122 dAccV0          DN 0.U8                                 ;// D0/D2/D4/D6 viewed as bytes: operands of VRHADD
    123 dAccV1          DN 2.U8
    124 dAccV2          DN 4.U8
    125 dAccV3          DN 6.U8
    126 dResultV0       DN 0.U32                                ;// same registers as dAccV0-3 viewed as words for the lane-0 stores
    127 dResultV1       DN 2.U32
    128 dResultV2       DN 4.U32
    129 dResultV3       DN 6.U32
    130 
    131 ;// Registers used for implementing Diagonal interpolation
    132 dTAcc0          DN 0.U8                                 ;// D0/D2/D4/D6 again: the same physical registers as dAccV0-3
    133 dTAcc1          DN 2.U8
    134 dTAcc2          DN 4.U8
    135 dTAcc3          DN 6.U8
    136 dTRes0          DN 0.32                                 ;// word view of dTAcc0-3 for the lane-0 stores
    137 dTRes1          DN 2.32
    138 dTRes2          DN 4.32
    139 dTRes3          DN 6.32
    140 dTResult0       DN 14.U8                                ;// D14/D16/D18/D20: destinations of VQRSHRUN; same physical registers as dSrc0c-3c
    141 dTResult1       DN 16.U8
    142 dTResult2       DN 18.U8                                ;// D18 is also dTempP0 and the low half of qRes4/qTempP01
    143 dTResult3       DN 20.U8                                ;// D20 is also dTempQ0 and the low half of qRes5/qTempQ01
    144 dTempP0         DN 18.S16                               ;// D18-D25 as 16-bit lanes: shifted with VEXT in Case_9 and Case_b
    145 dTempP1         DN 19.S16
    146 dTempQ0         DN 20.S16
    147 dTempQ1         DN 21.S16
    148 dTempR0         DN 22.S16
    149 dTempR1         DN 23.S16
    150 dTempS0         DN 24.S16
    151 dTempS1         DN 25.S16
    152 qTempP01        QN 9.S16                                ;// Q9  = D18:D19 = dTempP0:dTempP1
    153 qTempQ01        QN 10.S16                               ;// Q10 = D20:D21 = dTempQ0:dTempQ1
    154 qTempR01        QN 11.S16                               ;// Q11 = D22:D23 = dTempR0:dTempR1
    155 qTempS01        QN 12.S16                               ;// Q12 = D24:D25 = dTempS0:dTempS1
    156 
    157 ;// Intermediate values for averaging
    158 qRes2           QN 7.S16                                ;// Q7  = D14:D15; narrowed with VQRSHRUN in Case_6
    159 qRes3           QN 8.S16                                ;// Q8  = D16:D17; narrowed in Case_6 and Case_e
    160 qRes4           QN 9.S16                                ;// Q9  = D18:D19; same register as qTempP01
    161 qRes5           QN 10.S16                               ;// Q10 = D20:D21; same register as qTempQ01
    162 qRes6           QN 11.S16                               ;// Q11 = D22:D23; same register as qTempR01; narrowed in Case_e only
    163 
    164 ;// For implementing copy
    165 dDst0            DN 9.32                                ;// D9-D12 as words: the same registers as dSrc0-3, so Case_0 stores exactly what it loaded
    166 dDst1            DN 10.32
    167 dDst2            DN 11.32
    168 dDst3            DN 12.32
    169 
    170         ;// Define stack arguments: four 4-byte slots declared in the order ptridx, ptridy, ptrWidth, ptrHeight
    171         M_ARG       ptridx, 4
    172         M_ARG       ptridy, 4
    173         M_ARG       ptrWidth, 4
    174         M_ARG       ptrHeight, 4
    175         ;// NOTE(review): presumably these are dx, dy, roi.width and roi.height from the file header - confirm against the C prototype.
    176         ;// Load structure elements of roi
    177         M_LDR       idx, ptridx
    178         M_LDR       idy, ptridy
    179         M_LDR       iWidth, ptrWidth
    180         M_LDR       iHeight, ptrHeight
    181         ;// index = idx + 4*idy selects one of the 16 Case_* handlers; index and idx are both r6, so idx is overwritten here.
    182         ADD         index, idx, idy, LSL #2                 ;//  [index] = [idy][idx]
    183         M_ADR       pArgs, ppArgs
    184         ;// pArgs is expected to hold the address of ppArgs from here on (M_ADR is defined in armCOMM_s.h, not visible here).
    185         ;// Move coefficients Neon registers
    186         VMOV        dCoeff20, #20                           ;// D31 <- 20; not read in this file, presumably consumed by the *_unsafe helpers
    187         VMOV        dCoeff5, #5                             ;// D30 <- 5;  not read in this file, presumably consumed by the *_unsafe helpers
    188 
    189 Block4x4WidthLoop                                       ;// entered once per 4x4 block; BGT target of the width loop
    190 Block4x4HeightLoop                                      ;// same address as Block4x4WidthLoop; BGT target of the height loop
    191         ;// Save the block arguments: the cases modify pSrc/pDst, and Block4x4LoopEnd reloads them with LDM.
    192         STM         pArgs, {pSrc,srcStep,pDst,dstStep}
    193         ;// Assuming ARM state, pc reads as the ADD's own address + 8, so index 0 selects the SECOND branch below.
    194         ;// switch table using motion vector as index
    195         ADD         pc, pc, index, LSL #2
    196         B           Case_f                                  ;// pad slot at ADD+4; never selected by the computed jump
    197         B           Case_0                                  ;// index 0  (idx=0, idy=0)
    198         B           Case_1                                  ;// index 1  (idx=1, idy=0)
    199         B           Case_2                                  ;// index 2  (idx=2, idy=0)
    200         B           Case_3                                  ;// index 3  (idx=3, idy=0)
    201         B           Case_4                                  ;// index 4  (idx=0, idy=1)
    202         B           Case_5                                  ;// index 5  (idx=1, idy=1)
    203         B           Case_6                                  ;// index 6  (idx=2, idy=1)
    204         B           Case_7                                  ;// index 7  (idx=3, idy=1)
    205         B           Case_8                                  ;// index 8  (idx=0, idy=2)
    206         B           Case_9                                  ;// index 9  (idx=1, idy=2)
    207         B           Case_a                                  ;// index 10 (idx=2, idy=2)
    208         B           Case_b                                  ;// index 11 (idx=3, idy=2)
    209         B           Case_c                                  ;// index 12 (idx=0, idy=3)
    210         B           Case_d                                  ;// index 13 (idx=1, idy=3)
    211         B           Case_e                                  ;// index 14 (idx=2, idy=3)
    212         B           Case_f                                  ;// index 15 (idx=3, idy=3)
    213 
    214 Case_0
    215         ;// Case G: index 0 (idx=0, idy=0). No helper is called; the 4x4 block is copied unchanged.
    216         M_PRINTF "Case 0 \n"
    217 
    218         ;// Loads a 4x4 block of .8 and stores as .32 (each VLD1 reads a whole D register; only word lane 0 is stored)
    219         ADD         Temp, pSrc, srcStep, LSL #1             ;// Temp = pSrc + 2*srcStep (source row 2)
    220         VLD1        dSrc0, [pSrc], srcStep                  ;// source row 0, then pSrc += srcStep
    221         VLD1        dSrc2, [Temp], srcStep                  ;// source row 2, then Temp += srcStep
    222         VLD1        dSrc1, [pSrc]                           ;// source row 1
    223         VLD1        dSrc3, [Temp]                           ;// source row 3
    224         ;// dDst0-3 are word views of the same registers as dSrc0-3 (D9-D12).
    225         ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
    226         VST1        dDst0[0], [pDst], dstStep               ;// destination row 0, then pDst += dstStep
    227         VST1        dDst2[0], [Temp], dstStep               ;// destination row 2, then Temp += dstStep
    228         VST1        dDst1[0], [pDst]                        ;// destination row 1
    229         VST1        dDst3[0], [Temp]                        ;// destination row 3
    230         M_ADR       pArgs, ppArgs
    231         B           Block4x4LoopEnd                         ;// pArgs was just re-pointed at ppArgs for the LDM at the loop end
    232 Case_1
    233         ;// Case a: index 1 (idx=1, idy=0). Rounded average of the HalfHor helper output with dSrc0c-3c.
    234         M_PRINTF "Case 1 \n"
    235         ;// NOTE(review): dAccH0-3 and dSrc0c-3c are whatever the helper leaves in them; its register contract is not visible here.
    236         SUB         pSrc, pSrc, #2                          ;// the helper is entered 2 bytes left of the block origin
    237         BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    238         VRHADD      dAccH0, dAccH0, dSrc0c                  ;// per byte: (a + b + 1) >> 1
    239         VRHADD      dAccH2, dAccH2, dSrc2c
    240         VRHADD      dAccH1, dAccH1, dSrc1c
    241         VRHADD      dAccH3, dAccH3, dSrc3c
    242         ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
    243         VST1        dResultH0[0], [pDst], dstStep           ;// destination row 0 (word lane 0), then pDst += dstStep
    244         VST1        dResultH2[0], [Temp], dstStep           ;// destination row 2, then Temp += dstStep
    245         VST1        dResultH1[0], [pDst]                    ;// destination row 1
    246         VST1        dResultH3[0], [Temp]                    ;// destination row 3
    247         M_ADR       pArgs, ppArgs
    248         B           Block4x4LoopEnd                         ;// pArgs (r11) was re-pointed at ppArgs after the helper call
    249 Case_2
    250         ;// Case b: index 2 (idx=2, idy=0). The HalfHor helper output is stored without further averaging.
    251         M_PRINTF "Case 2 \n"
    252         ;// NOTE(review): dResultH0-3 (D22/D24/D26/D28) are assumed to be filled by the helper; its contract is not visible here.
    253         SUB         pSrc, pSrc, #2                          ;// the helper is entered 2 bytes left of the block origin
    254         BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    255         ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
    256         VST1        dResultH0[0], [pDst], dstStep           ;// destination row 0 (word lane 0), then pDst += dstStep
    257         VST1        dResultH2[0], [Temp], dstStep           ;// destination row 2, then Temp += dstStep
    258         VST1        dResultH1[0], [pDst]                    ;// destination row 1
    259         VST1        dResultH3[0], [Temp]                    ;// destination row 3
    260         M_ADR       pArgs, ppArgs
    261         B           Block4x4LoopEnd                         ;// pArgs (r11) was re-pointed at ppArgs after the helper call
    262 Case_3
    263         ;// Case c: index 3 (idx=3, idy=0). Like Case_1, but averages with dSrc0d-3d (D15/D17/D19/D21) instead of dSrc0c-3c.
    264         M_PRINTF "Case 3 \n"
    265         ;// NOTE(review): dAccH0-3 and dSrc0d-3d are whatever the helper leaves in them; its register contract is not visible here.
    266         SUB         pSrc, pSrc, #2                          ;// the helper is entered 2 bytes left of the block origin
    267         BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    268         VRHADD      dAccH0, dAccH0, dSrc0d                  ;// per byte: (a + b + 1) >> 1
    269         VRHADD      dAccH2, dAccH2, dSrc2d
    270         VRHADD      dAccH1, dAccH1, dSrc1d
    271         VRHADD      dAccH3, dAccH3, dSrc3d
    272         ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
    273         VST1        dResultH0[0], [pDst], dstStep           ;// destination row 0 (word lane 0), then pDst += dstStep
    274         VST1        dResultH2[0], [Temp], dstStep           ;// destination row 2, then Temp += dstStep
    275         VST1        dResultH1[0], [pDst]                    ;// destination row 1
    276         VST1        dResultH3[0], [Temp]                    ;// destination row 3
    277         M_ADR       pArgs, ppArgs
    278         B           Block4x4LoopEnd                         ;// pArgs (r11) was re-pointed at ppArgs after the helper call
    279 Case_4
    280         ;// Case d: index 4 (idx=0, idy=1). Rounded average of the HalfVer helper output with dSrc0-3.
    281         M_PRINTF "Case 4 \n"
    282         ;// NOTE(review): dAccV0-3 and dSrc0-3 are whatever the helper leaves in them; its register contract is not visible here.
    283         SUB         pSrc, pSrc, srcStep, LSL #1             ;// the helper is entered 2 rows above the block origin
    284         BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    285         VRHADD      dAccV0, dAccV0, dSrc0                   ;// per byte: (a + b + 1) >> 1
    286         VRHADD      dAccV2, dAccV2, dSrc2
    287         VRHADD      dAccV1, dAccV1, dSrc1
    288         VRHADD      dAccV3, dAccV3, dSrc3
    289         ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
    290         VST1        dResultV0[0], [pDst], dstStep           ;// destination row 0 (word lane 0), then pDst += dstStep
    291         VST1        dResultV2[0], [Temp], dstStep           ;// destination row 2, then Temp += dstStep
    292         VST1        dResultV1[0], [pDst]                    ;// destination row 1
    293         VST1        dResultV3[0], [Temp]                    ;// destination row 3
    294         M_ADR       pArgs, ppArgs
    295         B           Block4x4LoopEnd                         ;// pArgs (r11) was re-pointed at ppArgs after the helper call
    296 Case_5
    297         ;// Case e: index 5 (idx=1, idy=1). Rounded average of the HalfVer output (dAccV0-3) and the HalfHor output (dAccH0-3).
    298         M_PRINTF "Case 5 \n"
    299         ;// NOTE(review): relies on HalfVer preserving r8 (pSrcBK) and HalfHor preserving D0/D2/D4/D6 - confirm in the helpers.
    300         MOV         pSrcBK, pSrc                            ;// keep the block origin for the second helper call
    301         SUB         pSrc, pSrc, srcStep, LSL #1             ;// HalfVer is entered 2 rows above the block origin
    302         BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    303         SUB         pSrc, pSrcBK, #2                        ;// HalfHor is entered 2 bytes left of the block origin
    304         BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    305         VRHADD      dAccH0, dAccH0, dAccV0                  ;// per byte: (a + b + 1) >> 1
    306         VRHADD      dAccH2, dAccH2, dAccV2
    307         VRHADD      dAccH1, dAccH1, dAccV1
    308         VRHADD      dAccH3, dAccH3, dAccV3
    309         ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
    310         VST1        dResultH0[0], [pDst], dstStep           ;// destination row 0 (word lane 0), then pDst += dstStep
    311         VST1        dResultH2[0], [Temp], dstStep           ;// destination row 2, then Temp += dstStep
    312         VST1        dResultH1[0], [pDst]                    ;// destination row 1
    313         VST1        dResultH3[0], [Temp]                    ;// destination row 3
    314         ;// pArgs (r11) is re-pointed at ppArgs for the LDM at Block4x4LoopEnd.
    315         M_ADR       pArgs, ppArgs
    316         B       Block4x4LoopEnd
    317 Case_6
    318         ;// Case f: index 6 (idx=2, idy=1). Averages the HalfDiagHorVer output (dTAcc0-3) with narrowed qRes2-qRes5.
    319         M_PRINTF "Case 6 \n"
    320         ;// NOTE(review): dTAcc0-3 and qRes2-5 are whatever the helper leaves in them; its register contract is not visible here.
    321         SUB         pSrc, pSrc, srcStep, LSL #1             ;// the helper is entered 2 rows above ...
    322         SUB         pSrc, pSrc, #2                          ;// ... and 2 bytes left of the block origin
    323         BL          armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    324         VQRSHRUN    dTResult0, qRes2, #5                    ;// rounding >>5, saturate to unsigned 8 bits; D14 is the low half of its own source Q7
    325         VQRSHRUN    dTResult1, qRes3, #5                    ;// D16 <- Q8
    326         VQRSHRUN    dTResult2, qRes4, #5                    ;// D18 <- Q9
    327         VQRSHRUN    dTResult3, qRes5, #5                    ;// D20 <- Q10
    328         VRHADD      dTAcc0, dTAcc0, dTResult0               ;// per byte: (a + b + 1) >> 1
    329         VRHADD      dTAcc2, dTAcc2, dTResult2
    330         VRHADD      dTAcc1, dTAcc1, dTResult1
    331         VRHADD      dTAcc3, dTAcc3, dTResult3
    332         ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
    333         VST1        dTRes0[0], [pDst], dstStep              ;// destination row 0 (word lane 0), then pDst += dstStep
    334         VST1        dTRes2[0], [Temp], dstStep              ;// destination row 2, then Temp += dstStep
    335         VST1        dTRes1[0], [pDst]                       ;// destination row 1
    336         VST1        dTRes3[0], [Temp]                       ;// destination row 3
    337         ;// pArgs (r11) is re-pointed at ppArgs for the LDM at Block4x4LoopEnd.
    338         M_ADR       pArgs, ppArgs
    339         B       Block4x4LoopEnd
    340 Case_7
    341         ;// Case g: index 7 (idx=3, idy=1). Like Case_5, but HalfVer is entered one byte to the right of the block origin.
    342         M_PRINTF "Case 7 \n"
    343         MOV         pSrcBK, pSrc                            ;// keep the block origin for the second helper call
    344         ADD         pSrc, pSrc, #1                          ;// HalfVer is entered 1 byte right ...
    345         SUB         pSrc, pSrc, srcStep, LSL #1             ;// ... and 2 rows above the block origin
    346         BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    347         SUB         pSrc, pSrcBK, #2                        ;// HalfHor is entered 2 bytes left of the block origin (r8 must survive HalfVer)
    348         BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    349         VRHADD      dAccH0, dAccH0, dAccV0                  ;// per byte: (a + b + 1) >> 1; assumes HalfHor left D0/D2/D4/D6 intact
    350         VRHADD      dAccH2, dAccH2, dAccV2
    351         VRHADD      dAccH1, dAccH1, dAccV1
    352         VRHADD      dAccH3, dAccH3, dAccV3
    353         ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
    354         VST1        dResultH0[0], [pDst], dstStep           ;// destination row 0 (word lane 0), then pDst += dstStep
    355         VST1        dResultH2[0], [Temp], dstStep           ;// destination row 2, then Temp += dstStep
    356         VST1        dResultH1[0], [pDst]                    ;// destination row 1
    357         VST1        dResultH3[0], [Temp]                    ;// destination row 3
    358         ;// pArgs (r11) is re-pointed at ppArgs for the LDM at Block4x4LoopEnd.
    359         M_ADR       pArgs, ppArgs
    360         B       Block4x4LoopEnd
    361 Case_8
    362         ;// Case h: index 8 (idx=0, idy=2). The HalfVer helper output is stored without further averaging.
    363         M_PRINTF "Case 8 \n"
    364         ;// NOTE(review): dResultV0-3 (D0/D2/D4/D6) are assumed to be filled by the helper; its contract is not visible here.
    365         SUB         pSrc, pSrc, srcStep, LSL #1             ;// the helper is entered 2 rows above the block origin
    366         BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    367         ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
    368         VST1        dResultV0[0], [pDst], dstStep           ;// destination row 0 (word lane 0), then pDst += dstStep
    369         VST1        dResultV2[0], [Temp], dstStep           ;// destination row 2, then Temp += dstStep
    370         VST1        dResultV1[0], [pDst]                    ;// destination row 1
    371         VST1        dResultV3[0], [Temp]                    ;// destination row 3
    372         M_ADR       pArgs, ppArgs
    373         B           Block4x4LoopEnd                         ;// pArgs (r11) was re-pointed at ppArgs after the helper call
    374 Case_9
    375         ;// Case i: index 9 (idx=1, idy=2). Averages the HalfDiagVerHor output (dTAcc0-3) with narrowed dTemp rows shifted by 2 elements.
    376         M_PRINTF "Case 9 \n"
    377         SUB         pSrc, pSrc, srcStep, LSL #1             ;// the helper is entered 2 rows above ...
    378         SUB         pSrc, pSrc, #2                          ;// ... and 2 bytes left of the block origin
    379         BL          armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    380         VEXT        dTempP0, dTempP0, dTempP1, #2           ;// dTempP0 <- 16-bit elements 2..5 of the pair dTempP0:dTempP1
    381         VEXT        dTempQ0, dTempQ0, dTempQ1, #2
    382         VEXT        dTempR0, dTempR0, dTempR1, #2
    383         VEXT        dTempS0, dTempS0, dTempS1, #2
    384         ;// Keep this order: dTResult2 is D18 (dTempP0) and dTResult3 is D20 (dTempQ0), written only after Q9/Q10 were narrowed.
    385         VQRSHRUN    dTResult0, qTempP01, #5                 ;// rounding >>5, saturate to unsigned 8 bits; D14 <- Q9
    386         VQRSHRUN    dTResult1, qTempQ01, #5                 ;// D16 <- Q10
    387         VQRSHRUN    dTResult2, qTempR01, #5                 ;// D18 <- Q11
    388         VQRSHRUN    dTResult3, qTempS01, #5                 ;// D20 <- Q12
    389         ;// NOTE(review): dTAcc0-3 and the dTemp rows are whatever the helper leaves in them; its contract is not visible here.
    390         VRHADD      dTAcc0, dTAcc0, dTResult0               ;// per byte: (a + b + 1) >> 1; only word lane 0 is stored
    391         VRHADD      dTAcc2, dTAcc2, dTResult2
    392         VRHADD      dTAcc1, dTAcc1, dTResult1
    393         VRHADD      dTAcc3, dTAcc3, dTResult3
    394         ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
    395         VST1        dTRes0[0], [pDst], dstStep              ;// destination row 0 (word lane 0), then pDst += dstStep
    396         VST1        dTRes2[0], [Temp], dstStep              ;// destination row 2, then Temp += dstStep
    397         VST1        dTRes1[0], [pDst]                       ;// destination row 1
    398         VST1        dTRes3[0], [Temp]                       ;// destination row 3
    399         M_ADR       pArgs, ppArgs
    400         B       Block4x4LoopEnd                             ;// pArgs (r11) was re-pointed at ppArgs after the helper call
    401 Case_a
    402         ;// Case j: index 10 (idx=2, idy=2). The HalfDiagHorVer helper output is stored without further averaging.
    403         M_PRINTF "Case a \n"
    404         ;// NOTE(review): dTRes0-3 (D0/D2/D4/D6) are assumed to be filled by the helper; its contract is not visible here.
    405         SUB         pSrc, pSrc, srcStep, LSL #1             ;// the helper is entered 2 rows above ...
    406         SUB         pSrc, pSrc, #2                          ;// ... and 2 bytes left of the block origin
    407         BL          armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    408         ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
    409         VST1        dTRes0[0], [pDst], dstStep              ;// destination row 0 (word lane 0), then pDst += dstStep
    410         VST1        dTRes2[0], [Temp], dstStep              ;// destination row 2, then Temp += dstStep
    411         VST1        dTRes1[0], [pDst]                       ;// destination row 1
    412         VST1        dTRes3[0], [Temp]                       ;// destination row 3
    413         M_ADR       pArgs, ppArgs
    414         B       Block4x4LoopEnd                             ;// pArgs (r11) was re-pointed at ppArgs after the helper call
    415 Case_b
    416         ;// Case k: index 11 (idx=3, idy=2). Like Case_9, but the dTemp rows are shifted by 3 elements instead of 2.
    417         M_PRINTF "Case b \n"
    418         SUB         pSrc, pSrc, srcStep, LSL #1             ;// the helper is entered 2 rows above ...
    419         SUB         pSrc, pSrc, #2                          ;// ... and 2 bytes left of the block origin
    420         BL          armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    421         VEXT        dTempP0, dTempP0, dTempP1, #3           ;// dTempP0 <- 16-bit elements 3..6 of the pair dTempP0:dTempP1
    422         VEXT        dTempQ0, dTempQ0, dTempQ1, #3
    423         VEXT        dTempR0, dTempR0, dTempR1, #3
    424         VEXT        dTempS0, dTempS0, dTempS1, #3
    425         ;// Keep this order: dTResult2 is D18 (dTempP0) and dTResult3 is D20 (dTempQ0), written only after Q9/Q10 were narrowed.
    426         VQRSHRUN    dTResult0, qTempP01, #5                 ;// rounding >>5, saturate to unsigned 8 bits; D14 <- Q9
    427         VQRSHRUN    dTResult1, qTempQ01, #5                 ;// D16 <- Q10
    428         VQRSHRUN    dTResult2, qTempR01, #5                 ;// D18 <- Q11
    429         VQRSHRUN    dTResult3, qTempS01, #5                 ;// D20 <- Q12
    430         ;// NOTE(review): dTAcc0-3 and the dTemp rows are whatever the helper leaves in them; its contract is not visible here.
    431         VRHADD      dTAcc0, dTAcc0, dTResult0               ;// per byte: (a + b + 1) >> 1; only word lane 0 is stored
    432         VRHADD      dTAcc2, dTAcc2, dTResult2
    433         VRHADD      dTAcc1, dTAcc1, dTResult1
    434         VRHADD      dTAcc3, dTAcc3, dTResult3
    435         ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
    436         VST1        dTRes0[0], [pDst], dstStep              ;// destination row 0 (word lane 0), then pDst += dstStep
    437         VST1        dTRes2[0], [Temp], dstStep              ;// destination row 2, then Temp += dstStep
    438         VST1        dTRes1[0], [pDst]                       ;// destination row 1
    439         VST1        dTRes3[0], [Temp]                       ;// destination row 3
    440         M_ADR       pArgs, ppArgs
    441         B       Block4x4LoopEnd                             ;// pArgs (r11) was re-pointed at ppArgs after the helper call
    442 Case_c
    443         ;// Case n: index 12 (idx=0, idy=3). Like Case_4, but averages with dSrc1-4 (one register further on) instead of dSrc0-3.
    444         M_PRINTF "Case c \n"
    445         ;// NOTE(review): dAccV0-3 and dSrc1-4 are whatever the helper leaves in them; its register contract is not visible here.
    446         SUB         pSrc, pSrc, srcStep, LSL #1             ;// the helper is entered 2 rows above the block origin
    447         BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    448         VRHADD      dAccV0, dAccV0, dSrc1                   ;// per byte: (a + b + 1) >> 1
    449         VRHADD      dAccV2, dAccV2, dSrc3
    450         VRHADD      dAccV1, dAccV1, dSrc2
    451         VRHADD      dAccV3, dAccV3, dSrc4
    452         ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
    453         VST1        dResultV0[0], [pDst], dstStep           ;// destination row 0 (word lane 0), then pDst += dstStep
    454         VST1        dResultV2[0], [Temp], dstStep           ;// destination row 2, then Temp += dstStep
    455         VST1        dResultV1[0], [pDst]                    ;// destination row 1
    456         VST1        dResultV3[0], [Temp]                    ;// destination row 3
    457         M_ADR       pArgs, ppArgs
    458         B           Block4x4LoopEnd                         ;// pArgs (r11) was re-pointed at ppArgs after the helper call
    459 Case_d
    460         ;// Case p: index 13 (idx=1, idy=3). Like Case_5, but HalfHor is entered one row below the block origin.
    461         M_PRINTF "Case d \n"
    462         ;// NOTE(review): relies on HalfVer preserving r8 (pSrcBK) and HalfHor preserving D0/D2/D4/D6 - confirm in the helpers.
    463         MOV         pSrcBK, pSrc                            ;// keep the block origin for the second helper call
    464         SUB         pSrc, pSrc, srcStep, LSL #1             ;// HalfVer is entered 2 rows above the block origin
    465         BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    466         ADD         pSrc, pSrcBK, srcStep                   ;// HalfHor is entered 1 row below ...
    467         SUB         pSrc, pSrc, #2                          ;// ... and 2 bytes left of the block origin
    468         BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    469         VRHADD      dAccH0, dAccH0, dAccV0                  ;// per byte: (a + b + 1) >> 1
    470         VRHADD      dAccH2, dAccH2, dAccV2
    471         VRHADD      dAccH1, dAccH1, dAccV1
    472         VRHADD      dAccH3, dAccH3, dAccV3
    473         ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
    474         VST1        dResultH0[0], [pDst], dstStep           ;// destination row 0 (word lane 0), then pDst += dstStep
    475         VST1        dResultH2[0], [Temp], dstStep           ;// destination row 2, then Temp += dstStep
    476         VST1        dResultH1[0], [pDst]                    ;// destination row 1
    477         VST1        dResultH3[0], [Temp]                    ;// destination row 3
    478         M_ADR       pArgs, ppArgs
    479         B       Block4x4LoopEnd                             ;// pArgs (r11) was re-pointed at ppArgs after the helper calls
    480 Case_e
    481         ;// Case q: index 14 (idx=2, idy=3). Like Case_6, but narrows qRes3-qRes6 (one register further on) instead of qRes2-qRes5.
    482         M_PRINTF "Case e \n"
    483         ;// NOTE(review): dTAcc0-3 and qRes3-6 are whatever the helper leaves in them; its register contract is not visible here.
    484         SUB         pSrc, pSrc, srcStep, LSL #1             ;// the helper is entered 2 rows above ...
    485         SUB         pSrc, pSrc, #2                          ;// ... and 2 bytes left of the block origin
    486         BL          armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    487         VQRSHRUN    dTResult0, qRes3, #5                    ;// rounding >>5, saturate to unsigned 8 bits; D14 <- Q8
    488         VQRSHRUN    dTResult1, qRes4, #5                    ;// D16 <- Q9;  D16 is the low half of Q8, already consumed above
    489         VQRSHRUN    dTResult2, qRes5, #5                    ;// D18 <- Q10; D18 is the low half of Q9, already consumed above
    490         VQRSHRUN    dTResult3, qRes6, #5                    ;// D20 <- Q11; D20 is the low half of Q10, already consumed above
    491         ;// The four narrowing steps must stay in this order because each destination overlaps the previous source.
    492         VRHADD      dTAcc0, dTAcc0, dTResult0               ;// per byte: (a + b + 1) >> 1
    493         VRHADD      dTAcc2, dTAcc2, dTResult2
    494         VRHADD      dTAcc1, dTAcc1, dTResult1
    495         VRHADD      dTAcc3, dTAcc3, dTResult3
    496         ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
    497         VST1        dTRes0[0], [pDst], dstStep              ;// destination row 0 (word lane 0), then pDst += dstStep
    498         VST1        dTRes2[0], [Temp], dstStep              ;// destination row 2, then Temp += dstStep
    499         VST1        dTRes1[0], [pDst]                       ;// destination row 1
    500         VST1        dTRes3[0], [Temp]                       ;// destination row 3
    501         M_ADR       pArgs, ppArgs
    502         B       Block4x4LoopEnd                             ;// pArgs (r11) was re-pointed at ppArgs after the helper call
    503 Case_f
    504         ;// Case r: index 15 (idx=3, idy=3). HalfVer entered 1 byte right, HalfHor entered 1 row below; outputs averaged. Falls through to Block4x4LoopEnd.
    505         M_PRINTF "Case f \n"
    506         MOV         pSrcBK, pSrc                            ;// keep the block origin for the second helper call
    507         ADD         pSrc, pSrc, #1                          ;// HalfVer is entered 1 byte right ...
    508         SUB         pSrc, pSrc, srcStep, LSL #1             ;// ... and 2 rows above the block origin
    509         BL          armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    510         ADD         pSrc, pSrcBK, srcStep                   ;// HalfHor is entered 1 row below (r8 must survive HalfVer) ...
    511         SUB         pSrc, pSrc, #2                          ;// ... and 2 bytes left of the block origin
    512         BL          armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    513         VRHADD      dAccH0, dAccH0, dAccV0                  ;// per byte: (a + b + 1) >> 1; assumes HalfHor left D0/D2/D4/D6 intact
    514         VRHADD      dAccH2, dAccH2, dAccV2
    515         VRHADD      dAccH1, dAccH1, dAccV1
    516         VRHADD      dAccH3, dAccH3, dAccV3
    517         ADD         Temp, pDst, dstStep, LSL #1             ;// Temp = pDst + 2*dstStep (destination row 2)
    518         VST1        dResultH0[0], [pDst], dstStep           ;// destination row 0 (word lane 0), then pDst += dstStep
    519         VST1        dResultH2[0], [Temp], dstStep           ;// destination row 2, then Temp += dstStep
    520         VST1        dResultH1[0], [pDst]                    ;// destination row 1
    521         VST1        dResultH3[0], [Temp]                    ;// destination row 3
    522         M_ADR       pArgs, ppArgs
    523 
    524 
    525 Block4x4LoopEnd
    526         ;// Every case arrives here with pArgs pointing at ppArgs, so the commented-out M_ADR below is not needed.
    527         ;// Width Loop: restore the block arguments saved by STM, then step 4 bytes right
    528         ;//M_ADR       pArgs, ppArgs
    529         LDM         pArgs, {pSrc,srcStep,pDst,dstStep}  ;// Load arguments
    530         SUBS        iWidth, iWidth, #4                      ;// sets the flags tested by BGT; the two ADDs below do not change them
    531         ADD         pSrc, pSrc, #4
    532         ADD         pDst, pDst, #4
    533         BGT         Block4x4WidthLoop                       ;// more 4-wide blocks remain in this row of blocks
    534 
    535         ;// Height Loop: pSrc/pDst have advanced by the full width; move 4 rows down and back to the left edge
    536         SUBS        iHeight, iHeight, #4                    ;// flags must survive to the BGT (assumes M_LDR/M_ADR expand to non-flag-setting code)
    537         M_LDR       iWidth, ptrWidth
    538         M_ADR       pArgs, ppArgs
    539         ADD         pSrc, pSrc, srcStep, LSL #2             ;// pSrc += 4*srcStep
    540         ADD         pDst, pDst, dstStep, LSL #2             ;// pDst += 4*dstStep
    541         SUB         pSrc, pSrc, iWidth                      ;// undo the horizontal advance of the width loop (iWidth was just reloaded)
    542         SUB         pDst, pDst, iWidth
    543         BGT         Block4x4HeightLoop                      ;// more rows of blocks remain
    544 
    545 EndOfInterpolation
    546         MOV         r0, #0                                  ;// return 0 (the file header names the success value OMX_Sts_NoErr)
    547         M_END
    548 
    549     ENDIF
    550         ;// End of CortexA8
    551 
    552     END
    553 
    554