Home | History | Annotate | Download | only in src
      1 ;//
      2 ;//
      3 ;// File Name:  armVCM4P10_InterpolateLuma_Align_unsafe_s.s
      4 ;// OpenMAX DL: v1.0.2
      5 ;// Revision:   9641
      6 ;// Date:       Thursday, February 7, 2008
      7 ;//
      8 ;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ;//
     10 ;//
     11 ;//
     12 
     13         INCLUDE omxtypes_s.h
     14         INCLUDE armCOMM_s.h
     15 
     16         M_VARIANTS ARM1136JS
                ;// M_VARIANTS is a macro that is not defined in this file (presumably armCOMM_s.h).
                ;// ARM1136JS is the only variant named, and all code below sits inside IF ARM1136JS.
     17 
                ;// Make the two routines defined in this file visible to other object files.
     18         EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
     19         EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
     20 
                ;// Assembly-time logical variable, set to false. It is not referenced by name
                ;// anywhere else in this file - NOTE(review): the included macros may read it.
     21 DEBUG_ON    SETL {FALSE}
     22 
     23     IF ARM1136JS
     24 
     25 ;// Declare input registers (shared by both routines in this file)
     26 pSrc            RN 0                    ;// In: source address, any alignment. Out: address of the aligned copy
     27 srcStep         RN 1                    ;// In: bytes added to pSrc per row. Out: row step of the aligned copy
     28 pDst            RN 8                    ;// In: destination address; advanced by every row store
     29 iHeight         RN 9                    ;// In: number of rows to copy; counted down by the copy loops
     30 
     31 ;// Declare inner loop registers
     32 x               RN 7                    ;// pSrc & 3, the selector given to M_SWITCH
     33 x0              RN 7                    ;// First data word of a row; shares r7 with x, which is not read after the switch
     34 x1              RN 10                   ;// Second data word of a row
     35 x2              RN 11                   ;// Third data word of a row (HorAlign9x only)
     36 Scratch         RN 12                   ;// Holds the initial pDst in HorAlign9x; not used by VerAlign4x
     37 
     38 ;// Function:
     39 ;//     armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
     40 ;//
     41 ;// Copies iHeight rows from an arbitrarily aligned source location (pSrc) to a 4-byte aligned
     42 ;// destination (pDst) for horizontal interpolation.
     43 ;// Each row needs 9 bytes in the horizontal direction, so three whole words (12 bytes) are
        ;// loaded starting at the word that contains pSrc, and three words are stored per row.
        ;// pSrc & 3 selects one of four loops that shift the three words right by 0, 8, 16 or 24 bits.
        ;// NOTE(review): discarding the low-order bits first matches little-endian byte order - confirm.
        ;// Every loop is tested at the bottom, so one row is copied even if iHeight is 0 on entry.
     44 ;//
     45 ;// Registers used as input for this function
     46 ;// r0,r1,r8,r9 where r0 is the source pointer, r1 the source step in bytes,
        ;// r8 contains the aligned destination pointer and r9 the number of rows to copy
     47 ;//
     48 ;// Registers preserved for top level function
     49 ;// r2,r3,r4,r5,r6
     50 ;//
     51 ;// Registers modified by the function
     52 ;// r7,r8,r9,r10,r11,r12 (r0 and r1 are also overwritten with the outputs listed below)
     53 ;//
     54 ;// Output registers
     55 ;// r0 - pointer to the new aligned location which will be used as pSrc
     56 ;// r1 - step size to this aligned location (always 12)
     57 
     58         ;// Function header (M_START and M_END are macros that are not defined in this file)
     59         M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
     60 
     61         ;// Keep the start of the destination buffer; it is returned in r0 at CopyEnd
     62         MOV     Scratch, pDst
     63 
     64 StartAlignedStackCopy
     65         AND     x, pSrc, #3                             ;// x = byte offset of pSrc inside its word (0..3)
     66         BIC     pSrc, pSrc, #3                          ;// Round pSrc down to a word boundary for the LDMs below
     67 
        ;// Dispatch on x to the loop written for that byte offset
        ;// (M_SWITCH, M_CASE and M_ENDSWITCH are macros that are not defined in this file)
     68         M_SWITCH x
     69         M_CASE   Copy0toAligned
     70         M_CASE   Copy1toAligned
     71         M_CASE   Copy2toAligned
     72         M_CASE   Copy3toAligned
     73         M_ENDSWITCH
     74 
        ;// Offset 0: the source is already word aligned, so the three words are copied unchanged
     75 Copy0toAligned
     76         LDM     pSrc, {x0, x1, x2}                      ;// Load three consecutive words of this row
     77         SUBS    iHeight, iHeight, #1                    ;// Count the row; nothing before the BGT changes the flags
     78         ADD     pSrc, pSrc, srcStep                     ;// Step to the next source row
     79 
     80         ;// One cycle stall
     81 
     82         STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row
     83         BGT     Copy0toAligned                          ;// Repeat while rows remain (flags from SUBS)
     84         B       CopyEnd
     85 
        ;// Offset 1: shift the three-word value x2:x1:x0 right by 8 bits
     86 Copy1toAligned
     87         LDM     pSrc, {x0, x1, x2}                      ;// Load three consecutive words of this row
     88         SUBS    iHeight, iHeight, #1                    ;// Count the row; nothing before the BGT changes the flags
     89         ADD     pSrc, pSrc, srcStep                     ;// Step to the next source row
     90 
     91         ;// One cycle stall
     92 
     93         MOV     x0, x0, LSR #8                          ;// Drop the low 8 bits of word 0
     94         ORR     x0, x0, x1, LSL #24                     ;// x0 = (x0 >> 8) | (x1 << 24)
     95         MOV     x1, x1, LSR #8
     96         ORR     x1, x1, x2, LSL #24                     ;// x1 = (x1 >> 8) | (x2 << 24)
     97         MOV     x2, x2, LSR #8                          ;// Top 8 bits of x2 become zero
     98         STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row
     99         BGT     Copy1toAligned                          ;// Repeat while rows remain (flags from SUBS)
    100         B       CopyEnd
    101 
        ;// Offset 2: shift the three-word value x2:x1:x0 right by 16 bits
    102 Copy2toAligned
    103         LDM     pSrc, {x0, x1, x2}                      ;// Load three consecutive words of this row
    104         SUBS    iHeight, iHeight, #1                    ;// Count the row; nothing before the BGT changes the flags
    105         ADD     pSrc, pSrc, srcStep                     ;// Step to the next source row
    106 
    107         ;// One cycle stall
    108 
    109         MOV     x0, x0, LSR #16                         ;// Drop the low 16 bits of word 0
    110         ORR     x0, x0, x1, LSL #16                     ;// x0 = (x0 >> 16) | (x1 << 16)
    111         MOV     x1, x1, LSR #16
    112         ORR     x1, x1, x2, LSL #16                     ;// x1 = (x1 >> 16) | (x2 << 16)
    113         MOV     x2, x2, LSR #16                         ;// Top 16 bits of x2 become zero
    114         STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row
    115         BGT     Copy2toAligned                          ;// Repeat while rows remain (flags from SUBS)
    116         B       CopyEnd
    117 
        ;// Offset 3: shift the three-word value x2:x1:x0 right by 24 bits
    118 Copy3toAligned
    119         LDM     pSrc, {x0, x1, x2}                      ;// Load three consecutive words of this row
    120         SUBS    iHeight, iHeight, #1                    ;// Count the row; nothing before the BGT changes the flags
    121         ADD     pSrc, pSrc, srcStep                     ;// Step to the next source row
    122 
    123         ;// One cycle stall
    124 
    125         MOV     x0, x0, LSR #24                         ;// Drop the low 24 bits of word 0
    126         ORR     x0, x0, x1, LSL #8                      ;// x0 = (x0 >> 24) | (x1 << 8)
    127         MOV     x1, x1, LSR #24
    128         ORR     x1, x1, x2, LSL #8                      ;// x1 = (x1 >> 24) | (x2 << 8)
    129         MOV     x2, x2, LSR #24                         ;// Top 24 bits of x2 become zero
    130         STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row
    131         BGT     Copy3toAligned                          ;// Repeat while rows remain; otherwise fall through to CopyEnd
    132 
        ;// Common exit: hand the aligned copy back through the registers that held the source
    133 CopyEnd
    134 
    135         MOV     pSrc, Scratch                           ;// r0 = start of the aligned copy (pDst as passed in)
    136         MOV     srcStep, #12                            ;// r1 = 12: each STM above wrote three words per row
    137 
    138         M_END
    139 
    140 
    141 ;// Function:
    142 ;//     armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    143 ;//
    144 ;// Copies iHeight rows from an arbitrarily aligned source location (pSrc) to an aligned
    145 ;// destination (pDst) for vertical interpolation.
    146 ;// Each row contributes 4 bytes in the horizontal direction, packed into one word per row.
        ;// pSrc & 3 selects one of four loops. For offsets 1..3 the word containing pSrc and the word
        ;// after it are merged with shifts of 8, 16 or 24 bits.
        ;// NOTE(review): discarding the low-order bits first matches little-endian byte order - confirm.
        ;// Every loop is tested at the bottom, so one row is copied even if iHeight is 0 on entry.
    147 ;//
    148 ;// Registers used as input for this function
    149 ;// r0,r1,r8,r9 where r0 is the source pointer, r1 the source step in bytes,
        ;// r8 contains the aligned destination pointer and r9 the number of rows to copy
    150 ;//
    151 ;// Registers preserved for top level function
    152 ;// r2,r3,r4,r5,r6
    153 ;//
    154 ;// Registers modified by the function
    155 ;// r7,r8,r9,r10,r11,r12 (r0 and r1 are also overwritten with the outputs listed below)
        ;// NOTE(review): the instructions written here only write r0,r1,r7,r8,r9,r10. Whether the
        ;// macros touch r11 or r12 cannot be seen from this file.
    156 ;//
    157 ;// Output registers
    158 ;// r0 - pointer to the new aligned location which will be used as pSrc (final pDst - 28)
    159 ;// r1 - step size to this aligned location (always 4)
    160 
    161         ;// Function header (M_START and M_END are macros that are not defined in this file)
    162         M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    163 
    164         ;// Split pSrc into a word-aligned base address and a byte offset
    165 StartVAlignedStackCopy
    166         AND     x, pSrc, #3                                 ;// x = byte offset of pSrc inside its word (0..3)
    167         BIC     pSrc, pSrc, #3                              ;// Round pSrc down to a word boundary
    168 
    169 
        ;// Dispatch on x to the loop written for that byte offset
        ;// (M_SWITCH, M_CASE and M_ENDSWITCH are macros that are not defined in this file)
    170         M_SWITCH x
    171         M_CASE   Copy0toVAligned
    172         M_CASE   Copy1toVAligned
    173         M_CASE   Copy2toVAligned
    174         M_CASE   Copy3toVAligned
    175         M_ENDSWITCH
    176 
        ;// Offset 0: the source word is already aligned and is copied unchanged.
        ;// M_LDR is a macro that is not defined in this file. It is written with post-indexed
        ;// operands, i.e. load from pSrc and then pSrc += srcStep - NOTE(review): confirm in armCOMM_s.h.
    177 Copy0toVAligned
    178         M_LDR   x0, [pSrc], srcStep
    179         SUBS    iHeight, iHeight, #1                        ;// Count the row; nothing before the BGT changes the flags
    180 
    181         ;// One cycle stall
    182 
    183         STR     x0, [pDst], #4                              ;// Store aligned output row
    184         BGT     Copy0toVAligned                             ;// Repeat while rows remain (flags from SUBS)
    185         B       CopyVEnd
    186 
        ;// Offset 1: output word = (word0 >> 8) | (word1 << 24)
    187 Copy1toVAligned
    188         LDR     x1, [pSrc, #4]                              ;// Second word, read before M_LDR moves pSrc to the next row
    189         M_LDR   x0, [pSrc], srcStep
    190         SUBS    iHeight, iHeight, #1                        ;// Count the row; nothing before the BGT changes the flags
    191 
    192         ;// One cycle stall
    193 
    194         MOV     x1, x1, LSL #24                             ;// Keep the low 8 bits of word 1, moved to the top
    195         ORR     x0, x1, x0, LSR #8                          ;// x0 = (x1 << 24) | (x0 >> 8)
    196         STR     x0, [pDst], #4                              ;// Store aligned output row
    197         BGT     Copy1toVAligned                             ;// Repeat while rows remain (flags from SUBS)
    198         B       CopyVEnd
    199 
        ;// Offset 2: output word = (word0 >> 16) | (word1 << 16)
    200 Copy2toVAligned
    201         LDR     x1, [pSrc, #4]                              ;// Second word, read before M_LDR moves pSrc to the next row
    202         M_LDR   x0, [pSrc], srcStep
    203         SUBS    iHeight, iHeight, #1                        ;// Count the row; nothing before the BGT changes the flags
    204 
    205         ;// One cycle stall
    206 
    207         MOV     x1, x1, LSL #16                             ;// Keep the low 16 bits of word 1, moved to the top
    208         ORR     x0, x1, x0, LSR #16                         ;// x0 = (x1 << 16) | (x0 >> 16)
    209         STR     x0, [pDst], #4                              ;// Store aligned output row
    210         BGT     Copy2toVAligned                             ;// Repeat while rows remain (flags from SUBS)
    211         B       CopyVEnd
    212 
        ;// Offset 3: output word = (word0 >> 24) | (word1 << 8)
    213 Copy3toVAligned
    214         LDR     x1, [pSrc, #4]                              ;// Second word, read before M_LDR moves pSrc to the next row
    215         M_LDR   x0, [pSrc], srcStep
    216         SUBS    iHeight, iHeight, #1                        ;// Count the row; nothing before the BGT changes the flags
    217 
    218         ;// One cycle stall
    219 
    220         MOV     x1, x1, LSL #8                              ;// Keep the low 24 bits of word 1, moved to the top
    221         ORR     x0, x1, x0, LSR #24                         ;// x0 = (x1 << 8) | (x0 >> 24)
    222         STR     x0, [pDst], #4                              ;// Store aligned output row
    223         BGT     Copy3toVAligned                             ;// Repeat while rows remain; otherwise fall through to CopyVEnd
    224 
        ;// Common exit. The initial pDst is not saved in this routine, so the returned pointer is
        ;// derived from the final pDst, which has advanced 4 bytes for every row stored.
        ;// NOTE(review): pDst - 28 equals (initial pDst + 8), i.e. the third stored row, only when
        ;// 9 rows were copied - confirm that every caller passes iHeight = 9.
    225 CopyVEnd
    226 
    227         SUB     pSrc, pDst, #28                             ;// r0 = final pDst - 28
    228         MOV     srcStep, #4                                 ;// r1 = 4: one word was stored per row
    229 
    230         M_END
    231 
    232 
    233     ENDIF
    234 
    235     END
    236 
    237