Home | History | Annotate | Download | only in src
      1 ;//
      2 ;// Copyright (C) 2007-2008 ARM Limited
      3 ;//
      4 ;// Licensed under the Apache License, Version 2.0 (the "License");
      5 ;// you may not use this file except in compliance with the License.
      6 ;// You may obtain a copy of the License at
      7 ;//
      8 ;//      http://www.apache.org/licenses/LICENSE-2.0
      9 ;//
     10 ;// Unless required by applicable law or agreed to in writing, software
     11 ;// distributed under the License is distributed on an "AS IS" BASIS,
     12 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ;// See the License for the specific language governing permissions and
     14 ;// limitations under the License.
     15 ;//
     16 ;//
     17 ;//
     18 ;// File Name:  armVCM4P10_InterpolateLuma_Align_unsafe_s.s
     19 ;// OpenMAX DL: v1.0.2
     20 ;// Revision:   12290
     21 ;// Date:       Wednesday, April 9, 2008
     22 ;//
     23 ;//
     24 ;//
     25 ;//
     26 
     27         INCLUDE omxtypes_s.h
     28         INCLUDE armCOMM_s.h
     29 
     30         M_VARIANTS ARM1136JS
     31 
     32         EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
     33         EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
     34 
     35 DEBUG_ON    SETL {FALSE}
     36 
     37     IF ARM1136JS
     38 
     39 ;// Declare input registers (shared by both alignment routines in this file)
     40 pSrc            RN 0                                    ;// r0: source address, any byte alignment; overwritten with the output pointer on exit
     41 srcStep         RN 1                                    ;// r1: byte step between source rows; overwritten with the output step on exit
     42 pDst            RN 8                                    ;// r8: destination address; advanced as each row is stored
     43 iHeight         RN 9                                    ;// r9: rows remaining; decremented once per stored row
     44 
     45 ;// Declare inner loop registers
     46 x               RN 7                                    ;// r7: pSrc AND 3, used only to select the copy case
     47 x0              RN 7                                    ;// r7 again: x is not read after the case is selected, so r7 is reused for the first data word
     48 x1              RN 10                                   ;// r10: second data word
     49 x2              RN 11                                   ;// r11: third data word (referenced by HorAlign9x only)
     50 Scratch         RN 12                                   ;// r12: holds the entry value of pDst in HorAlign9x
     51 
     52 ;// Function:
     53 ;//     armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
     54 ;//
     55 ;// Copies rows from an arbitrarily aligned source memory location (pSrc) to a 4-byte aligned
     56 ;// destination pointed to by (pDst) for horizontal interpolation.
     57 ;// 9 bytes are needed per row; three words (12 bytes) are loaded, shifted and stored per row. The row count is tested after each store, so at least one row is always copied.
     58 ;//
     59 ;// Registers used as input for this function
     60 ;// r0 = pSrc, r1 = srcStep, r8 = aligned destination pointer (pDst), r9 = number of rows to copy (iHeight)
     61 ;//
     62 ;// Registers preserved for top level function
     63 ;// r2,r3,r4,r5,r6
     64 ;//
     65 ;// Registers modified by the function
     66 ;// r7,r8,r9,r10,r11,r12 (r0 and r1 are also overwritten with the outputs listed below)
     67 ;//
     68 ;// Output registers
     69 ;// r0 - pointer to the new aligned location (the value pDst had on entry), to be used as pSrc
     70 ;// r1 - step size to this aligned location (12 bytes per row)
     71 
     72         ;// Function header (M_START is a macro defined outside this file)
     73         M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
     74 
     75         ;// Save the entry value of pDst in Scratch; it is returned in r0 at CopyEnd
     76         MOV     Scratch, pDst
     77 
     78 StartAlignedStackCopy
     79         AND     x, pSrc, #3                             ;// x = byte offset of pSrc within its 4-byte word (0..3)
     80         BIC     pSrc, pSrc, #3                          ;// Round pSrc down to a 4-byte boundary
     81         ;// Select the copy loop for offset x (M_SWITCH/M_CASE are macros defined outside this file; presumably a jump table indexed by x)
     82         M_SWITCH x
     83         M_CASE   Copy0toAligned
     84         M_CASE   Copy1toAligned
     85         M_CASE   Copy2toAligned
     86         M_CASE   Copy3toAligned
     87         M_ENDSWITCH
     88         ;// Offset 0: the three loaded words are stored unchanged
     89 Copy0toAligned
     90         LDM     pSrc, {x0, x1, x2}                      ;// Load three consecutive words of the row; pSrc is not written back
     91         SUBS    iHeight, iHeight, #1                    ;// Count this row; the flags set here are consumed by the BGT below
     92         ADD     pSrc, pSrc, srcStep                     ;// Advance to the next source row (does not alter the flags)
     93 
     94         ;// One cycle stall (original scheduling note; presumably the wait for the LDM data before its first use)
     95 
     96         STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row; pDst advances by 12 bytes
     97         BGT     Copy0toAligned                          ;// Loop while iHeight is still greater than zero
     98         B       CopyEnd                                 ;// Done: skip the other offset cases
     99         ;// Offset 1: shift the 96-bit row right by 8 bits across the word boundaries (drops the first loaded byte, assuming little-endian data)
    100 Copy1toAligned
    101         LDM     pSrc, {x0, x1, x2}                      ;// Load three consecutive words of the row; pSrc is not written back
    102         SUBS    iHeight, iHeight, #1                    ;// Count this row; the flags set here are consumed by the BGT below
    103         ADD     pSrc, pSrc, srcStep                     ;// Advance to the next source row (does not alter the flags)
    104 
    105         ;// One cycle stall (original scheduling note; presumably the wait for the LDM data before its first use)
    106 
    107         MOV     x0, x0, LSR #8                          ;// x0 = x0 >> 8
    108         ORR     x0, x0, x1, LSL #24                     ;// x0 |= x1 << 24  (low 8 bits of x1 fill the top of x0)
    109         MOV     x1, x1, LSR #8                          ;// x1 = x1 >> 8
    110         ORR     x1, x1, x2, LSL #24                     ;// x1 |= x2 << 24
    111         MOV     x2, x2, LSR #8                          ;// x2 = x2 >> 8; its top 8 bits become zero
    112         STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row; pDst advances by 12 bytes
    113         BGT     Copy1toAligned                          ;// Loop while iHeight is still greater than zero
    114         B       CopyEnd                                 ;// Done: skip the other offset cases
    115         ;// Offset 2: shift the 96-bit row right by 16 bits across the word boundaries
    116 Copy2toAligned
    117         LDM     pSrc, {x0, x1, x2}                      ;// Load three consecutive words of the row; pSrc is not written back
    118         SUBS    iHeight, iHeight, #1                    ;// Count this row; the flags set here are consumed by the BGT below
    119         ADD     pSrc, pSrc, srcStep                     ;// Advance to the next source row (does not alter the flags)
    120 
    121         ;// One cycle stall (original scheduling note; presumably the wait for the LDM data before its first use)
    122 
    123         MOV     x0, x0, LSR #16                         ;// x0 = x0 >> 16
    124         ORR     x0, x0, x1, LSL #16                     ;// x0 |= x1 << 16
    125         MOV     x1, x1, LSR #16                         ;// x1 = x1 >> 16
    126         ORR     x1, x1, x2, LSL #16                     ;// x1 |= x2 << 16
    127         MOV     x2, x2, LSR #16                         ;// x2 = x2 >> 16; its top 16 bits become zero
    128         STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row; pDst advances by 12 bytes
    129         BGT     Copy2toAligned                          ;// Loop while iHeight is still greater than zero
    130         B       CopyEnd                                 ;// Done: skip the remaining offset case
    131         ;// Offset 3: shift the 96-bit row right by 24 bits across the word boundaries
    132 Copy3toAligned
    133         LDM     pSrc, {x0, x1, x2}                      ;// Load three consecutive words of the row; pSrc is not written back
    134         SUBS    iHeight, iHeight, #1                    ;// Count this row; the flags set here are consumed by the BGT below
    135         ADD     pSrc, pSrc, srcStep                     ;// Advance to the next source row (does not alter the flags)
    136 
    137         ;// One cycle stall (original scheduling note; presumably the wait for the LDM data before its first use)
    138 
    139         MOV     x0, x0, LSR #24                         ;// x0 = x0 >> 24
    140         ORR     x0, x0, x1, LSL #8                      ;// x0 |= x1 << 8
    141         MOV     x1, x1, LSR #24                         ;// x1 = x1 >> 24
    142         ORR     x1, x1, x2, LSL #8                      ;// x1 |= x2 << 8
    143         MOV     x2, x2, LSR #24                         ;// x2 = x2 >> 24; only its low 8 bits remain (the ninth byte of the row)
    144         STM     pDst!, {x0, x1, x2}                     ;// Store aligned output row; pDst advances by 12 bytes
    145         BGT     Copy3toAligned                          ;// Loop while iHeight is still greater than zero; otherwise fall through to CopyEnd
    146 
    147 CopyEnd
    148 
    149         MOV     pSrc, Scratch                           ;// r0 = pDst as it was on entry, i.e. the first copied row
    150         MOV     srcStep, #12                            ;// r1 = 12, the number of bytes stored per row above
    151 
    152         M_END
    153 
    154 
    155 ;// Function:
    156 ;//     armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    157 ;//
    158 ;// Copies rows from an arbitrarily aligned source memory location (pSrc) to an aligned
    159 ;// destination pointed to by (pDst) for vertical interpolation.
    160 ;// 4 bytes (one word) are stored per row. The row count is tested after each store, so at least one row is always copied.
    161 ;//
    162 ;// Registers used as input for this function
    163 ;// r0 = pSrc, r1 = srcStep, r8 = aligned destination pointer (pDst), r9 = number of rows to copy (iHeight)
    164 ;//
    165 ;// Registers preserved for top level function
    166 ;// r2,r3,r4,r5,r6
    167 ;//
    168 ;// Registers modified by the function
    169 ;// r7,r8,r9,r10,r11,r12 (the instructions visible here write r0, r1 and r7-r10; r11 and r12 are not referenced by name)
    170 ;//
    171 ;// Output registers
    172 ;// r0 - pointer into the new aligned location (final pDst minus 28 bytes), to be used as pSrc
    173 ;// r1 - step size to this aligned location (4 bytes per row)
    174 
    175         ;// Function header (M_START is a macro defined outside this file)
    176         M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
    177 
    178         ;// Copy rows from pSrc to pDst (the original note calls the destination the stack; the buffer itself is supplied by the caller in r8)
    179 StartVAlignedStackCopy
    180         AND     x, pSrc, #3                             ;// x = byte offset of pSrc within its 4-byte word (0..3)
    181         BIC     pSrc, pSrc, #3                          ;// Round pSrc down to a 4-byte boundary
    182 
    183         ;// Select the copy loop for offset x (M_SWITCH/M_CASE are macros defined outside this file; presumably a jump table indexed by x)
    184         M_SWITCH x
    185         M_CASE   Copy0toVAligned
    186         M_CASE   Copy1toVAligned
    187         M_CASE   Copy2toVAligned
    188         M_CASE   Copy3toVAligned
    189         M_ENDSWITCH
    190         ;// Offset 0: one word per row is copied unchanged. M_LDR is a macro defined outside this file; presumably a load from [pSrc] followed by pSrc += srcStep
    191 Copy0toVAligned
    192         M_LDR   x0, [pSrc], srcStep
    193         SUBS    iHeight, iHeight, #1                    ;// Count this row; the flags set here are consumed by the BGT below
    194 
    195         ;// One cycle stall (original scheduling note; presumably the wait for the loaded data before its first use)
    196 
    197         STR     x0, [pDst], #4                              ;// Store aligned output row; pDst advances by 4 bytes
    198         BGT     Copy0toVAligned                         ;// Loop while iHeight is still greater than zero
    199         B       CopyVEnd                                ;// Done: skip the other offset cases
    200         ;// Offset 1: output word = (word0 >> 8) | (word1 << 24), i.e. the row's two words shifted right by 8 bits
    201 Copy1toVAligned
    202         LDR     x1, [pSrc, #4]                          ;// Read the following word first, while pSrc still addresses the current row
    203         M_LDR   x0, [pSrc], srcStep
    204         SUBS    iHeight, iHeight, #1                    ;// Count this row; the flags set here are consumed by the BGT below
    205 
    206         ;// One cycle stall (original scheduling note; presumably the wait for the loaded data before its first use)
    207 
    208         MOV     x1, x1, LSL #24                         ;// x1 = x1 << 24
    209         ORR     x0, x1, x0, LSR #8                      ;// x0 = x1 | (x0 >> 8)
    210         STR     x0, [pDst], #4                              ;// Store aligned output row; pDst advances by 4 bytes
    211         BGT     Copy1toVAligned                         ;// Loop while iHeight is still greater than zero
    212         B       CopyVEnd                                ;// Done: skip the other offset cases
    213         ;// Offset 2: output word = (word0 >> 16) | (word1 << 16)
    214 Copy2toVAligned
    215         LDR     x1, [pSrc, #4]                          ;// Read the following word first, while pSrc still addresses the current row
    216         M_LDR   x0, [pSrc], srcStep
    217         SUBS    iHeight, iHeight, #1                    ;// Count this row; the flags set here are consumed by the BGT below
    218 
    219         ;// One cycle stall (original scheduling note; presumably the wait for the loaded data before its first use)
    220 
    221         MOV     x1, x1, LSL #16                         ;// x1 = x1 << 16
    222         ORR     x0, x1, x0, LSR #16                     ;// x0 = x1 | (x0 >> 16)
    223         STR     x0, [pDst], #4                              ;// Store aligned output row; pDst advances by 4 bytes
    224         BGT     Copy2toVAligned                         ;// Loop while iHeight is still greater than zero
    225         B       CopyVEnd                                ;// Done: skip the remaining offset case
    226         ;// Offset 3: output word = (word0 >> 24) | (word1 << 8)
    227 Copy3toVAligned
    228         LDR     x1, [pSrc, #4]                          ;// Read the following word first, while pSrc still addresses the current row
    229         M_LDR   x0, [pSrc], srcStep
    230         SUBS    iHeight, iHeight, #1                    ;// Count this row; the flags set here are consumed by the BGT below
    231 
    232         ;// One cycle stall (original scheduling note; presumably the wait for the loaded data before its first use)
    233 
    234         MOV     x1, x1, LSL #8                          ;// x1 = x1 << 8
    235         ORR     x0, x1, x0, LSR #24                     ;// x0 = x1 | (x0 >> 24)
    236         STR     x0, [pDst], #4                              ;// Store aligned output row; pDst advances by 4 bytes
    237         BGT     Copy3toVAligned                         ;// Loop while iHeight is still greater than zero; otherwise fall through to CopyVEnd
    238 
    239 CopyVEnd
    240 
    241         SUB     pSrc, pDst, #28                         ;// r0 = final pDst - 28 (7 words before the end of the stored data). NOTE(review): this is the third stored row only if 9 rows were copied - confirm against callers
    242         MOV     srcStep, #4                             ;// r1 = 4, the number of bytes stored per row above
    243 
    244         M_END
    245 
    246 
    247     ENDIF
    248 
    249     END
    250 
    251