;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name:  armVCM4P10_Average_4x_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;//
;//
;//


;// Functions:
;//     armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe
;//
;// Implements a 4x4 average using the equation c = (a+b+1)>>1.
;// The first operand is at offset ALIGNMENT from an aligned address.
;// The second operand is at an aligned location and is also used as the
;// output destination (pDst) for vertical interpolation.
;// The function copies 4 bytes per row in the horizontal direction.
;// A reference sketch of the per-byte arithmetic follows this header.
;//
;// Registers used as input for this function:
;// r0, r1, r2, r3, where r2 contains the aligned memory pointer and r3 the step size
;//
;// Registers preserved for the top-level function:
;// r4, r5, r6, r8, r9, r14
;//
;// Registers modified by the function:
;// r7, r10, r11, r12
;//
;// Output registers:
;// r2 - pointer to the aligned location
;// r3 - step size to this aligned location
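;//
;// All three variants below compute the rounded average per byte using the
;// identity (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128, realised
;// with MVN, UHSUB8 and an EOR with 0x80808080. The following C fragment is
;// a minimal reference sketch of that per-byte arithmetic; the function name
;// and types are illustrative only and are not part of the OpenMAX DL API.
;//
;//     #include <stdint.h>
;//
;//     static uint8_t RoundedAverage(uint8_t a, uint8_t b)
;//     {
;//         /* 9-bit difference against the complemented operand, halved,
;//            as UHSUB8 does for each byte lane. */
;//         uint32_t diff9 = ((uint32_t)a - (uint32_t)(255u - b)) & 0x1FFu;
;//         /* XOR with 0x80 adds the remaining 128 to each byte (mod 256). */
;//         return (uint8_t)(((diff9 >> 1) & 0xFFu) ^ 0x80u);
;//         /* Equivalent reference form: (uint8_t)((a + b + 1) >> 1) */
;//     }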

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_Average_4x4_Align0_unsafe
        EXPORT armVCM4P10_Average_4x4_Align2_unsafe
        EXPORT armVCM4P10_Average_4x4_Align3_unsafe

DEBUG_ON    SETL {FALSE}

;// Declare input registers
pPred0          RN 0
iPredStep0      RN 1
pPred1          RN 2
iPredStep1      RN 3
pDstPred        RN 2
iDstStep        RN 3
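;// Note that pPred1/iPredStep1 and pDstPred/iDstStep alias the same registers
;// (r2/r3): the second operand is read in place, and the post-incrementing
;// stores through pDstPred advance the pointer to the next pair of rows.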

;// Declare other intermediate registers
iPredA0         RN 10
iPredA1         RN 11
iPredB0         RN 12
iPredB1         RN 14
Temp1           RN 4
Temp2           RN 5
ResultA         RN 5
ResultB         RN 4
r0x80808080     RN 7

    IF ARM1136JS

        ;// This function calculates the average of a 4x4 block.
        ;// pPred0 is at alignment offset 0 and pPred1 is word (4-byte) aligned.
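        ;// Both operands are word aligned here, so each row's 4 bytes are
        ;// fetched with a single word load and no re-alignment shifts are needed.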

        ;// Function header
        M_START armVCM4P10_Average_4x4_Align0_unsafe, r6

        ;// Code start
        LDR         r0x80808080, =0x80808080

        ;// 1st load
        M_LDR       iPredB0, [pPred1]
        M_LDR       iPredA0, [pPred0], iPredStep0
        M_LDR       iPredB1, [pPred1, iPredStep1]
        M_LDR       iPredA1, [pPred0], iPredStep0

        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
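        ;// MVN forms 255-b in each byte, UHSUB8 produces (a-(255-b))>>1 per
        ;// byte, and the EOR with 0x80808080 adds the remaining 128 to each
        ;// byte (modulo 256), giving the rounded average (a+b+1)>>1.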
        MVN         iPredB0, iPredB0
        MVN         iPredB1, iPredB1
        UHSUB8      ResultA, iPredA0, iPredB0
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep

        ;// 2nd load
        M_LDR       iPredA0, [pPred0], iPredStep0
        M_LDR       iPredB0, [pPred1]
        M_LDR       iPredA1, [pPred0], iPredStep0
        M_LDR       iPredB1, [pPred1, iPredStep1]

        MVN         iPredB0, iPredB0
        UHSUB8      ResultA, iPredA0, iPredB0
        MVN         iPredB1, iPredB1
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep
End0
        M_END

        ;// This function calculates the average of a 4x4 block.
        ;// pPred0 is at alignment offset 2 and pPred1 is word (4-byte) aligned.

        ;// Function header
        M_START armVCM4P10_Average_4x4_Align2_unsafe, r6

        ;// Code start
        LDR         r0x80808080, =0x80808080

        ;// 1st load
        LDR         Temp1, [pPred0, #4]
        M_LDR       iPredA0, [pPred0], iPredStep0
        M_LDR       iPredB0, [pPred1]
        M_LDR       iPredB1, [pPred1, iPredStep1]
        M_LDR       Temp2, [pPred0, #4]
        M_LDR       iPredA1, [pPred0], iPredStep0
        MVN         iPredB0, iPredB0
        MVN         iPredB1, iPredB1
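        ;// Re-align the first operand: the 4 bytes at offset 2 straddle a word
        ;// boundary, so combine the top halfword of the first load with the
        ;// low halfword of the word loaded from offset 4 (little-endian data).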
        MOV         iPredA0, iPredA0, LSR #16
        ORR         iPredA0, iPredA0, Temp1, LSL #16
        MOV         iPredA1, iPredA1, LSR #16
        ORR         iPredA1, iPredA1, Temp2, LSL #16

        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
        UHSUB8      ResultA, iPredA0, iPredB0
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep

        ;// 2nd load
        LDR         Temp1, [pPred0, #4]
        M_LDR       iPredA0, [pPred0], iPredStep0
        LDR         iPredB0, [pPred1]
        LDR         iPredB1, [pPred1, iPredStep1]
        LDR         Temp2, [pPred0, #4]
        M_LDR       iPredA1, [pPred0], iPredStep0
        MVN         iPredB0, iPredB0
        MVN         iPredB1, iPredB1
        MOV         iPredA0, iPredA0, LSR #16
        ORR         iPredA0, iPredA0, Temp1, LSL #16
        MOV         iPredA1, iPredA1, LSR #16
        ORR         iPredA1, iPredA1, Temp2, LSL #16

        UHSUB8      ResultA, iPredA0, iPredB0
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep
End2
        M_END


        ;// This function calculates the average of a 4x4 block.
        ;// pPred0 is at alignment offset 3 and pPred1 is word (4-byte) aligned.

        ;// Function header
        M_START armVCM4P10_Average_4x4_Align3_unsafe, r6

        ;// Code start
        LDR         r0x80808080, =0x80808080

        ;// 1st load
        LDR         Temp1, [pPred0, #4]
        M_LDR       iPredA0, [pPred0], iPredStep0
        LDR         iPredB0, [pPred1]
        LDR         iPredB1, [pPred1, iPredStep1]
        LDR         Temp2, [pPred0, #4]
        M_LDR       iPredA1, [pPred0], iPredStep0

        MVN         iPredB0, iPredB0
        MVN         iPredB1, iPredB1
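        ;// Re-align the first operand: keep the top byte of the first load and
        ;// append the three low bytes of the word loaded from offset 4, giving
        ;// the 4 bytes that start at offset 3 (little-endian data).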
        MOV         iPredA0, iPredA0, LSR #24
        ORR         iPredA0, iPredA0, Temp1, LSL #8
        MOV         iPredA1, iPredA1, LSR #24
        ORR         iPredA1, iPredA1, Temp2, LSL #8
        UHSUB8      ResultA, iPredA0, iPredB0
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep

        ;// 2nd load
        LDR         Temp1, [pPred0, #4]
        M_LDR       iPredA0, [pPred0], iPredStep0
        LDR         iPredB0, [pPred1]
        LDR         iPredB1, [pPred1, iPredStep1]
        LDR         Temp2, [pPred0, #4]
        M_LDR       iPredA1, [pPred0], iPredStep0

        MVN         iPredB0, iPredB0
        MVN         iPredB1, iPredB1
        MOV         iPredA0, iPredA0, LSR #24
        ORR         iPredA0, iPredA0, Temp1, LSL #8
        MOV         iPredA1, iPredA1, LSR #24
        ORR         iPredA1, iPredA1, Temp2, LSL #8

        UHSUB8      ResultA, iPredA0, iPredB0
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep
End3
        M_END

    ENDIF

    END