Home | History | Annotate | Download | only in src
      1 ;//
      2 ;//
      3 ;// File Name:  armVCM4P10_Average_4x_Align_unsafe_s.s
      4 ;// OpenMAX DL: v1.0.2
      5 ;// Revision:   12290
      6 ;// Date:       Wednesday, April 9, 2008
      7 ;//
      8 ;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ;//
     10 ;//
     11 ;//
     12 
     13 
     14 ;// Functions:
     15 ;//     armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe
     16 ;//
     17 ;// Implements Average of 4x4 with equation c = (a+b+1)>>1.
     18 ;// First operand will be at offset ALIGNMENT from aligned address
     19 ;// Second operand will be at aligned location and will be used as output.
     20 ;// destination pointed by (pDst) for vertical interpolation.
     21 ;// This function needs to copy 4 bytes in horizontal direction
     22 ;//
     23 ;// Registers used as input for this function
     24 ;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 the step size
     25 ;//
     26 ;// Registers preserved for top level function
     27 ;// r4,r5,r6,r8,r9,r14
     28 ;//
     29 ;// Registers modified by the function
     30 ;// r7,r10,r11,r12
     31 ;//
     32 ;// Output registers
     33 ;// r2 - pointer to the aligned location
     34 ;// r3 - step size to this aligned location
     35 
     36         INCLUDE omxtypes_s.h
     37         INCLUDE armCOMM_s.h
     38 
     39         M_VARIANTS ARM1136JS
     40         ;// All code in this file is assembled only inside the IF ARM1136JS block below.
                ;// Entry points made visible to other objects. Only the Align0, Align2
                ;// and Align3 variants are defined in this file.
     41         EXPORT armVCM4P10_Average_4x4_Align0_unsafe
     42         EXPORT armVCM4P10_Average_4x4_Align2_unsafe
     43         EXPORT armVCM4P10_Average_4x4_Align3_unsafe
     44 
     45 DEBUG_ON    SETL {FALSE}                ;// not referenced in the visible part of this file
     46 
     47 ;// Declare input registers
        ;// pPred1/pDstPred are both r2 and iPredStep1/iDstStep are both r3: the
        ;// second operand is read from, and the result stored back to, the same
        ;// pointer with the same step.
     48 pPred0          RN 0            ;// first operand: address the row words are loaded from
     49 iPredStep0      RN 1            ;// added to pPred0 after each row load (post-index)
     50 pPred1          RN 2            ;// second operand (read side)
     51 iPredStep1      RN 3            ;// register offset used to reach the next second-operand row
     52 pDstPred        RN 2            ;// output pointer: same register as pPred1
     53 iDstStep        RN 3            ;// output step: same register as iPredStep1
     54 
     55 ;// Declare other intermediate registers
        ;// Rows are processed in pairs: the *0 names hold the first row of a pair,
        ;// the *1 names the second.
     56 iPredA0         RN 10           ;// first-operand word, first row of the pair
     57 iPredA1         RN 11           ;// first-operand word, second row of the pair
     58 iPredB0         RN 12           ;// second-operand word, first row of the pair
     59 iPredB1         RN 14           ;// second-operand word, second row; NOTE(review): r14 is lr, so M_START/M_END must save/restore it - confirm in armCOMM_s.h
     60 Temp1           RN 4            ;// word at [pPred0, #4], used to realign iPredA0 (Align2/Align3 only)
     61 Temp2           RN 5            ;// word at [pPred0, #4], used to realign iPredA1 (Align2/Align3 only)
     62 ResultA         RN 5            ;// shares r5 with Temp2; Temp2 is consumed before ResultA is written
     63 ResultB         RN 4            ;// shares r4 with Temp1; Temp1 is consumed before ResultB is written
     64 r0x80808080     RN 7            ;// loaded with the constant 0x80808080 at the start of each function
     65 
     66     IF ARM1136JS
     67 
     68         ;// This function calculates average of 4x4 block
     69         ;// pPred0 is at alignment offset 0 and pPred1 is alignment 4
                ;//
                ;// Four rows are processed, one 32-bit word (4 bytes) per row. Every
                ;// output byte is c = (a + b + 1) >> 1, with a taken from pPred0 and b
                ;// from pPred1. pPred1 and pDstPred are the same register (r2), as are
                ;// iPredStep1 and iDstStep (r3), so each result row is stored over the
                ;// b row it was computed from.
                ;// In this variant the pPred0 words are used exactly as loaded (no
                ;// shift/merge step).
                ;// On exit pPred0 has been post-incremented four times by iPredStep0
                ;// and pDstPred four times by iDstStep - assuming M_LDR/M_STR expand to
                ;// plain LDR/STR with the addressing mode given; TODO confirm in
                ;// armCOMM_s.h.
     70 
     71         ;// Function header
                ;// NOTE(review): the r6 argument presumably makes M_START save r4-r6 and
                ;// lr (the file header lists r4,r5,r6,r14 as preserved); r4, r5 and r14
                ;// are used as scratch below - confirm in armCOMM_s.h.
     72         M_START armVCM4P10_Average_4x4_Align0_unsafe, r6
     73 
     74         ;// Code start
     75         LDR         r0x80808080, =0x80808080        ;// 0x80 in every byte lane, used by the EORs below
     76 
     77         ;// 1st load
                ;// Rows 0 and 1. The pPred1 loads do not move the pointer; the pPred0
                ;// loads step to the next row after each access.
     78         M_LDR       iPredB0, [pPred1]
     79         M_LDR       iPredA0, [pPred0], iPredStep0
     80         M_LDR       iPredB1, [pPred1, iPredStep1]
     81         M_LDR       iPredA1, [pPred0], iPredStep0
     82 
     83         ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
     84         MVN         iPredB0, iPredB0                ;// per byte: 255 - b
     85         MVN         iPredB1, iPredB1
     86         UHSUB8      ResultA, iPredA0, iPredB0       ;// per byte: (a - (255-b)) >> 1, low 8 bits kept
     87         UHSUB8      ResultB, iPredA1, iPredB1
     88         EOR         ResultA, ResultA, r0x80808080   ;// toggling bit 7 adds 128 (mod 256) to each byte
                ;// Each store advances pDstPred, and therefore pPred1, by one row.
     89         M_STR       ResultA, [pDstPred], iDstStep
     90         EOR         ResultB, ResultB, r0x80808080
     91         M_STR       ResultB, [pDstPred], iDstStep
     92 
     93         ;// 2nd load
                ;// Rows 2 and 3: the two stores above already moved pPred1 (== pDstPred)
                ;// two rows forward, so the same addressing as the 1st load is reused.
     94         M_LDR       iPredA0, [pPred0], iPredStep0
     95         M_LDR       iPredB0, [pPred1]
     96         M_LDR       iPredA1, [pPred0], iPredStep0
     97         M_LDR       iPredB1, [pPred1, iPredStep1]
     98 
     99         MVN         iPredB0, iPredB0                ;// per byte: 255 - b
    100         UHSUB8      ResultA, iPredA0, iPredB0       ;// per byte: (a - (255-b)) >> 1
    101         MVN         iPredB1, iPredB1
    102         UHSUB8      ResultB, iPredA1, iPredB1
    103         EOR         ResultA, ResultA, r0x80808080   ;// + 128 per byte (mod 256)
    104         M_STR       ResultA, [pDstPred], iDstStep
    105         EOR         ResultB, ResultB, r0x80808080
    106         M_STR       ResultB, [pDstPred], iDstStep
    107 End0
    108         M_END
    109 
    110         ;// This function calculates average of 4x4 block
    111         ;// pPred0 is at alignment offset 2 and pPred1 is alignment 4
                ;//
                ;// Four rows are processed, one 32-bit word (4 bytes) per row. Every
                ;// output byte is c = (a + b + 1) >> 1, with a taken from pPred0 and b
                ;// from pPred1. pPred1 and pDstPred are the same register (r2), as are
                ;// iPredStep1 and iDstStep (r3), so each result row is stored over the
                ;// b row it was computed from.
                ;// For each row the words at [pPred0] and [pPred0, #4] are loaded and
                ;// merged as (w0 >> 16) | (w1 << 16), i.e. the 4 bytes that start 2
                ;// bytes past pPred0 - assuming little-endian word loads.
                ;// On exit pPred0 has been post-incremented four times by iPredStep0
                ;// and pDstPred four times by iDstStep - assuming M_LDR/M_STR expand to
                ;// plain LDR/STR with the addressing mode given; TODO confirm in
                ;// armCOMM_s.h.
    112 
    113         ;// Function header
                ;// NOTE(review): the r6 argument presumably makes M_START save r4-r6 and
                ;// lr (the file header lists r4,r5,r6,r14 as preserved); r4, r5 and r14
                ;// are used as scratch below - confirm in armCOMM_s.h.
    114         M_START armVCM4P10_Average_4x4_Align2_unsafe, r6
    115 
    116         ;// Code start
    117         LDR         r0x80808080, =0x80808080        ;// 0x80 in every byte lane, used by the EORs below
    118 
    119         ;// 1st load
                ;// Rows 0 and 1. Each Temp load must come before the post-indexed
                ;// pPred0 load of the same row, because that load moves pPred0 on to
                ;// the next row.
                ;// NOTE(review): Temp2 is loaded with M_LDR here but with plain LDR in
                ;// the 2nd load; presumably equivalent - confirm against the macro.
    120         LDR         Temp1, [pPred0, #4]             ;// row 0: word following the one at pPred0
    121         M_LDR       iPredA0, [pPred0], iPredStep0
    122         M_LDR       iPredB0, [pPred1]
    123         M_LDR       iPredB1, [pPred1, iPredStep1]
    124         M_LDR       Temp2, [pPred0, #4]
    125         M_LDR       iPredA1, [pPred0], iPredStep0
    126         MVN         iPredB0, iPredB0                ;// per byte: 255 - b
    127         MVN         iPredB1, iPredB1
    128         MOV         iPredA0, iPredA0, LSR #16       ;// upper halfword of first word -> lower half
    129         ORR         iPredA0, iPredA0, Temp1, LSL #16 ;// lower halfword of next word -> upper half
    130         MOV         iPredA1, iPredA1, LSR #16
    131         ORR         iPredA1, iPredA1, Temp2, LSL #16 ;// last use of Temp2 (r5) before ResultA overwrites it
    132 
    133         ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
    134         UHSUB8      ResultA, iPredA0, iPredB0       ;// per byte: (a - (255-b)) >> 1, low 8 bits kept
    135         UHSUB8      ResultB, iPredA1, iPredB1
    136         EOR         ResultA, ResultA, r0x80808080   ;// toggling bit 7 adds 128 (mod 256) to each byte
                ;// Each store advances pDstPred, and therefore pPred1, by one row.
    137         M_STR       ResultA, [pDstPred], iDstStep
    138         EOR         ResultB, ResultB, r0x80808080
    139         M_STR       ResultB, [pDstPred], iDstStep
    140 
    141         ;// 2nd load
                ;// Rows 2 and 3: the two stores above already moved pPred1 (== pDstPred)
                ;// two rows forward, so the same addressing as the 1st load is reused.
    142         LDR         Temp1, [pPred0, #4]             ;// row 2: word following the one at pPred0
    143         M_LDR         iPredA0, [pPred0], iPredStep0
    144         LDR         iPredB0, [pPred1]
    145         LDR         iPredB1, [pPred1, iPredStep1]
    146         LDR         Temp2, [pPred0, #4]             ;// row 3: pPred0 was advanced by the load above
    147         M_LDR         iPredA1, [pPred0], iPredStep0
    148         MVN         iPredB0, iPredB0                ;// per byte: 255 - b
    149         MVN         iPredB1, iPredB1
    150         MOV         iPredA0, iPredA0, LSR #16       ;// same 2-byte realignment as in the 1st load
    151         ORR         iPredA0, iPredA0, Temp1, LSL #16
    152         MOV         iPredA1, iPredA1, LSR #16
    153         ORR         iPredA1, iPredA1, Temp2, LSL #16
    154 
    155         UHSUB8      ResultA, iPredA0, iPredB0       ;// per byte: (a - (255-b)) >> 1
    156         UHSUB8      ResultB, iPredA1, iPredB1
    157         EOR         ResultA, ResultA, r0x80808080   ;// + 128 per byte (mod 256)
    158         M_STR       ResultA, [pDstPred], iDstStep
    159         EOR         ResultB, ResultB, r0x80808080
    160         M_STR       ResultB, [pDstPred], iDstStep
    161 End2
    162         M_END
    163 
    164 
    165         ;// This function calculates average of 4x4 block
    166         ;// pPred0 is at alignment offset 3 and pPred1 is alignment 4
                ;//
                ;// Four rows are processed, one 32-bit word (4 bytes) per row. Every
                ;// output byte is c = (a + b + 1) >> 1, with a taken from pPred0 and b
                ;// from pPred1. pPred1 and pDstPred are the same register (r2), as are
                ;// iPredStep1 and iDstStep (r3), so each result row is stored over the
                ;// b row it was computed from.
                ;// For each row the words at [pPred0] and [pPred0, #4] are loaded and
                ;// merged as (w0 >> 24) | (w1 << 8), i.e. the 4 bytes that start 3
                ;// bytes past pPred0 - assuming little-endian word loads.
                ;// On exit pPred0 has been post-incremented four times by iPredStep0
                ;// and pDstPred four times by iDstStep - assuming M_LDR/M_STR expand to
                ;// plain LDR/STR with the addressing mode given; TODO confirm in
                ;// armCOMM_s.h.
    167 
    168         ;// Function header
                ;// NOTE(review): the r6 argument presumably makes M_START save r4-r6 and
                ;// lr (the file header lists r4,r5,r6,r14 as preserved); r4, r5 and r14
                ;// are used as scratch below - confirm in armCOMM_s.h.
    169         M_START armVCM4P10_Average_4x4_Align3_unsafe, r6
    170 
    171         ;// Code start
    172         LDR         r0x80808080, =0x80808080        ;// 0x80 in every byte lane, used by the EORs below
    173 
    174         ;// 1st load
                ;// Rows 0 and 1. Each Temp load must come before the post-indexed
                ;// pPred0 load of the same row, because that load moves pPred0 on to
                ;// the next row.
    175         LDR         Temp1, [pPred0, #4]             ;// row 0: word following the one at pPred0
    176         M_LDR       iPredA0, [pPred0], iPredStep0
    177         LDR         iPredB0, [pPred1]
    178         LDR         iPredB1, [pPred1, iPredStep1]
    179         LDR         Temp2, [pPred0, #4]             ;// row 1: pPred0 was advanced by the load above
    180         M_LDR       iPredA1, [pPred0], iPredStep0
    181 
    182         MVN         iPredB0, iPredB0                ;// per byte: 255 - b
    183         MVN         iPredB1, iPredB1
    184         MOV         iPredA0, iPredA0, LSR #24       ;// top byte of first word -> byte lane 0
    185         ORR         iPredA0, iPredA0, Temp1, LSL #8 ;// low three bytes of next word -> lanes 1..3
    186         MOV         iPredA1, iPredA1, LSR #24
    187         ORR         iPredA1, iPredA1, Temp2, LSL #8 ;// last use of Temp2 (r5) before ResultA overwrites it
                ;// Same identity as in the other variants:
                ;// (a+b+1)/2 = (a-(255-b))/2 + 128, evaluated per byte.
    188         UHSUB8      ResultA, iPredA0, iPredB0       ;// per byte: (a - (255-b)) >> 1, low 8 bits kept
    189         UHSUB8      ResultB, iPredA1, iPredB1
    190         EOR         ResultA, ResultA, r0x80808080   ;// toggling bit 7 adds 128 (mod 256) to each byte
                ;// Each store advances pDstPred, and therefore pPred1, by one row.
    191         M_STR       ResultA, [pDstPred], iDstStep
    192         EOR         ResultB, ResultB, r0x80808080
    193         M_STR       ResultB, [pDstPred], iDstStep
    194 
    195         ;// 2nd load
                ;// Rows 2 and 3: the two stores above already moved pPred1 (== pDstPred)
                ;// two rows forward, so the same addressing as the 1st load is reused.
    196         LDR         Temp1, [pPred0, #4]             ;// row 2: word following the one at pPred0
    197         M_LDR       iPredA0, [pPred0], iPredStep0
    198         LDR         iPredB0, [pPred1]
    199         LDR         iPredB1, [pPred1, iPredStep1]
    200         LDR         Temp2, [pPred0, #4]             ;// row 3: pPred0 was advanced by the load above
    201         M_LDR       iPredA1, [pPred0], iPredStep0
    202 
    203         MVN         iPredB0, iPredB0                ;// per byte: 255 - b
    204         MVN         iPredB1, iPredB1
    205         MOV         iPredA0, iPredA0, LSR #24       ;// same 3-byte realignment as in the 1st load
    206         ORR         iPredA0, iPredA0, Temp1, LSL #8
    207         MOV         iPredA1, iPredA1, LSR #24
    208         ORR         iPredA1, iPredA1, Temp2, LSL #8
    209 
    210         UHSUB8      ResultA, iPredA0, iPredB0       ;// per byte: (a - (255-b)) >> 1
    211         UHSUB8      ResultB, iPredA1, iPredB1
    212         EOR         ResultA, ResultA, r0x80808080   ;// + 128 per byte (mod 256)
    213         M_STR       ResultA, [pDstPred], iDstStep
    214         EOR         ResultB, ResultB, r0x80808080
    215         M_STR       ResultB, [pDstPred], iDstStep
    216 End3
    217         M_END
    218 
    219     ENDIF
    220 
    221     END
    222