Home | History | Annotate | Download | only in src
      1 ;//
      2 ;//
      3 ;// File Name:  armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
      4 ;// OpenMAX DL: v1.0.2
      5 ;// Revision:   12290
      6 ;// Date:       Wednesday, April 9, 2008
      7 ;//
      8 ;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
      9 ;//
     10 ;//
     11 ;//
     12 
     13         INCLUDE omxtypes_s.h
     14         INCLUDE armCOMM_s.h
     15 
     16         EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
     17 
     18         M_VARIANTS CortexA8
     19 
     20     IF CortexA8
     21         M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r11
     22 
        ;// ---------------------------------------------------------------------
        ;// Half-pel luma interpolation for one 4x4 block: vertical 6-tap filter
        ;// first, horizontal 6-tap filter second, taps (1, -5, 20, 20, -5, 1).
        ;//
        ;// Stage 1 (vertical):
        ;//   Nine source rows A..I (16 bytes each, row pitch srcStep) are loaded
        ;//   starting at pSrc.  Four 16-bit intermediate rows are formed:
        ;//       P = A - 5*B + 20*C + 20*D - 5*E + F
        ;//       Q = B - 5*C + 20*D + 20*E - 5*F + G
        ;//       R = C - 5*D + 20*E + 20*F - 5*G + H
        ;//       S = D - 5*E + 20*F + 20*G - 5*H + I
        ;// Stage 2 (horizontal), for each intermediate row X and j = 0..3:
        ;//       acc[j] = X[j] - 5*X[j+1] + 20*X[j+2] + 20*X[j+3] - 5*X[j+4] + X[j+5]
        ;//   accumulated in 32 bits.
        ;// Stage 3:
        ;//   Each accumulator is rounded, shifted right by 10 and saturated to
        ;//   unsigned, then narrowed to bytes.
        ;//
        ;// In:   r0 = pSrc, r1 = srcStep.
        ;//       pDst (r2) and dstStep (r3) are named below but no instruction
        ;//       in this routine reads or writes them.
        ;// Out:  result rows 0..3 are left in d0, d2, d4, d6 (dAcc0..dAcc3);
        ;//       only the low 4 bytes of each are filter outputs.  Nothing is
        ;//       stored to memory here.
        ;// Also modified: pSrc (advanced by 5*srcStep), r12 (second row
        ;//       pointer), and every NEON register q0-q15.
        ;// NOTE(review): d8-d15 are callee-saved under the AAPCS and are all
        ;//       overwritten here, while M_START names only r11.  Presumably the
        ;//       "_unsafe" suffix means callers take care of this - confirm
        ;//       against armCOMM_s.h and the call sites.
        ;// ---------------------------------------------------------------------
     23 ;// Declare input registers
     24 pSrc            RN 0
     25 srcStep         RN 1
     26 pDst            RN 2
     27 dstStep         RN 3
     28 
     29 ;// Declare Neon registers
        ;// d30/d31 hold the filter constants.  They are first used as 8-bit
        ;// lanes (vertical pass) and later reloaded as 16-bit lanes
        ;// (horizontal pass); both names below refer to the same register.
     30 dTCoeff5        DN 30.U8
     31 dTCoeff20       DN 31.U8
     32 dCoeff5         DN 30.S16
     33 dCoeff20        DN 31.S16
     34 
        ;// Source rows A..I, one Q register (16 bytes) per row.
     35 qSrcA01         QN 0.U8
     36 qSrcB23         QN 1.U8
     37 qSrcC45         QN 2.U8
     38 qSrcD67         QN 3.U8
     39 qSrcE89         QN 4.U8
     40 qSrcF1011       QN 5.U8
     41 qSrcG1213       QN 6.U8
     42 qSrcH1415       QN 7.U8
     43 qSrcI1617       QN 8.U8
     44 
        ;// Low halves of the source rows (columns 0..7).
     45 dSrcA0          DN 0.U8
     46 dSrcB2          DN 2.U8
     47 dSrcC4          DN 4.U8
     48 dSrcD6          DN 6.U8
     49 dSrcE8          DN 8.U8
     50 dSrcF10         DN 10.U8
     51 dSrcG12         DN 12.U8
     52 dSrcH14         DN 14.U8
     53 dSrcI16         DN 16.U8
     54 
        ;// High halves of the source rows (columns 8..15).
     55 dSrcA1          DN 1.U8
     56 dSrcB3          DN 3.U8
     57 dSrcC5          DN 5.U8
     58 dSrcD7          DN 7.U8
     59 dSrcE9          DN 9.U8
     60 dSrcF11         DN 11.U8
     61 dSrcG13         DN 13.U8
     62 dSrcH15         DN 15.U8
     63 dSrcI17         DN 17.U8
     64 
        ;// Intermediate rows, columns 0..7 (16-bit lanes) in q9-q12.
     65 qTempP01        QN 9.S16
     66 qTempQ01        QN 10.S16
     67 qTempR01        QN 11.S16
     68 qTempS01        QN 12.S16
     69 
        ;// Intermediate rows, columns 8..15.  These alias q0-q3, i.e. the
        ;// registers that hold source rows A-D, so each one may only be
        ;// written once the matching source row has been fully consumed.
     70 qTempP23        QN 0.S16
     71 qTempQ23        QN 1.S16
     72 qTempR23        QN 2.S16
     73 qTempS23        QN 3.S16
     74 
        ;// D-register views of the intermediate rows:
        ;//   X0 = columns 0..3, X1 = columns 4..7, X2 = columns 8..11.
     75 dTempP0         DN 18.S16
     76 dTempP1         DN 19.S16
     77 dTempP2         DN 0.S16
     78 
     79 dTempQ0         DN 20.S16
     80 dTempQ1         DN 21.S16
     81 dTempQ2         DN 2.S16
     82 
     83 dTempR0         DN 22.S16
     84 dTempR1         DN 23.S16
     85 dTempR2         DN 4.S16
     86 
     87 dTempS0         DN 24.S16
     88 dTempS1         DN 25.S16
     89 dTempS2         DN 6.S16
     90 
        ;// Scratch for the shifted windows used by the horizontal pass.
     91 dTempB0         DN 26.S16
     92 dTempC0         DN 27.S16
     93 dTempD0         DN 28.S16
     94 dTempF0         DN 29.S16
     95 
        ;// 16-bit results of the rounding shift (low half of each accumulator).
     96 dTempAcc0       DN 0.U16
     97 dTempAcc1       DN 2.U16
     98 dTempAcc2       DN 4.U16
     99 dTempAcc3       DN 6.U16
    100 
        ;// Final 8-bit result rows.
    101 dAcc0           DN 0.U8
    102 dAcc1           DN 2.U8
    103 dAcc2           DN 4.U8
    104 dAcc3           DN 6.U8
    105 
        ;// 32-bit accumulators for the horizontal pass.  They reuse q0-q3
        ;// again, so each is written only after columns 8..11 of the matching
        ;// intermediate row (dTempX2) have been read.
    106 qAcc0           QN 0.S32
    107 qAcc1           QN 1.S32
    108 qAcc2           QN 2.S32
    109 qAcc3           QN 3.S32
    110 
        ;// U16 views of q0-q3 used as the source of the final narrowing.
    111 qTAcc0          QN 0.U16
    112 qTAcc1          QN 1.U16
    113 qTAcc2          QN 2.U16
    114 qTAcc3          QN 3.U16
    115 
        ;// qTmp is declared but not referenced by any instruction below.
        ;// dTmp is d8, the low half of source row E; it is reused as scratch
        ;// only after the last read of dSrcE8.
    116 qTmp            QN 4.S16
    117 dTmp            DN 8.S16
    118 
        ;// ---- Stage 1: load rows A..I and run the vertical filter.  Loads of
        ;// ---- rows A-E go through pSrc, rows F-I through r12; the arithmetic
        ;// ---- for P, Q, R, S is interleaved with the loads.
    119         VLD1        qSrcA01, [pSrc], srcStep                 ;// [a0 a1 a2 a3 .. a15]
    120         ADD         r12, pSrc, srcStep, LSL #2                ;// r12 = row F (pSrc already points at row B)
    121         VMOV        dTCoeff5, #5                              ;// 8-bit lanes = 5
    122         VMOV        dTCoeff20, #20                            ;// 8-bit lanes = 20
    123         VLD1        qSrcF1011, [r12], srcStep                 ;// row F
    124         VLD1        qSrcB23, [pSrc], srcStep                 ;// [b0 b1 b2 b3 .. b15]
    125 
    126         VLD1        qSrcG1213, [r12], srcStep                 ;// row G
    127         VADDL       qTempP01, dSrcA0, dSrcF10                 ;// P[0..7]   = A + F
    128         VLD1        qSrcC45, [pSrc], srcStep                 ;// [c0 c1 c2 c3 .. c15]
    129         VADDL       qTempP23, dSrcA1, dSrcF11                 ;// P[8..15]  = A + F   (overwrites row A in q0)
    130         VLD1        qSrcD67, [pSrc], srcStep                  ;// row D
    131         VADDL       qTempQ01, dSrcB2, dSrcG12                 ;// Q[0..7]   = B + G
    132         VLD1        qSrcE89, [pSrc], srcStep                  ;// row E
    133 
    134         ;//t0
    135         VMLAL       qTempP01, dSrcC4, dTCoeff20               ;// P[0..7]  += 20*C
    136 
    137         VLD1        qSrcH1415, [r12], srcStep                 ;// row H
    138 
    139         VMLAL       qTempP23, dSrcC5, dTCoeff20               ;// P[8..15] += 20*C
    140 
    141         VLD1        qSrcI1617, [r12], srcStep                 ;// [i0 i1 i2 i3 .. ]
    142 
    143         VMLAL       qTempP01, dSrcD6, dTCoeff20               ;// P[0..7]  += 20*D
    144         VMLAL       qTempQ01, dSrcD6, dTCoeff20               ;// Q[0..7]  += 20*D
    145         VMLSL       qTempP23, dSrcB3, dTCoeff5                ;// P[8..15] -= 5*B
    146 
    147         VADDL       qTempR01, dSrcC4, dSrcH14                 ;// R[0..7]   = C + H
    148 
    149         VMLSL       qTempP01, dSrcB2, dTCoeff5                ;// P[0..7]  -= 5*B   (last read of dSrcB2)
    150 
    151         VADDL       qTempQ23, dSrcB3, dSrcG13                 ;// Q[8..15]  = B + G   (overwrites row B in q1)
    152 
    153         VMLAL       qTempP23, dSrcD7, dTCoeff20               ;// P[8..15] += 20*D
    154         VMLAL       qTempQ01, dSrcE8, dTCoeff20               ;// Q[0..7]  += 20*E
    155 
    156         VMLSL       qTempP01, dSrcE8, dTCoeff5                ;// P[0..7]  -= 5*E   (P[0..7] complete)
    157         VMLAL       qTempQ23, dSrcD7, dTCoeff20               ;// Q[8..15] += 20*D
    158 
    159         VMLSL       qTempP23, dSrcE9, dTCoeff5                ;// P[8..15] -= 5*E   (P[8..15] complete)
    160 
    161         ;//t1
    162 
    163         VMLAL       qTempR01, dSrcE8, dTCoeff20               ;// R[0..7]  += 20*E
    164         VMLSL       qTempQ01, dSrcC4, dTCoeff5                ;// Q[0..7]  -= 5*C   (last read of dSrcC4)
    165         VMLSL       qTempQ23, dSrcC5, dTCoeff5                ;// Q[8..15] -= 5*C
    166         VADDL       qTempR23, dSrcC5, dSrcH15                 ;// R[8..15]  = C + H   (overwrites row C in q2)
    167 
    168         VMLAL       qTempR01, dSrcF10, dTCoeff20              ;// R[0..7]  += 20*F
    169         VMLSL       qTempQ01, dSrcF10, dTCoeff5               ;// Q[0..7]  -= 5*F   (Q[0..7] complete)
    170         VMLAL       qTempQ23, dSrcE9, dTCoeff20               ;// Q[8..15] += 20*E
    171         VMLAL       qTempR23, dSrcE9, dTCoeff20               ;// R[8..15] += 20*E
    172         VADDL       qTempS01, dSrcD6, dSrcI16                 ;// S[0..7]   = D + I
    173 
    174 
    175         VMLSL       qTempR01, dSrcD6, dTCoeff5                ;// R[0..7]  -= 5*D   (last read of dSrcD6)
    176         VMLSL       qTempQ23, dSrcF11, dTCoeff5               ;// Q[8..15] -= 5*F   (Q[8..15] complete)
    177         VMLSL       qTempR23, dSrcD7, dTCoeff5                ;// R[8..15] -= 5*D
    178 
    179         ;//t2
    180         VADDL       qTempS23, dSrcD7, dSrcI17                 ;// S[8..15]  = D + I   (overwrites row D in q3)
    181         VMLAL       qTempS01, dSrcF10, dTCoeff20              ;// S[0..7]  += 20*F
    182         VMLSL       qTempR01, dSrcG12, dTCoeff5               ;// R[0..7]  -= 5*G   (R[0..7] complete)
    183         VMLSL       qTempR23, dSrcG13, dTCoeff5               ;// R[8..15] -= 5*G
    184 
    185         VMLAL       qTempS23, dSrcF11, dTCoeff20              ;// S[8..15] += 20*F
    186         VMLAL       qTempS01, dSrcG12, dTCoeff20              ;// S[0..7]  += 20*G
    187         VEXT        dTempB0, dTempP0, dTempP1, #1             ;// B0 = P[1..4]  (horizontal pass for row P starts here)
    188         VMLAL       qTempR23, dSrcF11, dTCoeff20              ;// R[8..15] += 20*F  (R[8..15] complete)
    189 
    190 
    191         ;//t3
    192         VMLAL       qTempS23, dSrcG13, dTCoeff20              ;// S[8..15] += 20*G  (last use of d31 as 8-bit lanes)
    193         VMLSL       qTempS01, dSrcE8, dTCoeff5                ;// S[0..7]  -= 5*E   (last read of dSrcE8 / d8)
    194         VEXT        dTempC0, dTempP0, dTempP1, #2             ;// C0 = P[2..5]
    195         VMOV        dCoeff20, #20                             ;// d31 reloaded: 16-bit lanes = 20
    196         VMLSL       qTempS23, dSrcE9, dTCoeff5                ;// S[8..15] -= 5*E
    197         VMLSL       qTempS01, dSrcH14, dTCoeff5               ;// S[0..7]  -= 5*H   (S[0..7] complete)
    198         VEXT        dTempF0, dTempP1, dTempP2, #1             ;// F0 = P[5..8]  (only read of dTempP2)
    199         VEXT        dTempD0, dTempP0, dTempP1, #3             ;// D0 = P[3..6]
    200         VMLSL       qTempS23, dSrcH15, dTCoeff5               ;// S[8..15] -= 5*H   (complete; last use of d30 as 8-bit lanes)
    201 
        ;// ---- Stage 2: horizontal filter.  For each row X:
        ;// ----   acc = (X[j] + X[j+5]) + 20*(X[j+2] + X[j+3]) - 5*(X[j+1] + X[j+4])
        ;// ---- The work for consecutive rows is interleaved.
    202         VADDL       qAcc0, dTempP0, dTempF0                   ;// acc0  = P[j] + P[j+5]   (overwrites q0)
    203         VADD        dTempC0, dTempC0, dTempD0                 ;// C0 = P[j+2] + P[j+3]
    204         ;//h
    205         VMOV        dCoeff5, #5                               ;// d30 reloaded: 16-bit lanes = 5
    206 
    207         ;// res0
    208         VADD        dTempB0, dTempB0, dTempP1                 ;// B0 = P[j+1] + P[j+4]
    209         VMLAL       qAcc0, dTempC0, dCoeff20                  ;// acc0 += 20*C0
    210         VEXT        dTempC0, dTempQ0, dTempQ1, #2             ;// C0 = Q[2..5]
    211         VEXT        dTempD0, dTempQ0, dTempQ1, #3             ;// D0 = Q[3..6]
    212         VEXT        dTempF0, dTempQ1, dTempQ2, #1             ;// F0 = Q[5..8]  (only read of dTempQ2)
    213         VMLSL       qAcc0, dTempB0, dCoeff5                   ;// acc0 -= 5*B0  (row 0 accumulator complete)
    214 
    215         ;// res1
    216         VEXT        dTempB0, dTempQ0, dTempQ1, #1             ;// B0 = Q[1..4]
    217         VADDL       qAcc1, dTempQ0, dTempF0                   ;// acc1  = Q[j] + Q[j+5]   (overwrites q1)
    218         VADD        dTempC0, dTempC0, dTempD0                 ;// C0 = Q[j+2] + Q[j+3]
    219         VADD        dTempB0, dTempB0, dTempQ1                 ;// B0 = Q[j+1] + Q[j+4]
    220         VEXT        dTempD0, dTempR0, dTempR1, #3             ;// D0 = R[3..6]
    221         VMLAL       qAcc1, dTempC0, dCoeff20                  ;// acc1 += 20*C0
    222         VEXT        dTempF0, dTempR1, dTempR2, #1             ;// F0 = R[5..8]  (only read of dTempR2)
    223         VEXT        dTempC0, dTempR0, dTempR1, #2             ;// C0 = R[2..5]
    224         VEXT        dTmp, dTempR0, dTempR1, #1                ;// dTmp = R[1..4]  (B0 still holds row Q's sum)
    225         VADDL       qAcc2, dTempR0, dTempF0                   ;// acc2  = R[j] + R[j+5]   (overwrites q2)
    226         VMLSL       qAcc1, dTempB0, dCoeff5                   ;// acc1 -= 5*B0  (row 1 accumulator complete)
    227 ;        VEXT        dTempB0, dTempR0, dTempR1, #1
    228         VADD        dTempC0, dTempC0, dTempD0                 ;// C0 = R[j+2] + R[j+3]
    229 
    230         ;// res2
    231         VADD        dTempB0, dTmp, dTempR1                    ;// B0 = R[j+1] + R[j+4]
    232         VEXT        dTempD0, dTempS0, dTempS1, #3             ;// D0 = S[3..6]
    233         VMLAL       qAcc2, dTempC0, dCoeff20                  ;// acc2 += 20*C0
    234 ;        VADD        dTempB0, dTempB0, dTempR1
    235 
    236         ;// res3
    237         VEXT        dTempC0, dTempS0, dTempS1, #2             ;// C0 = S[2..5]
    238         VEXT        dTempF0, dTempS1, dTempS2, #1             ;// F0 = S[5..8]  (only read of dTempS2)
    239         VADD        dTempC0, dTempC0, dTempD0                 ;// C0 = S[j+2] + S[j+3]
    240         VEXT        dTmp, dTempS0, dTempS1, #1                ;// dTmp = S[1..4]
    241         VADDL       qAcc3, dTempS0, dTempF0                   ;// acc3  = S[j] + S[j+5]   (overwrites q3)
    242         VMLSL       qAcc2, dTempB0, dCoeff5                   ;// acc2 -= 5*B0  (row 2 accumulator complete)
    243         VMLAL       qAcc3, dTempC0, dCoeff20                  ;// acc3 += 20*C0
    244         VADD        dTmp, dTmp, dTempS1                       ;// dTmp = S[j+1] + S[j+4]
    245         VMLSL       qAcc3, dTmp, dCoeff5                      ;// acc3 -= 5*dTmp  (row 3 accumulator complete)
    246 
        ;// ---- Stage 3: rounding shift right by 10 with unsigned saturation,
        ;// ---- 32-bit -> 16-bit.  Each result lands in the low half of its
        ;// ---- own accumulator register (d0, d2, d4, d6).
    247         VQRSHRUN    dTempAcc0, qAcc0, #10
    248         VQRSHRUN    dTempAcc1, qAcc1, #10
    249         VQRSHRUN    dTempAcc2, qAcc2, #10
    250         VQRSHRUN    dTempAcc3, qAcc3, #10
    251 
        ;// Saturating narrow 16-bit -> 8-bit.  The source is the full Q
        ;// register, whose high half still holds leftover accumulator data,
        ;// so only the low 4 bytes of each dAccN are filter outputs.
    252         VQMOVN      dAcc0, qTAcc0
    253         VQMOVN      dAcc1, qTAcc1
    254         VQMOVN      dAcc2, qTAcc2
    255         VQMOVN      dAcc3, qTAcc3
    256 
    257         M_END
    258 
    259     ENDIF
    260 
    261 
    262 
    263 
    264 
    265     END
    266 
    267