Home | History | Annotate | Download | only in src
      1 ;//
      2 ;// Copyright (C) 2007-2008 ARM Limited
      3 ;//
      4 ;// Licensed under the Apache License, Version 2.0 (the "License");
      5 ;// you may not use this file except in compliance with the License.
      6 ;// You may obtain a copy of the License at
      7 ;//
      8 ;//      http://www.apache.org/licenses/LICENSE-2.0
      9 ;//
     10 ;// Unless required by applicable law or agreed to in writing, software
     11 ;// distributed under the License is distributed on an "AS IS" BASIS,
     12 ;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 ;// See the License for the specific language governing permissions and
     14 ;// limitations under the License.
     15 ;//
     16 ;//
     17 ;//
     18 ;// File Name:  armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
     19 ;// OpenMAX DL: v1.0.2
     20 ;// Revision:   12290
     21 ;// Date:       Wednesday, April 9, 2008
     22 ;//
     23 ;//
     24 ;//
     25 ;//
     26 
     27         INCLUDE omxtypes_s.h
     28         INCLUDE armCOMM_s.h
     29 
     30         EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
     31 
     32         M_VARIANTS CortexA8
     33 
     34     IF CortexA8
     35         M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r11
     36 ;// Loads nine 16-byte rows starting at pSrc, applies a (1,-5,20,20,-5,1) 6-tap filter down the columns and then along the rows, and leaves four rows of rounded, saturated bytes in d0/d2/d4/d6. Nothing is stored to memory in this routine.
     37 ;// Declare input registers (pDst and dstStep are named here but not referenced in this routine)
     38 pSrc            RN 0    ;// r0: address of the first row that is loaded (row A); post-incremented by the loads
     39 srcStep         RN 1    ;// r1: byte offset added to a row pointer after each row load
     40 pDst            RN 2    ;// r2: not referenced below
     41 dstStep         RN 3    ;// r3: not referenced below
     42 ;// r12 is used below as a second row pointer (rows F..I)
     43 ;// Declare Neon registers. Several names alias the same physical register; the instruction order below relies on the old value being dead before its alias is written.
     44 dTCoeff5        DN 30.U8    ;// d30 as 8-bit lanes: constant 5 for the column pass
     45 dTCoeff20       DN 31.U8    ;// d31 as 8-bit lanes: constant 20 for the column pass
     46 dCoeff5         DN 30.S16   ;// d30 again, as 16-bit lanes: constant 5 for the row pass
     47 dCoeff20        DN 31.S16   ;// d31 again, as 16-bit lanes: constant 20 for the row pass
     48 ;// Source rows A..I, one Q register (16 bytes) per row
     49 qSrcA01         QN 0.U8
     50 qSrcB23         QN 1.U8
     51 qSrcC45         QN 2.U8
     52 qSrcD67         QN 3.U8
     53 qSrcE89         QN 4.U8
     54 qSrcF1011       QN 5.U8
     55 qSrcG1213       QN 6.U8
     56 qSrcH1415       QN 7.U8
     57 qSrcI1617       QN 8.U8
     58 ;// Even D register of each row = bytes 0..7 (low half of the Q register above)
     59 dSrcA0          DN 0.U8
     60 dSrcB2          DN 2.U8
     61 dSrcC4          DN 4.U8
     62 dSrcD6          DN 6.U8
     63 dSrcE8          DN 8.U8
     64 dSrcF10         DN 10.U8
     65 dSrcG12         DN 12.U8
     66 dSrcH14         DN 14.U8
     67 dSrcI16         DN 16.U8
     68 ;// Odd D register of each row = bytes 8..15 (high half of the Q register above)
     69 dSrcA1          DN 1.U8
     70 dSrcB3          DN 3.U8
     71 dSrcC5          DN 5.U8
     72 dSrcD7          DN 7.U8
     73 dSrcE9          DN 9.U8
     74 dSrcF11         DN 11.U8
     75 dSrcG13         DN 13.U8
     76 dSrcH15         DN 15.U8
     77 dSrcI17         DN 17.U8
     78 ;// Column-pass results for rows P, Q, R, S, columns 0..7 (16-bit lanes, q9..q12)
     79 qTempP01        QN 9.S16
     80 qTempQ01        QN 10.S16
     81 qTempR01        QN 11.S16
     82 qTempS01        QN 12.S16
     83 ;// Column-pass results, columns 8..15; these reuse q0..q3, the registers that first hold rows A..D
     84 qTempP23        QN 0.S16
     85 qTempQ23        QN 1.S16
     86 qTempR23        QN 2.S16
     87 qTempS23        QN 3.S16
     88 ;// D-register views of the column-pass results: x0 = columns 0..3, x1 = columns 4..7, x2 = columns 8..11
     89 dTempP0         DN 18.S16
     90 dTempP1         DN 19.S16
     91 dTempP2         DN 0.S16
     92 
     93 dTempQ0         DN 20.S16
     94 dTempQ1         DN 21.S16
     95 dTempQ2         DN 2.S16
     96 
     97 dTempR0         DN 22.S16
     98 dTempR1         DN 23.S16
     99 dTempR2         DN 4.S16
    100 
    101 dTempS0         DN 24.S16
    102 dTempS1         DN 25.S16
    103 dTempS2         DN 6.S16
    104 ;// Scratch for the shifted 4-element windows built with VEXT in the row pass
    105 dTempB0         DN 26.S16
    106 dTempC0         DN 27.S16
    107 dTempD0         DN 28.S16
    108 dTempF0         DN 29.S16
    109 ;// 16-bit views of d0/d2/d4/d6: destinations of the first narrowing step
    110 dTempAcc0       DN 0.U16
    111 dTempAcc1       DN 2.U16
    112 dTempAcc2       DN 4.U16
    113 dTempAcc3       DN 6.U16
    114 ;// 8-bit views of d0/d2/d4/d6: final results
    115 dAcc0           DN 0.U8
    116 dAcc1           DN 2.U8
    117 dAcc2           DN 4.U8
    118 dAcc3           DN 6.U8
    119 ;// 32-bit accumulators for the row pass (q0..q3 once more)
    120 qAcc0           QN 0.S32
    121 qAcc1           QN 1.S32
    122 qAcc2           QN 2.S32
    123 qAcc3           QN 3.S32
    124 ;// 16-bit views of q0..q3: sources of the final narrowing step
    125 qTAcc0          QN 0.U16
    126 qTAcc1          QN 1.U16
    127 qTAcc2          QN 2.U16
    128 qTAcc3          QN 3.U16
    129 ;// NOTE(review): the code below writes d8..d15 (q4..q7); only r11 is passed to M_START here, so confirm that the macro or the caller preserves those registers.
    130 qTmp            QN 4.S16    ;// not referenced below
    131 dTmp            DN 8.S16    ;// row-pass scratch; d8 holds row E bytes 0..7 during the column pass
    132 ;// Column pass: X = x0 + x5 + 20*(x2 + x3) - 5*(x1 + x4) over six consecutive rows, widened to 16 bits. P uses rows A..F, Q uses B..G, R uses C..H, S uses D..I. ".lo" = columns 0..7, ".hi" = columns 8..15. Loads are interleaved with the arithmetic.
    133         VLD1        qSrcA01, [pSrc], srcStep                 ;// [a0 a1 a2 a3 .. a15]
    134         ADD         r12, pSrc, srcStep, LSL #2    ;// pSrc is already at row B, so r12 = row B + 4*srcStep = row F
    135         VMOV        dTCoeff5, #5
    136         VMOV        dTCoeff20, #20
    137         VLD1        qSrcF1011, [r12], srcStep    ;// row F
    138         VLD1        qSrcB23, [pSrc], srcStep                 ;// [b0 b1 b2 b3 .. b15]
    139 
    140         VLD1        qSrcG1213, [r12], srcStep    ;// row G
    141         VADDL       qTempP01, dSrcA0, dSrcF10    ;// P.lo = A + F
    142         VLD1        qSrcC45, [pSrc], srcStep                 ;// [c0 c1 c2 c3 .. c15]
    143         VADDL       qTempP23, dSrcA1, dSrcF11    ;// P.hi = A + F; overwrites q0 (row A is not read again)
    144         VLD1        qSrcD67, [pSrc], srcStep    ;// row D
    145         VADDL       qTempQ01, dSrcB2, dSrcG12    ;// Q.lo = B + G
    146         VLD1        qSrcE89, [pSrc], srcStep    ;// row E
    147 
    148         ;//t0
    149         VMLAL       qTempP01, dSrcC4, dTCoeff20    ;// P.lo += 20*C
    150 
    151         VLD1        qSrcH1415, [r12], srcStep    ;// row H
    152 
    153         VMLAL       qTempP23, dSrcC5, dTCoeff20    ;// P.hi += 20*C
    154 
    155         VLD1        qSrcI1617, [r12], srcStep                 ;// [i0 i1 i2 i3 .. ]
    156 
    157         VMLAL       qTempP01, dSrcD6, dTCoeff20    ;// P.lo += 20*D
    158         VMLAL       qTempQ01, dSrcD6, dTCoeff20    ;// Q.lo += 20*D
    159         VMLSL       qTempP23, dSrcB3, dTCoeff5    ;// P.hi -= 5*B
    160 
    161         VADDL       qTempR01, dSrcC4, dSrcH14    ;// R.lo = C + H
    162 
    163         VMLSL       qTempP01, dSrcB2, dTCoeff5    ;// P.lo -= 5*B
    164 
    165         VADDL       qTempQ23, dSrcB3, dSrcG13    ;// Q.hi = B + G; overwrites q1 (row B is not read again)
    166 
    167         VMLAL       qTempP23, dSrcD7, dTCoeff20    ;// P.hi += 20*D
    168         VMLAL       qTempQ01, dSrcE8, dTCoeff20    ;// Q.lo += 20*E
    169 
    170         VMLSL       qTempP01, dSrcE8, dTCoeff5    ;// P.lo -= 5*E (P.lo complete)
    171         VMLAL       qTempQ23, dSrcD7, dTCoeff20    ;// Q.hi += 20*D
    172 
    173         VMLSL       qTempP23, dSrcE9, dTCoeff5    ;// P.hi -= 5*E (P.hi complete)
    174 
    175         ;//t1
    176 
    177         VMLAL       qTempR01, dSrcE8, dTCoeff20    ;// R.lo += 20*E
    178         VMLSL       qTempQ01, dSrcC4, dTCoeff5    ;// Q.lo -= 5*C
    179         VMLSL       qTempQ23, dSrcC5, dTCoeff5    ;// Q.hi -= 5*C
    180         VADDL       qTempR23, dSrcC5, dSrcH15    ;// R.hi = C + H; overwrites q2 (row C is not read again)
    181 
    182         VMLAL       qTempR01, dSrcF10, dTCoeff20    ;// R.lo += 20*F
    183         VMLSL       qTempQ01, dSrcF10, dTCoeff5    ;// Q.lo -= 5*F (Q.lo complete)
    184         VMLAL       qTempQ23, dSrcE9, dTCoeff20    ;// Q.hi += 20*E
    185         VMLAL       qTempR23, dSrcE9, dTCoeff20    ;// R.hi += 20*E
    186         VADDL       qTempS01, dSrcD6, dSrcI16    ;// S.lo = D + I
    187 
    188 
    189         VMLSL       qTempR01, dSrcD6, dTCoeff5    ;// R.lo -= 5*D
    190         VMLSL       qTempQ23, dSrcF11, dTCoeff5    ;// Q.hi -= 5*F (Q.hi complete)
    191         VMLSL       qTempR23, dSrcD7, dTCoeff5    ;// R.hi -= 5*D
    192 
    193         ;//t2
    194         VADDL       qTempS23, dSrcD7, dSrcI17    ;// S.hi = D + I; overwrites q3 (row D is not read again)
    195         VMLAL       qTempS01, dSrcF10, dTCoeff20    ;// S.lo += 20*F
    196         VMLSL       qTempR01, dSrcG12, dTCoeff5    ;// R.lo -= 5*G (R.lo complete)
    197         VMLSL       qTempR23, dSrcG13, dTCoeff5    ;// R.hi -= 5*G
    198 
    199         VMLAL       qTempS23, dSrcF11, dTCoeff20    ;// S.hi += 20*F
    200         VMLAL       qTempS01, dSrcG12, dTCoeff20    ;// S.lo += 20*G
    201         VEXT        dTempB0, dTempP0, dTempP1, #1    ;// row pass starts for P: B0 = P[1..4]
    202         VMLAL       qTempR23, dSrcF11, dTCoeff20    ;// R.hi += 20*F (R.hi complete)
    203 
    204 
    205         ;//t3
    206         VMLAL       qTempS23, dSrcG13, dTCoeff20    ;// S.hi += 20*G (last read of dTCoeff20)
    207         VMLSL       qTempS01, dSrcE8, dTCoeff5    ;// S.lo -= 5*E (last read of d8 as row E)
    208         VEXT        dTempC0, dTempP0, dTempP1, #2    ;// C0 = P[2..5]
    209         VMOV        dCoeff20, #20    ;// d31 now holds 16-bit 20s for the row pass
    210         VMLSL       qTempS23, dSrcE9, dTCoeff5    ;// S.hi -= 5*E
    211         VMLSL       qTempS01, dSrcH14, dTCoeff5    ;// S.lo -= 5*H (S.lo complete)
    212         VEXT        dTempF0, dTempP1, dTempP2, #1    ;// F0 = P[5..8]
    213         VEXT        dTempD0, dTempP0, dTempP1, #3    ;// D0 = P[3..6]
    214         VMLSL       qTempS23, dSrcH15, dTCoeff5    ;// S.hi -= 5*H (S.hi complete; last read of dTCoeff5)
    215 
    216         VADDL       qAcc0, dTempP0, dTempF0    ;// acc0[i] = P[i] + P[i+5], i = 0..3, 32-bit; overwrites q0 (P.hi already consumed into F0)
    217         VADD        dTempC0, dTempC0, dTempD0    ;// C0 = P[i+2] + P[i+3]
    218         ;//h
    219         VMOV        dCoeff5, #5    ;// d30 now holds 16-bit 5s for the row pass
    220 
    221         ;// res0
    222         VADD        dTempB0, dTempB0, dTempP1    ;// B0 = P[i+1] + P[i+4]
    223         VMLAL       qAcc0, dTempC0, dCoeff20    ;// acc0 += 20*C0
    224         VEXT        dTempC0, dTempQ0, dTempQ1, #2    ;// C0 = Q[2..5]
    225         VEXT        dTempD0, dTempQ0, dTempQ1, #3    ;// D0 = Q[3..6]
    226         VEXT        dTempF0, dTempQ1, dTempQ2, #1    ;// F0 = Q[5..8]
    227         VMLSL       qAcc0, dTempB0, dCoeff5    ;// acc0 -= 5*B0 (acc0 complete)
    228 
    229         ;// res1
    230         VEXT        dTempB0, dTempQ0, dTempQ1, #1    ;// B0 = Q[1..4]
    231         VADDL       qAcc1, dTempQ0, dTempF0    ;// acc1[i] = Q[i] + Q[i+5]; overwrites q1 (Q.hi already consumed into F0)
    232         VADD        dTempC0, dTempC0, dTempD0    ;// C0 = Q[i+2] + Q[i+3]
    233         VADD        dTempB0, dTempB0, dTempQ1    ;// B0 = Q[i+1] + Q[i+4]
    234         VEXT        dTempD0, dTempR0, dTempR1, #3    ;// D0 = R[3..6]
    235         VMLAL       qAcc1, dTempC0, dCoeff20    ;// acc1 += 20*C0
    236         VEXT        dTempF0, dTempR1, dTempR2, #1    ;// F0 = R[5..8]
    237         VEXT        dTempC0, dTempR0, dTempR1, #2    ;// C0 = R[2..5]
    238         VEXT        dTmp, dTempR0, dTempR1, #1    ;// dTmp = R[1..4]; B0 still holds the row-Q value needed two instructions below
    239         VADDL       qAcc2, dTempR0, dTempF0    ;// acc2[i] = R[i] + R[i+5]; overwrites q2 (R.hi already consumed into F0)
    240         VMLSL       qAcc1, dTempB0, dCoeff5    ;// acc1 -= 5*B0 (acc1 complete)
    241 ;        VEXT        dTempB0, dTempR0, dTempR1, #1
    242         VADD        dTempC0, dTempC0, dTempD0    ;// C0 = R[i+2] + R[i+3]
    243 
    244         ;// res2
    245         VADD        dTempB0, dTmp, dTempR1    ;// B0 = R[i+1] + R[i+4]
    246         VEXT        dTempD0, dTempS0, dTempS1, #3    ;// D0 = S[3..6]
    247         VMLAL       qAcc2, dTempC0, dCoeff20    ;// acc2 += 20*C0
    248 ;        VADD        dTempB0, dTempB0, dTempR1
    249 
    250         ;// res3
    251         VEXT        dTempC0, dTempS0, dTempS1, #2    ;// C0 = S[2..5]
    252         VEXT        dTempF0, dTempS1, dTempS2, #1    ;// F0 = S[5..8]
    253         VADD        dTempC0, dTempC0, dTempD0    ;// C0 = S[i+2] + S[i+3]
    254         VEXT        dTmp, dTempS0, dTempS1, #1    ;// dTmp = S[1..4]
    255         VADDL       qAcc3, dTempS0, dTempF0    ;// acc3[i] = S[i] + S[i+5]; overwrites q3 (S.hi already consumed into F0)
    256         VMLSL       qAcc2, dTempB0, dCoeff5    ;// acc2 -= 5*B0 (acc2 complete)
    257         VMLAL       qAcc3, dTempC0, dCoeff20    ;// acc3 += 20*C0
    258         VADD        dTmp, dTmp, dTempS1    ;// dTmp = S[i+1] + S[i+4]
    259         VMLSL       qAcc3, dTmp, dCoeff5    ;// acc3 -= 5*dTmp (acc3 complete)
    260 ;// Narrow 32 -> 16 bits: rounding right shift by 10 with unsigned saturation; each result lands in the low D register of its own accumulator
    261         VQRSHRUN    dTempAcc0, qAcc0, #10
    262         VQRSHRUN    dTempAcc1, qAcc1, #10
    263         VQRSHRUN    dTempAcc2, qAcc2, #10
    264         VQRSHRUN    dTempAcc3, qAcc3, #10
    265 ;// Narrow 16 -> 8 bits with saturation. Each source Q register pairs the 4 values narrowed above (low D) with leftover accumulator bits (high D), so only byte lanes 0..3 of d0/d2/d4/d6 are results.
    266         VQMOVN      dAcc0, qTAcc0
    267         VQMOVN      dAcc1, qTAcc1
    268         VQMOVN      dAcc2, qTAcc2
    269         VQMOVN      dAcc3, qTAcc3
    270 
    271         M_END
    272 
    273     ENDIF
    274 
    275 
    276 
    277 
    278 
    279     END
    280 
    281