Home | History | Annotate | Download | only in src_gcc
      1 /*
      2  * Copyright (C) 2007-2008 ARM Limited
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  *
     16  */
     17 /*
     18  *
     19  */
     20 
     21     .eabi_attribute 24, 1          /* build attribute 24 (Tag_ABI_align_needed) = 1: code relies on 8-byte stack alignment */
     22     .eabi_attribute 25, 1          /* build attribute 25 (Tag_ABI_align_preserved) = 1: code keeps the stack 8-byte aligned */
     23 
     24     .arm                           /* assemble as ARM (A32) instructions, not Thumb */
     25     .fpu neon                      /* allow the NEON (Advanced SIMD) mnemonics used in this file */
     26     .text                          /* everything that follows goes into the code section */
     27 
     28     .global omxVCM4P10_DequantTransformResidualFromPairAndAdd
     29     .func   omxVCM4P10_DequantTransformResidualFromPairAndAdd
            /*
             * omxVCM4P10_DequantTransformResidualFromPairAndAdd
             *
             * Reconstructs one 4x4 block of bytes: obtains 16 signed 16-bit
             * residual values, adds them to a 4x4 block of prediction bytes,
             * saturates the sums to 0..255 and stores four 4-byte rows.
             *
             * Two ways of obtaining the residual, chosen by the fourth stack
             * argument:
             *   non-zero  unpack coefficients via armVCM4P10_UnpackBlock4x4,
             *             scale them with table-derived multipliers, run two
             *             butterfly passes separated by 4x4 transposes, then
             *             apply a rounding shift right by 6.
             *   zero      read a single halfword through the DC pointer and use
             *             (value + 32) >> 6 for all 16 positions.
             *
             * Inputs, described by how the code uses them.
             * NOTE(review): the argument names are reviewer shorthand, confirm
             * them against the C prototype of this function.
             *   r0           passed through untouched to armVCM4P10_UnpackBlock4x4
             *   r1           prediction pointer, four 32-bit rows are read
             *   r2           pointer to a signed 16-bit DC value. On the unpack
             *                path it may be 0, in which case it is ignored. On
             *                the DC-only path it is dereferenced without a check.
             *   r3           destination pointer, four 32-bit rows are written
             *   stack word 0 byte step between prediction rows
             *   stack word 1 byte step between destination rows
             *   stack word 2 index into armVCM4P10_QPModuloTable and
             *                armVCM4P10_QPDivTable (called QP below)
             *   stack word 3 path selector described above
             *
             * Returns: r0 = 0 on every path.
             *
             * Frame: 40 bytes (r4-r12, lr) + 16 bytes (d8-d9) + 32 bytes of
             * scratch = 0x58, so the caller's stack words 0..3 are found at
             * [sp,#0x58], [sp,#0x5c], [sp,#0x60] and [sp,#0x64].
             */
     30 omxVCM4P10_DequantTransformResidualFromPairAndAdd:
     31     PUSH     {r4-r12,lr}
     32     VPUSH    {d8-d9}                  /* d8 and d9 (together q4) are overwritten below */
     33     SUB      sp,sp,#0x20              /* 32-byte scratch buffer for 16 halfwords */
     34     ADD      r4,sp,#0                 /* r4 = scratch buffer */
     35     LDR      r5,[sp,#0x64]            /* r5 = path selector (stack word 3) */
     36     MOV      r7,r1                    /* r7 = prediction pointer */
     37     MOV      r8,r2                    /* r8 = DC pointer */
     38     MOV      r9,r3                    /* r9 = destination pointer */
     39     CMP      r5,#0
     40     BEQ      L0x114                   /* selector == 0: take the DC-only path */
            /*
             * Unpack path. r0 still holds the caller's first argument and r1 is
             * pointed at the scratch buffer. The 32 bytes at r4 are read back
             * with VLD1.16 further down.
             * NOTE(review): the helper is defined outside this file; presumably
             * it fills the buffer with 16 coefficients, confirm.
             */
     41     MOV      r1,r4
     42     BL       armVCM4P10_UnpackBlock4x4  ;//
     43     LDR      r1,[sp,#0x60]            /* r1 = QP (stack word 2) */
            /*
             * Position-independent table addresses. Each literal holds
             * table - (Pn + 8). In ARM state pc reads as the address of the
             * current instruction plus 8, so the ADD at Pn yields the absolute
             * address of the table.
             */
     44     LDR      r11, .LarmVCM4P10_QPModuloTable
     45 P0: ADD      r11, pc
     46     LDR      r10, .LarmVCM4P10_QPDivTable
     47 P1: ADD      r10, pc
     48     LDR      r2, .LarmVCM4P10_VMatrixU16
     49 P2: ADD      r2, pc
     50     LDRSB    r12,[r11,r1]             /* r12 = QPModuloTable[QP], signed byte */
     51     LDRSB    lr,[r10,r1]              /* lr  = QPDivTable[QP], signed byte */
     52     LDR      r10, =0x3020504          /* VTBL byte indices 4,5,2,3 */
     53     LDR      r1, =0x5040100           /* VTBL byte indices 0,1,4,5 */
     54     ADD      r2,r2,r12                /* r12 is used directly as a byte offset into VMatrixU16 */
     55     VDUP.32  d7,r1                    /* d7 = indices 0,1,4,5,0,1,4,5 */
     56     VDUP.32  d9,r10                   /* d9 = indices 4,5,2,3,4,5,2,3 */
     57     VDUP.16  d5,lr                    /* same shift count in every 16-bit lane */
     58     VLD1.8   {d6},[r2]                /* 8 table bytes; call the first three halfwords m0, m1, m2 */
     59     VTBL.8   d8,{d6},d7               /* d8 = {m0, m2, m0, m2} */
     60     VTBL.8   d4,{d6},d9               /* d4 = {m2, m1, m2, m1} */
     61     CMP      r8,#0                    /* flags set here are consumed by BEQ and by VMOVNE below */
     62     VLD1.16  {d0,d1,d2,d3},[r4]       /* d0..d3 = the four rows of unpacked coefficients */
     63     VSHL.U16 d8,d8,d5                 /* shift both multiplier vectors by QPDivTable[QP] */
     64     VSHL.U16 d4,d4,d5
     65     BEQ      L1                       /* no DC pointer: skip the load */
     66     LDRSH    r10,[r8,#0]              /* r10 = DC value read through r8 */
     67 L1:
            /* Scale: rows 0 and 2 use {m0,m2,m0,m2}, rows 1 and 3 use {m2,m1,m2,m1}. */
     68     VMUL.I16 d0,d0,d8
     69     VMUL.I16 d1,d1,d4
     70     VMUL.I16 d2,d2,d8
     71     VMUL.I16 d3,d3,d4
     72     VMOVNE.16 d0[0],r10               /* DC pointer given: element 0 is replaced by the DC value, unscaled */
            /* 4x4 transpose of the 16-bit elements held in d0..d3. */
     73     VTRN.16  d0,d1
     74     VTRN.16  d2,d3
     75     VTRN.32  q0,q1
     76     VMOV.I16 d4,#0                    /* zero operand so that VHADD acts as a signed halve */
            /*
             * First butterfly pass, applied to all four lanes at once:
             *   d5 = d0 + d2            d6 = d0 - d2
             *   d7 = (d1 >> 1) - d3     d8 = d1 + (d3 >> 1)
             *   d0 = d5 + d8   d1 = d6 + d7   d2 = d6 - d7   d3 = d5 - d8
             */
     77     VADD.I16 d5,d0,d2
     78     VSUB.I16 d6,d0,d2
     79     VHADD.S16 d7,d1,d4
     80     VHADD.S16 d8,d3,d4
     81     VSUB.I16 d7,d7,d3
     82     VADD.I16 d8,d1,d8
     83     VADD.I16 d0,d5,d8
     84     VADD.I16 d1,d6,d7
     85     VSUB.I16 d2,d6,d7
     86     VSUB.I16 d3,d5,d8
            /* Transpose again so the second pass works along the other dimension. */
     87     VTRN.16  d0,d1
     88     VTRN.16  d2,d3
     89     VTRN.32  q0,q1
            /* Second butterfly pass, same arithmetic as the first (d4 is still zero). */
     90     VADD.I16 d5,d0,d2
     91     VSUB.I16 d6,d0,d2
     92     VHADD.S16 d7,d1,d4
     93     VHADD.S16 d8,d3,d4
     94     VSUB.I16 d7,d7,d3
     95     VADD.I16 d8,d1,d8
     96     VADD.I16 d0,d5,d8
     97     VADD.I16 d1,d6,d7
     98     VSUB.I16 d2,d6,d7
     99     VSUB.I16 d3,d5,d8
            /* Rounding shift: each lane becomes (x + 32) >> 6, arithmetic. */
    100     VRSHR.S16 d0,d0,#6
    101     VRSHR.S16 d1,d1,#6
    102     VRSHR.S16 d2,d2,#6
    103     VRSHR.S16 d3,d3,#6
    104     B        L0x130
            /*
             * DC-only path: every residual equals (DC + 32) >> 6.
             * r8 is dereferenced without a null test here.
             * NOTE(review): presumably callers always pass a valid DC pointer
             * when the selector is 0, confirm.
             */
    105 L0x114:
    106     LDRSH    r10,[r8,#0]
    107     ADD      r10,r10,#0x20
    108     ASR      r10,r10,#6
    109     VDUP.16  d0,r10
    110     VDUP.16  d1,r10
    111     VDUP.16  d2,r10
    112     VDUP.16  d3,r10
            /*
             * Common tail. q0 (d0,d1) holds residual rows 0-1 and q1 (d2,d3)
             * holds rows 2-3. Read four prediction rows, add, saturate, store.
             */
    113 L0x130:
    114     LDR      r1,[sp,#0x58]            /* r1  = prediction row step (stack word 0) */
    115     LDR      r10,[sp,#0x5c]           /* r10 = destination row step (stack word 1) */
    116     LDR      r3,[r7],r1               /* prediction row 0, then advance r7 by the step */
    117     LDR      r5,[r7],r1               /* prediction row 1 */
    118     VMOV     d4,r3,r5                 /* d4 = row 0 in the low word, row 1 in the high word */
    119     LDR      r3,[r7],r1               /* prediction row 2 */
    120     LDR      r5,[r7,#0]               /* prediction row 3, no advance needed */
    121     VMOV     d5,r3,r5                 /* d5 = rows 2 and 3 */
    122     VADDW.U8 q3,q0,d4                 /* q3 = residual rows 0-1 + zero-extended prediction bytes */
    123     VADDW.U8 q4,q1,d5                 /* q4 = residual rows 2-3 + zero-extended prediction bytes */
    124     VQMOVUN.S16 d0,q3                 /* saturate the signed 16-bit sums to unsigned bytes */
    125     VQMOVUN.S16 d1,q4
    126     VST1.32  {d0[0]},[r9],r10         /* store row 0, then advance r9 by the step */
    127     VST1.32  {d0[1]},[r9],r10         /* row 1 */
    128     VST1.32  {d1[0]},[r9],r10         /* row 2 */
    129     VST1.32  {d1[1]},[r9]             /* row 3 */
    130     MOV      r0,#0                    /* return value is always 0 */
    131     ADD      sp,sp,#0x20              /* drop the scratch buffer */
    132     VPOP     {d8-d9}
    133     POP      {r4-r12,pc}              /* restore registers and return by loading pc */
    134     .endfunc
    135 
    136 .LarmVCM4P10_QPModuloTable:        /* literals read by the PC-relative LDRs just before P0, P1 and P2 */
    137     .word   armVCM4P10_QPModuloTable-(P0+8)    /* distance from (P0 + 8) to the table; P0 adds pc, which reads as P0 + 8 */
    138 .LarmVCM4P10_QPDivTable:
    139     .word   armVCM4P10_QPDivTable-(P1+8)       /* same scheme, resolved by the ADD at P1 */
    140 .LarmVCM4P10_VMatrixU16:
    141     .word   armVCM4P10_VMatrixU16-(P2+8)       /* same scheme, resolved by the ADD at P2 */
    142 
    143     .end                           /* end of assembly input */
    144 
    145