Home | History | Annotate | Download | only in src_gcc
      1 /*
      2  * Copyright (C) 2007-2008 ARM Limited
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  *
     16  */
     17 /*
     18  * ARM-state NEON implementation of omxVCM4P10_PredictIntraChroma_8x8 and its two lookup tables.
     19  */
     20 
     21     .eabi_attribute 24, 1                /* build attribute tag 24 = 1 (Tag_ABI_align_needed in the ARM EABI addenda: 8-byte stack alignment relied on) */
     22     .eabi_attribute 25, 1                /* build attribute tag 25 = 1 (Tag_ABI_align_preserved: 8-byte stack alignment kept) */
     23 
     24     .arm                                 /* assemble what follows as ARM-state (A32) instructions */
     25     .fpu neon                            /* accept the NEON (V*) mnemonics used below */
     26     .text                                /* code and both constant tables are emitted into .text */
     27     .align 4                             /* on ARM targets gas treats the operand as a power of two: 16-byte boundary */
     28 
     29 armVCM4P10_pIndexTable8x8:
        /*
         * Branch-offset table for omxVCM4P10_PredictIntraChroma_8x8.
         * Four 32-bit entries, selected by the mode index the function loads
         * from its stack arguments: 0 = DC, 1 = HOR, 2 = VERT, 3 = PLANE.
         * Each entry is the distance from (P0+8) to the handler label, because
         * the instruction at P0 is "ADD pc,r8" and an ARM-state read of pc
         * yields the instruction address plus 8.
         */
     30     .word  OMX_VC_CHROMA_DC-(P0+8),    OMX_VC_CHROMA_HOR-(P0+8)
     31     .word  OMX_VC_CHROMA_VERT-(P0+8),  OMX_VC_CHROMA_PLANE-(P0+8)
     32 
     33 armVCM4P10_MultiplierTableChroma8x8:
        /*
         * 16-bit constants used only by the OMX_VC_CHROMA_PLANE handler.
         * The handler reads the first row into d10 with post-increment and
         * then reads the next two rows together into d10,d11 (q5), so the
         * three rows must stay contiguous and in this order.
         */
     34     .hword   3, 2, 1,4                   /* weights applied to the four neighbour differences (gradient sums) */
     35     .hword  -3,-2,-1,0                   /* with the next row: per-lane position factors -3..4 */
     36     .hword   1, 2, 3,4
     37 
     38     .global omxVCM4P10_PredictIntraChroma_8x8
        /*
         * omxVCM4P10_PredictIntraChroma_8x8
         *
         * Writes an 8x8 block of bytes (eight 8-byte rows) to the buffer at r3,
         * computed from neighbouring bytes according to a mode index.
         *
         * Inputs, as this code uses them:
         *   r0        base of the left-neighbour column: one byte is read per
         *             row, consecutive rows are r4 bytes apart
         *   r1        base of the above-neighbour row: 8 contiguous bytes
         *   r2        address of the above-left byte (read in PLANE mode only)
         *   r3        destination; consecutive rows are r5 bytes apart
         *   stack+0   -> r4, stride for the r0 reads
         *   stack+4   -> r5, stride for the r3 writes
         *   stack+8   -> r6, mode index into armVCM4P10_pIndexTable8x8
         *                (0 = DC, 1 = HOR, 2 = VERT, 3 = PLANE)
         *   stack+12  -> r7, flag word; only the DC handler tests it:
         *                bit 0 set = use the above row, bit 1 set = use the
         *                left column
         *   (stack offsets are relative to sp at entry)
         *   NOTE(review): these presumably map to pSrcLeft, pSrcAbove,
         *   pSrcAboveLeft, pDst, leftStep, dstStep, predMode, availability of
         *   the OpenMAX DL prototype - confirm against the header.
         *
         * Returns:  r0 = 0 on every path.
         * Preserves r4-r10 and d8-d15 (pushed on entry, popped on exit).
         * Modifies r0, r3, d0-d7 and the flags (DC); PLANE also modifies r2
         * and q8-q9.
         *
         * NOTE(review): r6 is not range-checked; a value outside 0..3 loads
         * an arbitrary word as the branch offset.
         * NOTE(review): no ".type ..., %function" is visible in this span;
         * check whether callers built for Thumb need it for interworking.
         */
     39 omxVCM4P10_PredictIntraChroma_8x8:
     40     PUSH     {r4-r10,lr}                 /* 8 core registers = 32 bytes */
     41     VPUSH    {d8-d15}                    /* 8 D registers = 64 bytes; sp is now 0x60 below its entry value */
     42     ADR      r8, armVCM4P10_pIndexTable8x8
     43     LDR      r6,[sp,#0x68]               /* r6 = entry stack+8: mode index */
     44     LDR      r4,[sp,#0x60]               /* r4 = entry stack+0: stride for the r0 reads */
     45     LDR      r5,[sp,#0x64]               /* r5 = entry stack+4: stride for the r3 writes */
     46     LDR      r7,[sp,#0x6c]               /* r7 = entry stack+12: flag word */
     47     LDR      r8,[r8,r6,LSL #2]           /* r8 = table[r6] = handler address minus (P0+8) */
     48 P0: ADD      pc,r8                       /* pc reads as P0+8 here, so this branches to the selected handler */
     49 
     50 OMX_VC_CHROMA_DC:
        /* Mode 0: fill each 4x4 quadrant with a rounded average of neighbours. */
     51     TST      r7,#2
     52     BEQ      L0xe8                       /* bit 1 clear: left column is not read */
     53     ADD      r9,r0,r4                    /* r9 = address of the left byte for row 1 */
     54     ADD      r10,r4,r4                   /* r10 = 2*stride: r0 walks even rows, r9 odd rows */
     55     VLD1.8   {d1[0]},[r0],r10            /* d1 = left[0..7], gathered one byte per row */
     56     VLD1.8   {d1[1]},[r9],r10
     57     VLD1.8   {d1[2]},[r0],r10
     58     VLD1.8   {d1[3]},[r9],r10
     59     VLD1.8   {d1[4]},[r0],r10
     60     VLD1.8   {d1[5]},[r9],r10
     61     VLD1.8   {d1[6]},[r0],r10
     62     VLD1.8   {d1[7]},[r9]
     63     TST      r7,#1
     64     BEQ      L0xcc                       /* bit 0 clear: left column only */
     65     VLD1.8   {d0},[r1]                   /* d0 = above[0..7] */
     66     MOV      r0,#0                       /* return value */
     67     VPADDL.U8 d2,d0
     68     VPADDL.U16 d3,d2                     /* d3 = { sum above[0..3], sum above[4..7] } as u32 */
     69     VPADDL.U8 d2,d1
     70     VPADDL.U16 d1,d2                     /* d1 = { sum left[0..3], sum left[4..7] } as u32 */
     71     VADD.I32 d2,d3,d1                    /* d2 = above sum + left sum, per half */
     72     VRSHR.U32 d2,d2,#3                   /* d2 = (sum of 8 + 4) >> 3 */
     73     VRSHR.U32 d3,d3,#2                   /* d3 = (sum of 4 + 2) >> 2, above only */
     74     VRSHR.U32 d1,d1,#2                   /* d1 = (sum of 4 + 2) >> 2, left only */
     75     VMOV.I8  d5,#0xc
     76     VMOV.I8  d6,#0x4
     77     VSHL.I64 d5,d5,#32                   /* d5 bytes = 0,0,0,0,12,12,12,12 */
     78     VSHR.U64 d6,d6,#32                   /* d6 bytes = 4,4,4,4,0,0,0,0 */
     79     VADD.I8  d6,d6,d5                    /* d6 bytes = 4,4,4,4,12,12,12,12 */
     80     VTBL.8   d0,{d2-d3},d5               /* rows 0-3: cols 0-3 = d2 lane 0 (above+left), cols 4-7 = d3 lane 1 (above[4..7]) */
     81     VTBL.8   d4,{d1-d2},d6               /* rows 4-7: cols 0-3 = d1 lane 1 (left[4..7]), cols 4-7 = d2 lane 1 (above+left) */
     82 L0x9c:
        /* Store d0 to rows 0-3 and d4 to rows 4-7, then return. */
     83     ADD      r9,r3,r5                    /* r9 = destination row 1 */
     84     ADD      r10,r5,r5                   /* r10 = 2*stride: r3 walks even rows, r9 odd rows */
     85     VST1.8   {d0},[r3],r10
     86     VST1.8   {d0},[r9],r10
     87     VST1.8   {d0},[r3],r10
     88     VST1.8   {d0},[r9],r10
     89     VST1.8   {d4},[r3],r10
     90     VST1.8   {d4},[r9],r10
     91     VST1.8   {d4},[r3],r10
     92     VST1.8   {d4},[r9]
     93     VPOP     {d8-d15}
     94     POP      {r4-r10,pc}                 /* restores r4-r10 and returns by loading the saved lr into pc */
     95 L0xcc:
        /* DC with the left column only (bit 1 set, bit 0 clear). */
     96     MOV      r0,#0                       /* return value */
     97     VPADDL.U8 d2,d1
     98     VPADDL.U16 d1,d2                     /* d1 = { sum left[0..3], sum left[4..7] } as u32 */
     99     VRSHR.U32 d1,d1,#2                   /* rounded divide by 4 */
    100     VDUP.8   d0,d1[0]                    /* rows 0-3 = average of left[0..3] */
    101     VDUP.8   d4,d1[4]                    /* rows 4-7 = average of left[4..7] */
    102     B        L0x9c
    103 L0xe8:
        /* DC with the left column unused (bit 1 clear). */
    104     TST      r7,#1
    105     BEQ      L0x114                      /* bit 0 clear as well: no neighbours used */
    106     VLD1.8   {d0},[r1]                   /* d0 = above[0..7] */
    107     MOV      r0,#0                       /* return value */
    108     VPADDL.U8 d2,d0
    109     VPADDL.U16 d3,d2                     /* d3 = { sum above[0..3], sum above[4..7] } as u32 */
    110     VRSHR.U32 d3,d3,#2                   /* rounded divide by 4 */
    111     VMOV.I8  d5,#0x4
    112     VSHL.I64 d5,d5,#32                   /* d5 bytes = 0,0,0,0,4,4,4,4 */
    113     VTBL.8   d0,{d3},d5                  /* cols 0-3 = average of above[0..3], cols 4-7 = average of above[4..7] */
    114     B        L0x11c
    115 L0x114:
    116     VMOV.I8  d0,#0x80                    /* neither flag set: every output byte is 0x80 */
    117     MOV      r0,#0                       /* return value */
    118 L0x11c:
        /* Store the same 8-byte row d0 to all eight rows, then return. */
    119     ADD      r9,r3,r5                    /* r9 = destination row 1 */
    120     ADD      r10,r5,r5                   /* r10 = 2*stride */
    121     VST1.8   {d0},[r3],r10
    122     VST1.8   {d0},[r9],r10
    123     VST1.8   {d0},[r3],r10
    124     VST1.8   {d0},[r9],r10
    125     VST1.8   {d0},[r3],r10
    126     VST1.8   {d0},[r9],r10
    127     VST1.8   {d0},[r3],r10
    128     VST1.8   {d0},[r9]
    129     VPOP     {d8-d15}
    130     POP      {r4-r10,pc}
    131 OMX_VC_CHROMA_VERT:
        /* Mode 2: every output row is a copy of the above row. */
    132     VLD1.8   {d0},[r1]                   /* d0 = above[0..7] */
    133     MOV      r0,#0                       /* return value */
    134     B        L0x11c
    135 OMX_VC_CHROMA_HOR:
        /* Mode 1: output row y is left[y] repeated eight times. */
    136     ADD      r9,r0,r4                    /* r9 = address of the left byte for row 1 */
    137     ADD      r10,r4,r4                   /* r10 = 2*stride */
    138     VLD1.8   {d0[]},[r0],r10             /* load one byte and replicate it to all 8 lanes */
    139     VLD1.8   {d1[]},[r9],r10
    140     VLD1.8   {d2[]},[r0],r10
    141     VLD1.8   {d3[]},[r9],r10
    142     VLD1.8   {d4[]},[r0],r10
    143     VLD1.8   {d5[]},[r9],r10
    144     VLD1.8   {d6[]},[r0],r10
    145     VLD1.8   {d7[]},[r9]
    146     B        L0x28c                      /* shared store of d0..d7; r0 is zeroed there */
    147 OMX_VC_CHROMA_PLANE:
        /*
         * Mode 3: out[y][x] = sat8((a + b*(x-3) + c*(y-3) + 16) >> 5), where
         *   a = 16 * (above[7] + left[7])
         *   b = (17*H + 16) >> 5,  c = (17*V + 16) >> 5
         *   H = 3*(above[6]-above[0]) + 2*(above[5]-above[1])
         *       + (above[4]-above[2]) + 4*(above[7]-aboveLeft)
         *   V = the same expression over the left column.
         * All lane arithmetic after b and c is 16-bit.
         */
    148     ADD      r9,r0,r4                    /* r9 = address of the left byte for row 1 */
    149     ADD      r10,r4,r4                   /* r10 = 2*stride */
    150     VLD1.8   {d0},[r1]                   /* d0 = above[0..7] */
    151     VLD1.8   {d2[0]},[r2]                /* d2 byte 0 = above-left byte; the other d2 lanes are not loaded */
    152     VLD1.8   {d1[0]},[r0],r10            /* d1 = left[0..7], gathered one byte per row */
    153     VLD1.8   {d1[1]},[r9],r10
    154     VLD1.8   {d1[2]},[r0],r10
    155     VLD1.8   {d1[3]},[r9],r10
    156     VLD1.8   {d1[4]},[r0],r10
    157     VLD1.8   {d1[5]},[r9],r10
    158     VLD1.8   {d1[6]},[r0],r10
    159     VLD1.8   {d1[7]},[r9]
    160     VREV64.8 d3,d0                       /* d3 = above[7],above[6],...,above[0] */
    161     VSUBL.U8 q3,d3,d2                    /* q3 lane 0 = above[7] - aboveLeft (only lane 0 is consumed) */
    162     VSHR.U64 d3,d3,#8                    /* d3 = above[6],...,above[0],0 */
    163     VSUBL.U8 q2,d3,d0                    /* d4 = { a6-a0, a5-a1, a4-a2, 0 } as 16-bit lanes */
    164     VREV64.8 d3,d1                       /* same sequence for the left column */
    165     VSUBL.U8 q7,d3,d2                    /* q7 lane 0 = left[7] - aboveLeft */
    166     VSHR.U64 d3,d3,#8
    167     VSUBL.U8 q6,d3,d1                    /* d12 = { l6-l0, l5-l1, l4-l2, 0 } */
    168     ADR      r2, armVCM4P10_MultiplierTableChroma8x8
    169     VSHL.I64 d4,d4,#16                   /* d4 = { 0, a6-a0, a5-a1, a4-a2 } */
    170     VEXT.8   d9,d4,d6,#2                 /* d9 = { a6-a0, a5-a1, a4-a2, a7-aboveLeft } */
    171     VLD1.16  {d10},[r2]!                 /* d10 = { 3, 2, 1, 4 }; r2 advances to the next table row */
    172     VSHL.I64 d12,d12,#16                 /* d12 = { 0, l6-l0, l5-l1, l4-l2 } */
    173     VEXT.8   d16,d12,d14,#2              /* d16 = { l6-l0, l5-l1, l4-l2, l7-aboveLeft } */
    174     VMUL.I16 d11,d9,d10                  /* weighted above differences */
    175     VMUL.I16 d3,d16,d10                  /* weighted left differences */
    176     VPADD.I16 d3,d11,d3                  /* pairwise sums: lanes 0-1 from above, lanes 2-3 from left */
    177     VPADDL.S16 d3,d3                     /* d3 = { H, V } as signed 32-bit */
    178     VSHL.I32 d2,d3,#4
    179     VADD.I32 d3,d3,d2                    /* d3 = 17 * { H, V } */
    180     VLD1.16  {d10,d11},[r2]              /* q5 = { -3,-2,-1,0,1,2,3,4 } */
    181     VRSHR.S32 d3,d3,#5                   /* d3 = { b, c } = (17*{H,V} + 16) >> 5 */
    182     VADDL.U8 q0,d0,d1                    /* q0 lane i = above[i] + left[i] */
    183     VDUP.16  q0,d1[3]                    /* broadcast lane 7: above[7] + left[7] */
    184     VSHL.I16 q0,q0,#4                    /* q0 = a = 16 * (above[7] + left[7]) in every lane */
    185     VDUP.16  q2,d3[0]                    /* q2 = low halfword of b in every lane */
    186     VDUP.16  q3,d3[2]                    /* q3 = low halfword of c in every lane */
    187     VMUL.I16 q2,q2,q5                    /* q2[x] = b * (x - 3) */
    188     VMUL.I16 q3,q3,q5                    /* q3[y] = c * (y - 3) */
    189     VADD.I16 q2,q2,q0                    /* q2[x] = a + b*(x-3): the part shared by every row */
    190     VDUP.16  q0,d6[0]                    /* q0,q1,q4..q9 = c*(y-3) broadcast for rows y = 0..7 */
    191     VDUP.16  q1,d6[1]
    192     VDUP.16  q4,d6[2]
    193     VDUP.16  q5,d6[3]                    /* q5 is reused here; its table constants are no longer needed */
    194     VDUP.16  q6,d7[0]
    195     VDUP.16  q7,d7[1]
    196     VDUP.16  q8,d7[2]
    197     VDUP.16  q9,d7[3]
    198     VADD.I16 q0,q2,q0                    /* row y = a + b*(x-3) + c*(y-3) */
    199     VADD.I16 q1,q2,q1
    200     VADD.I16 q4,q2,q4
    201     VADD.I16 q5,q2,q5
    202     VADD.I16 q6,q2,q6
    203     VADD.I16 q7,q2,q7
    204     VADD.I16 q8,q2,q8
    205     VADD.I16 q9,q2,q9
    206     VQRSHRUN.S16 d0,q0,#5                /* (v + 16) >> 5, saturated to 0..255; rows end up in d0..d7 */
    207     VQRSHRUN.S16 d1,q1,#5
    208     VQRSHRUN.S16 d2,q4,#5
    209     VQRSHRUN.S16 d3,q5,#5
    210     VQRSHRUN.S16 d4,q6,#5
    211     VQRSHRUN.S16 d5,q7,#5
    212     VQRSHRUN.S16 d6,q8,#5
    213     VQRSHRUN.S16 d7,q9,#5
    214 L0x28c:
        /* Shared tail for HOR and PLANE: store d0..d7 as rows 0..7, return 0. */
    215     ADD      r9,r3,r5                    /* r9 = destination row 1 */
    216     ADD      r10,r5,r5                   /* r10 = 2*stride */
    217     VST1.8   {d0},[r3],r10
    218     VST1.8   {d1},[r9],r10
    219     VST1.8   {d2},[r3],r10
    220     VST1.8   {d3},[r9],r10
    221     VST1.8   {d4},[r3],r10
    222     VST1.8   {d5},[r9],r10
    223     VST1.8   {d6},[r3],r10
    224     VST1.8   {d7},[r9]
    225     MOV      r0,#0                       /* return value */
    226     VPOP     {d8-d15}
    227     POP      {r4-r10,pc}                 /* restores r4-r10 and returns by loading the saved lr into pc */
    228 
    229     .end                                 /* gas stops processing the file at this directive */
    230 
    231