Home | History | Annotate | Download | only in src_gcc
      1 /*
      2  * Copyright (C) 2007-2008 ARM Limited
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  *
     16  */
     17 /*
     18  * ARM-state NEON (GNU as syntax) implementation of omxVCM4P10_PredictIntraChroma_8x8.
     19  */
     20 
     21     .eabi_attribute 24, 1  /* build attribute tag 24 = 1; in the ARM EABI addenda this is Tag_ABI_align_needed (code relies on 8-byte stack alignment) */
     22     .eabi_attribute 25, 1  /* build attribute tag 25 = 1; in the ARM EABI addenda this is Tag_ABI_align_preserved (code keeps 8-byte stack alignment) */
     23 
     24     .arm  /* assemble the following code as ARM-state (A32) instructions, not Thumb */
     25     .fpu neon  /* enable the NEON (Advanced SIMD) mnemonics used by the function below */
     26     .text  /* the two tables and the function are all emitted into the code section so that ADR can reach the tables */
     27     .align 4  /* on ARM targets GNU as treats the operand as a power of two, i.e. 16-byte alignment */
     28 
     29 armVCM4P10_pIndexTable8x8:  /* dispatch table for omxVCM4P10_PredictIntraChroma_8x8, indexed by the mode value held in r6 */
        /*
         * Each word is the distance from (P0+8) to the entry label of one
         * prediction path.  P0 is the "ADD pc,r8" instruction in the function;
         * in ARM state that instruction reads pc as its own address plus 8, so
         * adding the table entry to pc lands exactly on the chosen label.
         * Entry order: 0 = DC, 1 = HOR, 2 = VERT, 3 = PLANE.  The order must
         * stay in step with the numeric mode values passed by callers.
         */
     30     .word  OMX_VC_CHROMA_DC-(P0+8),    OMX_VC_CHROMA_HOR-(P0+8)
     31     .word  OMX_VC_CHROMA_VERT-(P0+8),  OMX_VC_CHROMA_PLANE-(P0+8)
     32 
     33 armVCM4P10_MultiplierTableChroma8x8:  /* 16-bit constants used only by the OMX_VC_CHROMA_PLANE path */
        /*
         * The plane path reads this table with two consecutive loads, so the
         * layout matters:
         *   first 4 halfwords  -> d10      : weights 3,2,1,4 applied to the
         *                                    four neighbour differences
         *   next 8 halfwords   -> d10,d11  : position offsets -3..4 that are
         *                                    multiplied by the two gradients
         */
     34     .hword   3, 2, 1,4
     35     .hword  -3,-2,-1,0
     36     .hword   1, 2, 3,4
     37 
     38     .global omxVCM4P10_PredictIntraChroma_8x8
        /*
         * omxVCM4P10_PredictIntraChroma_8x8
         *
         * Fills an 8x8 block of bytes at the destination with a prediction
         * built from neighbouring bytes.  One of four paths (DC, horizontal,
         * vertical, plane) is chosen by a mode index.
         *
         * Inputs, as the code below uses them (the names in the OpenMAX DL
         * prototype are presumably pSrcLeft, pSrcAbove, pSrcAboveLeft, pDst,
         * leftStep, dstStep, predMode, availability - confirm against the
         * header):
         *   r0            left-neighbour column: one byte is read per row,
         *                 consecutive rows are r4 bytes apart
         *   r1            the 8 bytes of the row above
         *   r2            the single above-left byte (read by PLANE only)
         *   r3            destination: 8 rows of 8 bytes, r5 bytes apart
         *   entry sp+0    -> r4  step between left-column bytes
         *   entry sp+4    -> r5  step between destination rows
         *   entry sp+8    -> r6  mode index: 0 DC, 1 HOR, 2 VERT, 3 PLANE
         *   entry sp+12   -> r7  flags: value 1 = row above may be read,
         *                        value 2 = left column may be read
         *
         * Output: r0 = 0 on every path.
         *
         * Notes:
         *   - r6 is used to index the dispatch table without a range check.
         *   - Only the DC path tests r7; HOR, VERT and PLANE read their
         *     neighbours unconditionally.
         *   - r4-r10, lr and d8-d15 are saved on entry and restored on exit;
         *     the PLANE path writes d9-d15.
         *   - In the comments A0..A7 are the bytes at r1, L0..L7 the bytes
         *     read through r0, and AL the byte at r2.
         */
     39     .func   omxVCM4P10_PredictIntraChroma_8x8
     40 omxVCM4P10_PredictIntraChroma_8x8:
     41     PUSH     {r4-r10,lr}  /* 8 core registers = 0x20 bytes */
     42     VPUSH    {d8-d15}  /* 8 D registers = 0x40 bytes; stack arguments now start at sp+0x60 */
     43     ADR      r8, armVCM4P10_pIndexTable8x8  /* r8 = address of the dispatch table */
     44     LDR      r6,[sp,#0x68]  /* r6 = mode index (third stack argument) */
     45     LDR      r4,[sp,#0x60]  /* r4 = left-column step (first stack argument) */
     46     LDR      r5,[sp,#0x64]  /* r5 = destination row step (second stack argument) */
     47     LDR      r7,[sp,#0x6c]  /* r7 = availability flags (fourth stack argument) */
     48     LDR      r8,[r8,r6,LSL #2]  /* r8 = table[r6]: offset of the chosen path from P0+8 */
     49 P0: ADD      pc,r8  /* computed jump: pc reads as P0+8 here, so this lands on the chosen label */
     50 
     51 OMX_VC_CHROMA_DC:  /* mode 0: block is filled with averages of the usable neighbours */
     52     TST      r7,#2  /* is the left column usable? */
     53     BEQ      L0xe8  /* no: go to the variants that do not read r0 */
     54     ADD      r9,r0,r4  /* r9 walks the odd left rows, r0 the even ones */
     55     ADD      r10,r4,r4  /* each pointer advances two rows per load */
     56     VLD1.8   {d1[0]},[r0],r10  /* gather L0..L7 into the eight byte lanes of d1 */
     57     VLD1.8   {d1[1]},[r9],r10
     58     VLD1.8   {d1[2]},[r0],r10
     59     VLD1.8   {d1[3]},[r9],r10
     60     VLD1.8   {d1[4]},[r0],r10
     61     VLD1.8   {d1[5]},[r9],r10
     62     VLD1.8   {d1[6]},[r0],r10
     63     VLD1.8   {d1[7]},[r9]
     64     TST      r7,#1  /* is the row above usable as well? */
     65     BEQ      L0xcc  /* no: left-only averages */
     66     VLD1.8   {d0},[r1]  /* d0 = A0..A7 */
     67     MOV      r0,#0  /* return value; r0 is no longer needed as a pointer */
     68     VPADDL.U8 d2,d0  /* pairwise sums of the above row (4 x u16) */
     69     VPADDL.U16 d3,d2  /* d3 = {A0+A1+A2+A3, A4+A5+A6+A7} as 2 x u32 */
     70     VPADDL.U8 d2,d1  /* pairwise sums of the left column */
     71     VPADDL.U16 d1,d2  /* d1 = {L0+L1+L2+L3, L4+L5+L6+L7} as 2 x u32 */
     72     VADD.I32 d2,d3,d1  /* d2 = {above low half + left top half, above high half + left bottom half} */
     73     VRSHR.U32 d2,d2,#3  /* rounded average over 8 samples */
     74     VRSHR.U32 d3,d3,#2  /* rounded average over 4 samples (above halves) */
     75     VRSHR.U32 d1,d1,#2  /* rounded average over 4 samples (left halves) */
     76     VMOV.I8  d5,#0xc  /* build byte-lookup indices: start with 12 in every byte */
     77     VMOV.I8  d6,#0x4  /* and 4 in every byte */
     78     VSHL.I64 d5,d5,#32  /* d5 = {0,0,0,0,12,12,12,12} */
     79     VSHR.U64 d6,d6,#32  /* d6 = {4,4,4,4,0,0,0,0} */
     80     VADD.I8  d6,d6,d5  /* d6 = {4,4,4,4,12,12,12,12} */
     81     VTBL.8   d0,{d2-d3},d5  /* rows 0-3: columns 0-3 = d2[0] (above+left average), columns 4-7 = d3[1] (above high-half average) */
     82     VTBL.8   d4,{d1-d2},d6  /* rows 4-7: columns 0-3 = d1[1] (left bottom-half average), columns 4-7 = d2[1] (above+left average) */
     83 L0x9c:  /* store tail: d0 to rows 0-3, d4 to rows 4-7 */
     84     ADD      r9,r3,r5  /* r9 walks the odd destination rows, r3 the even ones */
     85     ADD      r10,r5,r5  /* each pointer advances two rows per store */
     86     VST1.8   {d0},[r3],r10
     87     VST1.8   {d0},[r9],r10
     88     VST1.8   {d0},[r3],r10
     89     VST1.8   {d0},[r9],r10
     90     VST1.8   {d4},[r3],r10
     91     VST1.8   {d4},[r9],r10
     92     VST1.8   {d4},[r3],r10
     93     VST1.8   {d4},[r9]
     94     VPOP     {d8-d15}  /* restore in reverse order of the prologue */
     95     POP      {r4-r10,pc}  /* return with r0 = 0 */
     96 L0xcc:  /* DC with left column only (d1 = L0..L7 already loaded) */
     97     MOV      r0,#0  /* return value */
     98     VPADDL.U8 d2,d1
     99     VPADDL.U16 d1,d2  /* d1 = {L0+L1+L2+L3, L4+L5+L6+L7} */
    100     VRSHR.U32 d1,d1,#2  /* rounded averages over 4 samples */
    101     VDUP.8   d0,d1[0]  /* rows 0-3: top-half average in every column */
    102     VDUP.8   d4,d1[4]  /* rows 4-7: bottom-half average (low byte of the second 32-bit lane) */
    103     B        L0x9c
    104 L0xe8:  /* DC without the left column */
    105     TST      r7,#1  /* is the row above usable? */
    106     BEQ      L0x114  /* no neighbours at all: constant fill */
    107     VLD1.8   {d0},[r1]  /* d0 = A0..A7 */
    108     MOV      r0,#0  /* return value */
    109     VPADDL.U8 d2,d0
    110     VPADDL.U16 d3,d2  /* d3 = {A0+A1+A2+A3, A4+A5+A6+A7} */
    111     VRSHR.U32 d3,d3,#2  /* rounded averages over 4 samples */
    112     VMOV.I8  d5,#0x4
    113     VSHL.I64 d5,d5,#32  /* d5 = {0,0,0,0,4,4,4,4} */
    114     VTBL.8   d0,{d3},d5  /* columns 0-3 = low-half average, columns 4-7 = high-half average */
    115     B        L0x11c
    116 L0x114:  /* neither neighbour usable */
    117     VMOV.I8  d0,#0x80  /* every predicted byte is 0x80 */
    118     MOV      r0,#0  /* return value */
    119 L0x11c:  /* store tail: the same 8 bytes in d0 go to all 8 rows (shared with VERT) */
    120     ADD      r9,r3,r5  /* r9 walks the odd destination rows, r3 the even ones */
    121     ADD      r10,r5,r5
    122     VST1.8   {d0},[r3],r10
    123     VST1.8   {d0},[r9],r10
    124     VST1.8   {d0},[r3],r10
    125     VST1.8   {d0},[r9],r10
    126     VST1.8   {d0},[r3],r10
    127     VST1.8   {d0},[r9],r10
    128     VST1.8   {d0},[r3],r10
    129     VST1.8   {d0},[r9]
    130     VPOP     {d8-d15}
    131     POP      {r4-r10,pc}  /* return with r0 = 0 */
    132 OMX_VC_CHROMA_VERT:  /* mode 2: every row is a copy of the row above */
    133     VLD1.8   {d0},[r1]  /* d0 = A0..A7 */
    134     MOV      r0,#0  /* return value */
    135     B        L0x11c  /* reuse the replicate-d0 store tail */
    136 OMX_VC_CHROMA_HOR:  /* mode 1: each row is filled with its own left-neighbour byte */
    137     ADD      r9,r0,r4  /* r9 walks the odd left rows, r0 the even ones */
    138     ADD      r10,r4,r4
    139     VLD1.8   {d0[]},[r0],r10  /* load one byte and replicate it to all 8 lanes; d0..d7 = rows 0..7 */
    140     VLD1.8   {d1[]},[r9],r10
    141     VLD1.8   {d2[]},[r0],r10
    142     VLD1.8   {d3[]},[r9],r10
    143     VLD1.8   {d4[]},[r0],r10
    144     VLD1.8   {d5[]},[r9],r10
    145     VLD1.8   {d6[]},[r0],r10
    146     VLD1.8   {d7[]},[r9]
    147     B        L0x28c  /* shared store tail writes d0..d7 and sets r0 = 0 */
    148 OMX_VC_CHROMA_PLANE:  /* mode 3: linear gradient fitted to the above row and left column */
    149     ADD      r9,r0,r4  /* r9 walks the odd left rows, r0 the even ones */
    150     ADD      r10,r4,r4
    151     VLD1.8   {d0},[r1]  /* d0 = A0..A7 */
    152     VLD1.8   {d2[0]},[r2]  /* byte 0 of d2 = AL; the other bytes of d2 are not loaded */
    153     VLD1.8   {d1[0]},[r0],r10  /* gather L0..L7 into d1 */
    154     VLD1.8   {d1[1]},[r9],r10
    155     VLD1.8   {d1[2]},[r0],r10
    156     VLD1.8   {d1[3]},[r9],r10
    157     VLD1.8   {d1[4]},[r0],r10
    158     VLD1.8   {d1[5]},[r9],r10
    159     VLD1.8   {d1[6]},[r0],r10
    160     VLD1.8   {d1[7]},[r9]
    161     VREV64.8 d3,d0  /* d3 = A7,A6,...,A0 */
    162     VSUBL.U8 q3,d3,d2  /* only d6[0] = A7 - AL is used later; other lanes depend on unloaded bytes of d2 */
    163     VSHR.U64 d3,d3,#8  /* drop A7: d3 = A6,A5,...,A0,0 */
    164     VSUBL.U8 q2,d3,d0  /* d4 = {A6-A0, A5-A1, A4-A2, A3-A3} as 16-bit lanes */
    165     VREV64.8 d3,d1  /* same sequence for the left column: d3 = L7,...,L0 */
    166     VSUBL.U8 q7,d3,d2  /* d14[0] = L7 - AL */
    167     VSHR.U64 d3,d3,#8  /* d3 = L6,L5,...,L0,0 */
    168     VSUBL.U8 q6,d3,d1  /* d12 = {L6-L0, L5-L1, L4-L2, L3-L3} */
    169     ADR      r2, armVCM4P10_MultiplierTableChroma8x8  /* r2 is reused as the constant-table pointer */
    170     VSHL.I64 d4,d4,#16  /* d4 = {0, A6-A0, A5-A1, A4-A2}: discards the zero lane */
    171     VEXT.8   d9,d4,d6,#2  /* d9 = {A6-A0, A5-A1, A4-A2, A7-AL} */
    172     VLD1.16  {d10},[r2]!  /* d10 = weights {3,2,1,4}; r2 advances to the offsets row */
    173     VSHL.I64 d12,d12,#16  /* d12 = {0, L6-L0, L5-L1, L4-L2} */
    174     VEXT.8   d16,d12,d14,#2  /* d16 = {L6-L0, L5-L1, L4-L2, L7-AL} */
    175     VMUL.I16 d11,d9,d10  /* weighted horizontal differences */
    176     VMUL.I16 d3,d16,d10  /* weighted vertical differences */
    177     VPADD.I16 d3,d11,d3  /* pairwise sums: lanes 0-1 horizontal, lanes 2-3 vertical */
    178     VPADDL.S16 d3,d3  /* d3 = {H, V} as signed 32-bit: the two weighted sums */
    179     VSHL.I32 d2,d3,#4  /* d2 = 16 * {H, V} */
    180     VADD.I32 d3,d3,d2  /* d3 = 17 * {H, V} */
    181     VLD1.16  {d10,d11},[r2]  /* q5 = offsets {-3,-2,-1,0,1,2,3,4} */
    182     VRSHR.S32 d3,d3,#5  /* d3 = {b, c} = (17*{H,V} + 16) >> 5 */
    183     VADDL.U8 q0,d0,d1  /* q0 lane i = Ai + Li; only lane 7 is used */
    184     VDUP.16  q0,d1[3]  /* d1[3] is lane 7 of q0: every lane = A7 + L7 */
    185     VSHL.I16 q0,q0,#4  /* q0 = a = 16 * (A7 + L7) */
    186     VDUP.16  q2,d3[0]  /* q2 = b in every lane (low halfword of H's 32-bit lane) */
    187     VDUP.16  q3,d3[2]  /* q3 = c in every lane (low halfword of V's 32-bit lane) */
    188     VMUL.I16 q2,q2,q5  /* q2 lane x = b * (x - 3), x = 0..7 */
    189     VMUL.I16 q3,q3,q5  /* q3 lane y = c * (y - 3), y = 0..7 */
    190     VADD.I16 q2,q2,q0  /* q2 lane x = a + b * (x - 3): the per-column term shared by all rows */
    191     VDUP.16  q0,d6[0]  /* broadcast the per-row term c * (y - 3): q0,q1,q4..q9 = rows 0..7 (q2,q3 stay live) */
    192     VDUP.16  q1,d6[1]
    193     VDUP.16  q4,d6[2]
    194     VDUP.16  q5,d6[3]
    195     VDUP.16  q6,d7[0]
    196     VDUP.16  q7,d7[1]
    197     VDUP.16  q8,d7[2]
    198     VDUP.16  q9,d7[3]
    199     VADD.I16 q0,q2,q0  /* row y, column x = a + b * (x - 3) + c * (y - 3) */
    200     VADD.I16 q1,q2,q1
    201     VADD.I16 q4,q2,q4
    202     VADD.I16 q5,q2,q5
    203     VADD.I16 q6,q2,q6
    204     VADD.I16 q7,q2,q7
    205     VADD.I16 q8,q2,q8
    206     VADD.I16 q9,q2,q9
    207     VQRSHRUN.S16 d0,q0,#5  /* (value + 16) >> 5, saturated to an unsigned byte; d0..d7 = rows 0..7 */
    208     VQRSHRUN.S16 d1,q1,#5
    209     VQRSHRUN.S16 d2,q4,#5
    210     VQRSHRUN.S16 d3,q5,#5
    211     VQRSHRUN.S16 d4,q6,#5
    212     VQRSHRUN.S16 d5,q7,#5
    213     VQRSHRUN.S16 d6,q8,#5  /* q3 (d6,d7) is no longer needed, so its halves can be overwritten */
    214     VQRSHRUN.S16 d7,q9,#5
    215 L0x28c:  /* store tail: d0..d7 hold rows 0..7 (shared with HOR) */
    216     ADD      r9,r3,r5  /* r9 walks the odd destination rows, r3 the even ones */
    217     ADD      r10,r5,r5
    218     VST1.8   {d0},[r3],r10
    219     VST1.8   {d1},[r9],r10
    220     VST1.8   {d2},[r3],r10
    221     VST1.8   {d3},[r9],r10
    222     VST1.8   {d4},[r3],r10
    223     VST1.8   {d5},[r9],r10
    224     VST1.8   {d6},[r3],r10
    225     VST1.8   {d7},[r9]
    226     MOV      r0,#0  /* return value */
    227     VPOP     {d8-d15}
    228     POP      {r4-r10,pc}  /* return with r0 = 0 */
    229     .endfunc
    230 
    231     .end
    232 
    233