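/* Home | History | Annotate | Download | only in src_gcc  -- navigation text left over from the web source browser this listing was captured from; not part of the assembly source. */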
      1 /*
      2  * Copyright (C) 2007-2008 ARM Limited
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  *
     16  */
     17 /*
     18  *
     19  */
     20 
     21     .eabi_attribute 24, 1    /* build attribute tag 24 = 1; presumably Tag_ABI_align_needed (8-byte alignment relied on) - TODO confirm against the ARM EABI addenda */
     22     .eabi_attribute 25, 1    /* build attribute tag 25 = 1; presumably Tag_ABI_align_preserved (8-byte alignment kept) - TODO confirm against the ARM EABI addenda */
     23 
     24     .arm                     /* assemble what follows as ARM (32-bit) instructions, not Thumb */
     25     .fpu neon                /* accept the NEON instructions used by the function below */
     26 
     27     .text                    /* the table and the function are both emitted into the code section */
     28     .align 4                 /* GNU as for ARM takes a power of two here: 2^4 = 16-byte alignment */
     29 
     30 armVCM4P10_pSwitchTable4x4:
        /*
         * Dispatch table for omxVCM4P10_PredictIntra_4x4: nine 32-bit entries,
         * indexed by the value that function loads into r6 (scaled by 4).
         *
         * Each entry is the distance from (P0+8) to one routine, where P0 is the
         * "ADD pc, r8" instruction in the function.  In ARM state pc reads as the
         * address of the executing instruction plus 8, so adding an entry to pc
         * at P0 lands exactly on the routine.  The entries are differences of
         * labels, i.e. assemble-time constants rather than absolute addresses.
         *
         * Index -> routine (the order below is the contract with the caller's
         * mode value; do not reorder):
         *   0 VERT   1 HOR   2 DC   3 DIAG_DL   4 DIAG_DR
         *   5 VR     6 HD    7 VL   8 HU
         */
     31     .word OMX_VC_4x4_VERT-(P0+8),     OMX_VC_4x4_HOR-(P0+8)
     32     .word OMX_VC_4x4_DC-(P0+8),       OMX_VC_4x4_DIAG_DL-(P0+8)
     33     .word OMX_VC_4x4_DIAG_DR-(P0+8),  OMX_VC_4x4_VR-(P0+8)
     34     .word OMX_VC_4x4_HD-(P0+8),       OMX_VC_4x4_VL-(P0+8)
     35     .word OMX_VC_4x4_HU-(P0+8)
     36 
     37     .global omxVCM4P10_PredictIntra_4x4
        /*
         * omxVCM4P10_PredictIntra_4x4
         *
         * Writes a 4x4 block of bytes at r3 from neighbouring bytes, using one of
         * nine routines chosen through armVCM4P10_pSwitchTable4x4.  The routine
         * labels match the names of the H.264 Intra 4x4 prediction modes
         * (NOTE(review): confirm the mode numbering against the OpenMAX DL header).
         *
         * Inputs as the code uses them.  The parameter names are taken from the
         * OpenMAX DL prototype and are an assumption - TODO confirm:
         *   r0              pSrcLeft       one byte read per row, rows r4 bytes apart
         *   r1              pSrcAbove      read as 4 or 8 consecutive bytes
         *   r2              pSrcAboveLeft  read as a single byte
         *   r3              pDst           four 4-byte rows written, rows r5 bytes apart
         *   [sp,#0]  -> r4  leftStep
         *   [sp,#4]  -> r5  dstStep
         *   [sp,#8]  -> r6  predMode       table index, expected 0..8
         *   [sp,#12] -> r7  availability   bit mask; only DC, DIAG_DL and VL test it:
         *                     0x01  gates the 4-byte read through r1 (DC)
         *                     0x02  gates the reads through r0 (DC)
         *                     0x40  selects an 8-byte instead of a 4-byte read
         *                           through r1 (DIAG_DL, VL)
         *   (stack offsets above are relative to sp on entry)
         *
         * Returns r0 = 0 on every path.  r4-r12, lr and d8-d12 are saved on entry
         * and restored on exit; d0-d7 are used as scratch.
         *
         * NOTE(review): r6 is used as a table index without a range check, and no
         * pointer is validated.
         *
         * Notation used in the comments below:
         *   L0..L3   bytes read through r0 for rows 0..3
         *   A0..A7   bytes read through r1
         *   AL       byte read through r2
         *   AVG(a,b)   = (a + b + 1) >> 1         one VRHADD.U8
         *   F(a,b,c)   = (a + 2*b + c + 2) >> 2   VHADD.U8 of a and c, then
         *                                         VRHADD.U8 of that result with b
         *   dN[i]    byte lane i of dN unless an instruction says otherwise
         */
     38 omxVCM4P10_PredictIntra_4x4:
            /* 10 core registers (40 bytes) + 5 D registers (40 bytes) = 0x50 bytes,
               so the caller's stack arguments now start at [sp,#0x50]. */
     39     PUSH     {r4-r12,lr}
     40     VPUSH    {d8-d12}
     41     ADR      r8, armVCM4P10_pSwitchTable4x4
            /* r6 = predMode, r7 = availability */
     42     LDRD     r6,r7,[sp,#0x58]
            /* r4 = leftStep, r5 = dstStep */
     43     LDRD     r4,r5,[sp,#0x50]
            /* r8 = table[r6]: offset of the selected routine from P0+8 */
     44     LDR      r8,[r8,r6,LSL #2]
            /* pc reads as P0+8 here, so this jumps to the selected routine. */
     45 P0: ADD      pc, r8
     46 
     47 OMX_VC_4x4_HOR:
            /* Row n of the output is Ln repeated four times. */
            /* r0 reads rows 0 and 2, r9 rows 1 and 3; both advance by r10 = 2*r4. */
     48     ADD      r9,r0,r4
     49     ADD      r10,r4,r4
            /* The "dN[]" form loads one byte and replicates it to every lane. */
     50     VLD1.8   {d0[]},[r0],r10
     51     VLD1.8   {d1[]},[r9],r10
     52     VLD1.8   {d2[]},[r0]
     53     VLD1.8   {d3[]},[r9]
            /* r3 writes rows 0 and 2, r11 rows 1 and 3; both advance by r12 = 2*r5. */
     54     ADD      r11,r3,r5
     55     ADD      r12,r5,r5
     56     VST1.32  {d0[0]},[r3],r12
     57     VST1.32  {d1[0]},[r11],r12
     58     VST1.32  {d2[0]},[r3]
     59     VST1.32  {d3[0]},[r11]
     60     B        L0x348
     61 OMX_VC_4x4_VERT:
            /* Every output row is a copy of A0..A3. */
     62     VLD1.32  {d0[0]},[r1]
     63     ADD      r11,r3,r5
     64     ADD      r12,r5,r5
     65 L0x58:
            /* Shared store tail (also reached from every DC path).
               Expects: low 32 bits of d0 = the row to write,
                        r11 = r3 + r5, r12 = 2*r5.
               Writes the same four bytes to rows 0, 1, 2 and 3. */
     66     VST1.32  {d0[0]},[r3],r12
     67     VST1.32  {d0[0]},[r11],r12
     68     VST1.32  {d0[0]},[r3]
     69     VST1.32  {d0[0]},[r11]
     70     B        L0x348
     71 OMX_VC_4x4_DC:
            /* All 16 output bytes get one value: a rounded mean of whichever
               neighbours the availability mask allows, or 0x80 if neither. */
            /* Bit 0x02 clear: skip the reads through r0. */
     72     TST      r7,#2
     73     BEQ      L0xdc
            /* d0[0..3] = L0..L3 */
     74     ADD      r9,r0,r4
     75     ADD      r10,r4,r4
     76     VLD1.8   {d0[0]},[r0],r10
     77     VLD1.8   {d0[1]},[r9],r10
     78     VLD1.8   {d0[2]},[r0]
     79     VLD1.8   {d0[3]},[r9]
            /* Bit 0x01 clear: only L0..L3 contribute. */
     80     TST      r7,#1
     81     BEQ      L0xbc
            /* d0[4..7] = A0..A3, so d0 now holds all eight contributors. */
     82     VLD1.32  {d0[1]},[r1]
            /* Redundant: the shared exit at L0x348 sets r0 = 0 again and r0 is
               not read in between.  Kept to leave the instruction stream as is. */
     83     MOV      r0,#0
            /* Three pairwise widening adds leave the 8-byte sum in the 64-bit
               lane; the rounding shift then yields (sum + 4) >> 3. */
     84     VPADDL.U8 d1,d0
     85     VPADDL.U16 d1,d1
     86     VPADDL.U32 d1,d1
     87     VRSHR.U64 d1,d1,#3
     88     ADD      r11,r3,r5
     89     ADD      r12,r5,r5
            /* Replicate the low byte of the result to every lane of d0. */
     90     VDUP.8   d0,d1[0]
     91     B        L0x58
     92 L0xbc:
            /* Only L0..L3: the low 32-bit lane of d1 ends up as their sum (the
               upper half of d0 is never initialised here, but it only feeds the
               upper lane, which is not used); result is (sum + 2) >> 2. */
            /* Redundant with the shared exit, as above. */
     93     MOV      r0,#0
     94     VPADDL.U8 d1,d0
     95     VPADDL.U16 d1,d1
     96     VRSHR.U32 d1,d1,#2
     97     ADD      r11,r3,r5
     98     ADD      r12,r5,r5
     99     VDUP.8   d0,d1[0]
    100     B        L0x58
    101 L0xdc:
            /* No reads through r0.  Bit 0x01 clear as well: use the constant. */
    102     TST      r7,#1
    103     BEQ      L0x108
            /* Only A0..A3: d0[0..3] = A0..A3, result is (sum + 2) >> 2. */
    104     VLD1.32  {d0[0]},[r1]
            /* Redundant with the shared exit, as above. */
    105     MOV      r0,#0
    106     VPADDL.U8 d1,d0
    107     VPADDL.U16 d1,d1
    108     VRSHR.U32 d1,d1,#2
    109     ADD      r11,r3,r5
    110     ADD      r12,r5,r5
    111     VDUP.8   d0,d1[0]
    112     B        L0x58
    113 L0x108:
            /* Neither neighbour is read: fill the block with 0x80. */
    114     VMOV.I8  d0,#0x80
            /* Redundant with the shared exit, as above. */
    115     MOV      r0,#0
    116     ADD      r11,r3,r5
    117     ADD      r12,r5,r5
    118     B        L0x58
    119 OMX_VC_4x4_DIAG_DL:
            /* Builds three byte sequences d3, d4, d5 = the above row shifted by
               0, 1 and 2 positions, filters them, and emits sliding windows. */
            /* Bit 0x40 clear: only four bytes may be read through r1. */
    120     TST      r7,#0x40
    121     BEQ      L0x138
            /* 8-byte case: d3 = A0..A7, d4 = A1..A7,A7, d5 = A2..A7,A7,A7. */
    122     VLD1.8   {d3},[r1]
    123     VDUP.8   d2,d3[7]
    124     VEXT.8   d4,d3,d2,#1
    125     VEXT.8   d5,d3,d2,#2
    126     B        L0x14c
    127 L0x138:
            /* 4-byte case: A0..A3 go to d0[4..7] and A3 is replicated in d2, so
               d3 = A0,A1,A2,A3,A3,A3,A3,A3
               d4 = A1,A2,A3,A3,...        d5 = A2,A3,A3,... */
    128     VLD1.32  {d0[1]},[r1]
    129     VDUP.8   d2,d0[7]
    130     VEXT.8   d3,d0,d2,#4
    131     VEXT.8   d4,d0,d2,#5
    132     VEXT.8   d5,d0,d2,#6
    133 L0x14c:
            /* d6[i] = F(d3[i], d4[i], d5[i]) */
    134     VHADD.U8 d6,d3,d5
    135     VRHADD.U8 d6,d6,d4
            /* Row r = d6[r..r+3]: store the low four bytes, then rotate d6 down
               by one byte before the next row. */
    136     VST1.32  {d6[0]},[r3],r5
    137     VEXT.8   d6,d6,d6,#1
    138     VST1.32  {d6[0]},[r3],r5
    139     VEXT.8   d6,d6,d6,#1
    140     VST1.32  {d6[0]},[r3],r5
    141     VEXT.8   d6,d6,d6,#1
    142     VST1.32  {d6[0]},[r3]
    143     B        L0x348
    144 OMX_VC_4x4_DIAG_DR:
            /* Filters the sequence S = L3,L2,L1,L0,AL,A0,A1,A2,A3 and emits
               sliding windows, bottom row first. */
            /* d0[0..3] = A0..A3, d1[7] = AL */
    145     VLD1.32  {d0[0]},[r1]
    146     VLD1.8   {d1[7]},[r2]
    147     ADD      r9,r0,r4
    148     ADD      r10,r4,r4
            /* r1 is free once A0..A3 are loaded; reuse it as the row 1 address. */
    149     ADD      r1,r3,r5
            /* d1[6],d1[5],d1[4],d1[3] = L0,L1,L2,L3 */
    150     VLD1.8   {d1[6]},[r0],r10
    151     VLD1.8   {d1[5]},[r9],r10
    152     VLD1.8   {d1[4]},[r0]
    153     VLD1.8   {d1[3]},[r9]
            /* d3 = S[0..7], d4 = S[1..8]; d5 = S[2..8] followed by one
               uninitialised byte, which only reaches d6[7] (never stored).
               r4 (row 2) and r6 (row 3) are reused as destination addresses;
               the strides derived from r4 were already taken into r9/r10. */
    154     VEXT.8   d3,d1,d0,#3
    155     ADD      r4,r1,r5
    156     VEXT.8   d4,d1,d0,#4
    157     ADD      r6,r4,r5
    158     VEXT.8   d5,d1,d0,#5
            /* d6[i] = F(S[i], S[i+1], S[i+2]) */
    159     VHADD.U8 d6,d3,d5
    160     VRHADD.U8 d6,d6,d4
            /* Row 3 = d6[0..3], row 2 = d6[1..4], row 1 = d6[2..5],
               row 0 = d6[3..6]. */
    161     VST1.32  {d6[0]},[r6]
    162     VEXT.8   d6,d6,d6,#1
    163     VST1.32  {d6[0]},[r4]
    164     VEXT.8   d6,d6,d6,#1
    165     VST1.32  {d6[0]},[r1]
    166     VEXT.8   d6,d6,d6,#1
    167     VST1.32  {d6[0]},[r3]
    168     B        L0x348
    169 OMX_VC_4x4_VR:
            /* d0[0..3] = A0..A3, d0[7] = AL */
    170     VLD1.32  {d0[0]},[r1]
    171     VLD1.8   {d0[7]},[r2]
            /* d1[7] = L0, d2[7] = L1, d1[6] = L2 (L3 is not read by this mode) */
    172     VLD1.8   {d1[7]},[r0],r4
    173     VLD1.8   {d2[7]},[r0],r4
    174     VLD1.8   {d1[6]},[r0]
            /* Lanes 0..4 of each sequence (higher lanes are not stored):
               d12 = AL,A0,A1,A2,A3
               d3  = L2,L0,AL,A0,A1
               d4  = L1,AL,A0,A1,A2
               d5  = L0,A0,A1,A2,A3
               d6  = L1,A0,A1,A2,A3
               d11 = L0,AL,A0,A1,A2 */
    175     VEXT.8   d12,d0,d0,#7
    176     VEXT.8   d3,d1,d12,#6
    177     VEXT.8   d4,d2,d12,#7
    178     VEXT.8   d5,d1,d0,#7
    179     VEXT.8   d6,d2,d0,#7
    180     VEXT.8   d11,d1,d12,#7
            /* d8[i] = F(d6[i], d11[i], d12[i]).  For lanes 1..4 the two outer
               operands are equal, which reduces F to AVG:
               d8 = F(L1,L0,AL), AVG(AL,A0), AVG(A0,A1), AVG(A1,A2), AVG(A2,A3) */
    181     VHADD.U8 d8,d6,d12
    182     VRHADD.U8 d8,d8,d11
            /* d7[i] = F(d3[i], d4[i], d5[i]):
               d7 = F(L2,L1,L0), F(L0,AL,A0), F(AL,A0,A1), F(A0,A1,A2), F(A1,A2,A3) */
    183     VHADD.U8 d7,d3,d5
    184     VRHADD.U8 d7,d7,d4
            /* d10 = d8 rotated down one byte, d9 = d7 rotated down one byte. */
    185     VEXT.8   d10,d8,d8,#1
    186     ADD      r11,r3,r5
    187     ADD      r12,r5,r5
    188     VEXT.8   d9,d7,d7,#1
            /* Row 0 = d8[1..4], row 1 = d7[1..4], row 2 = d8[0..3],
               row 3 = d7[0..3].  The post-increment on the row 2 store leaves
               r3 unused afterwards. */
    189     VST1.32  {d10[0]},[r3],r12
    190     VST1.32  {d9[0]},[r11],r12
    191     VST1.32  {d8[0]},[r3],r12
    192     VST1.32  {d7[0]},[r11]
    193     B        L0x348
    194 OMX_VC_4x4_HD:
            /* Only d0[0..2] (A0..A2) reach the VEXT results below, so load just
               the four bytes at r1 - the same form DIAG_DR and VR use - instead
               of eight.  This routine does not test availability bit 0x40, which
               DIAG_DL and VL check before they read A4..A7, so an 8-byte load
               here would touch bytes the caller may not have provided. */
    195     VLD1.32  {d0[0]},[r1]
            /* d1[7] = AL */
    196     VLD1.8   {d1[7]},[r2]
    197     ADD      r9,r0,r4
    198     ADD      r10,r4,r4
            /* d1[6],d1[5],d1[4],d1[3] = L0,L1,L2,L3 */
    199     VLD1.8   {d1[6]},[r0],r10
    200     VLD1.8   {d1[5]},[r9],r10
    201     VLD1.8   {d1[4]},[r0]
    202     VLD1.8   {d1[3]},[r9]
            /* "x" marks an uninitialised byte that never reaches a stored lane:
               d3 = L3,L2,L1,L0,AL,A0,A1,A2
               d4 = x ,L3,L2,L1,L0,AL,A0,A1
               d5 = x ,x ,L3,L2,L1,L0,AL,A0 */
    203     VEXT.8   d3,d1,d0,#3
    204     VEXT.8   d4,d1,d0,#2
    205     VEXT.8   d5,d1,d0,#1
            /* d7[i] = F(d3[i], d4[i], d5[i]); valid lanes 2..7:
               F(L1,L2,L3), F(L0,L1,L2), F(AL,L0,L1), F(A0,AL,L0),
               F(A1,A0,AL), F(A2,A1,A0) */
    206     VHADD.U8 d7,d3,d5
    207     VRHADD.U8 d7,d7,d4
            /* d8[i] = AVG(d4[i], d3[i]); lanes 1..4:
               AVG(L3,L2), AVG(L2,L1), AVG(L1,L0), AVG(L0,AL) */
    208     VRHADD.U8 d8,d4,d3
            /* Shift the averages up three bytes and the filtered values up two
               so that the pairs to be interleaved share a lane index 4..7. */
    209     VSHL.I64 d8,d8,#24
    210     VSHL.I64 d6,d7,#16
            /* Interleave bytes; afterwards
               d6 = AVG(L3,L2), F(L1,L2,L3), AVG(L2,L1), F(L0,L1,L2),
                    AVG(L1,L0), F(AL,L0,L1), AVG(L0,AL), F(A0,AL,L0)
               (the new d8 is overwritten two instructions later). */
    211     VZIP.8   d8,d6
            /* d7[0..1] = F(A1,A0,AL), F(A2,A1,A0) */
    212     VEXT.8   d7,d7,d7,#6
            /* d8 = d6[2..7] followed by d7[0..1] */
    213     VEXT.8   d8,d6,d7,#2
    214     ADD      r11,r3,r5
    215     ADD      r12,r5,r5
            /* Row 0 = d8[4..7], row 1 = d6[4..7], row 2 = d8[0..3],
               row 3 = d6[0..3]  (32-bit lane 1 / lane 0 of each register). */
    216     VST1.32  {d8[1]},[r3],r12
    217     VST1.32  {d6[1]},[r11],r12
    218     VST1.32  {d8[0]},[r3]
    219     VST1.32  {d6[0]},[r11]
    220     B        L0x348
    221 OMX_VC_4x4_VL:
            /* Bit 0x40 clear: only four bytes may be read through r1. */
    222     TST      r7,#0x40
    223     BEQ      L0x2b4
            /* 8-byte case: d3 = A0..A7; d4 and d5 are d3 rotated down by one and
               two bytes.  The wrapped-around bytes only affect lanes 6 and 7,
               which are not stored. */
    224     VLD1.8   {d3},[r1]
    225     VEXT.8   d4,d3,d3,#1
    226     VEXT.8   d5,d4,d4,#1
    227     B        L0x2c8
    228 L0x2b4:
            /* 4-byte case, same construction as in DIAG_DL:
               d3 = A0,A1,A2,A3,A3,...  d4 = A1,A2,A3,A3,...  d5 = A2,A3,A3,... */
    229     VLD1.32  {d0[1]},[r1]
    230     VDUP.8   d2,d0[7]
    231     VEXT.8   d3,d0,d2,#4
    232     VEXT.8   d4,d0,d2,#5
    233     VEXT.8   d5,d0,d2,#6
    234 L0x2c8:
            /* d7[i]  = AVG(d3[i], d4[i])
               d10[i] = F(d3[i], d4[i], d5[i]) */
    235     VRHADD.U8 d7,d4,d3
    236     VHADD.U8 d10,d3,d5
    237     VRHADD.U8 d10,d10,d4
            /* d8 = d7 rotated down one byte */
    238     VEXT.8   d8,d7,d7,#1
    239     ADD      r11,r3,r5
    240     ADD      r12,r5,r5
            /* d9 = d10[1..7] followed by d8[0] */
    241     VEXT.8   d9,d10,d8,#1
            /* Row 0 = d7[0..3], row 1 = d10[0..3], row 2 = d7[1..4],
               row 3 = d10[1..4]. */
    242     VST1.32  {d7[0]},[r3],r12
    243     VST1.32  {d10[0]},[r11],r12
    244     VST1.32  {d8[0]},[r3]
    245     VST1.32  {d9[0]},[r11]
    246     B        L0x348
    247 OMX_VC_4x4_HU:
    248     ADD      r9,r0,r4
    249     ADD      r10,r4,r4
            /* d1[4..7] = L0,L1,L2,L3 */
    250     VLD1.8   {d1[4]},[r0],r10
    251     VLD1.8   {d1[5]},[r9],r10
    252     VLD1.8   {d1[6]},[r0]
    253     VLD1.8   {d1[7]},[r9]
            /* d2 = L3 in every lane, used to pad the sequences:
               d3 = L0,L1,L2,L3,L3,L3,L3,L3
               d4 = L1,L2,L3,L3,...        d5 = L2,L3,L3,... */
    254     VDUP.8   d2,d1[7]
    255     VEXT.8   d3,d1,d2,#4
    256     VEXT.8   d4,d1,d2,#5
    257     VEXT.8   d5,d1,d2,#6
            /* d7[i] = F(d3[i], d4[i], d5[i]); d8[i] = AVG(d3[i], d4[i]).
               From lane 3 upward both are simply L3. */
    258     VHADD.U8 d7,d3,d5
    259     VRHADD.U8 d7,d7,d4
    260     VRHADD.U8 d8,d4,d3
            /* Interleave: d8 = AVG0,F0,AVG1,F1,AVG2,F2,AVG3,F3 and d7 = L3 in
               every lane (the interleaved upper halves). */
    261     VZIP.8   d8,d7
            /* Rows 0..2 are d8 windows that advance two bytes per row;
               row 3 is L3 repeated four times, taken from d7. */
    262     VST1.32  {d8[0]},[r3],r5
    263     VEXT.8   d8,d8,d8,#2
    264     VST1.32  {d8[0]},[r3],r5
    265     VEXT.8   d8,d8,d8,#2
    266     VST1.32  {d8[0]},[r3],r5
    267     VST1.32  {d7[0]},[r3]
    268 L0x348:
            /* Common exit: return 0, restore the saved registers, and return by
               popping the saved lr into pc. */
    269     MOV      r0,#0
    270     VPOP     {d8-d12}
    271     POP      {r4-r12,pc}
    272 
    273     .end                     /* end of assembly input; GNU as does not process anything after this directive */
    274