Home | History | Annotate | Download | only in src_gcc
      1 /*
      2  * Copyright (C) 2007-2008 ARM Limited
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  *
     16  */
     17 /*
     18  * ARM-state NEON implementation of armVCM4P10_Interpolate_Chroma (GNU assembler syntax).
     19  */
     20 
     21     .eabi_attribute 24, 1    /* EABI build attribute 24 (Tag_ABI_align_needed) = 1: code relies on 8-byte alignment of 8-byte data */
     22     .eabi_attribute 25, 1    /* EABI build attribute 25 (Tag_ABI_align_preserved) = 1: code preserves 8-byte alignment */
     23 
     24     .arm                     /* assemble everything below as ARM-state (32-bit) instructions */
     25     .fpu neon                /* enable the NEON (V*) instructions used by this file */
     26 
     27     .text
     28     .align 4                 /* on ARM the operand is a power of two: align to 2^4 = 16 bytes */
     29 
     30 armVCM4P10_WidthBranchTableMVIsNotZero:
        /*
         * Branch table picked by the ADRNE in armVCM4P10_Interpolate_Chroma,
         * i.e. when r6 + r7 is non-zero.
         *
         * Every word is the distance from P0+8 to a handler.  P0+8 is the
         * value pc reads as in the ARM-state instruction "P0: ADD pc,r11",
         * so adding the word to pc lands exactly on the handler.
         *
         * The table is read at byte offset (r4 << 1), r4 being the first
         * stack argument of the function:
         *   r4 = 2 -> offset 4,  r4 = 4 -> offset 8,  r4 = 8 -> offset 16.
         * The words at offsets 0 and 12 are not reached by those three
         * values; they duplicate their neighbours.
         */
     31     .word   WidthIs2MVIsNotZero-(P0+8), WidthIs2MVIsNotZero-(P0+8)
     32     .word   WidthIs4MVIsNotZero-(P0+8), WidthIs4MVIsNotZero-(P0+8)
     33     .word   WidthIs8MVIsNotZero-(P0+8)
     34 
     35 armVCM4P10_WidthBranchTableMVIsZero:
        /*
         * Same layout and indexing as the table above, picked by the ADREQ
         * in armVCM4P10_Interpolate_Chroma when r6 + r7 is zero.  Its
         * handlers copy source bytes to the destination without filtering.
         */
     36     .word   WidthIs2MVIsZero-(P0+8), WidthIs2MVIsZero-(P0+8)
     37     .word   WidthIs4MVIsZero-(P0+8), WidthIs4MVIsZero-(P0+8)
     38     .word   WidthIs8MVIsZero-(P0+8)
     39 
     40     .global armVCM4P10_Interpolate_Chroma
            /*
             * Give the exported symbol function type (STT_FUNC).  Without it
             * the symbol is untyped and the linker cannot tell that it is
             * ARM-state code, so a BL from a Thumb caller would not be
             * turned into an interworking call.
             */
            .type   armVCM4P10_Interpolate_Chroma, %function
        /*
         * armVCM4P10_Interpolate_Chroma -- entry and dispatch.
         *
         * Register use established by the code in this file:
         *   r0  source pointer (post-incremented by every VLD1)
         *   r1  source step in bytes
         *   r2  destination pointer   (used by the VST1s in the handlers)
         *   r3  destination step      (post-increment of those VST1s)
         *   r4  1st stack argument: block width, selects the branch-table entry
         *   r5  2nd stack argument: row count, decremented by the handlers
         *   r6  3rd stack argument: horizontal weight a (presumably dx in
         *       1/8 units -- TODO confirm against the caller)
         *   r7  4th stack argument: vertical weight b (presumably dy)
         *
         * PUSH stores 10 words (40 bytes) and VPUSH 8 doublewords (64 bytes),
         * so the caller's stack arguments start at sp + 0x68.
         *
         * Before dispatching, the code preloads the first source row into
         * d0 (bytes 0..7) and d1 (bytes 1..8), leaving r0 on the next row,
         * and replicates the four bilinear weights into d12..d15.
         * Every handler returns with r0 = 0.
         */
     41 armVCM4P10_Interpolate_Chroma:
     42     PUSH     {r4-r12,lr}                  /* save core registers: 10 words = 40 bytes */
     43     VPUSH    {d8-d15}                     /* save d8-d15: 64 bytes (d8, d9, d12-d15 are written in this file) */
     44     LDRD     r6,r7,[sp,#0x70]             /* r6, r7 = 3rd and 4th stack arguments */
     45     LDRD     r4,r5,[sp,#0x68]             /* r4, r5 = 1st and 2nd stack arguments */
     46     RSB      r8,r6,#8                     /* r8 = 8 - a */
     47     RSB      r9,r7,#8                     /* r9 = 8 - b */
     48     CMN      r6,r7                        /* flags from a + b; EQ when the sum is zero (assumes a, b >= 0) */
     49     MOV      r10,#1                       /* r10 = 1: post-increment for the "byte 0" load of a row */
     50     ADREQ    r11, armVCM4P10_WidthBranchTableMVIsZero
     51     SUB      lr,r1,r10                    /* lr = r1 - 1: post-increment for the "byte 1" load of a row */
     52     ADRNE    r11, armVCM4P10_WidthBranchTableMVIsNotZero
     53     VLD1.8   {d0},[r0],r10                /* d0 = first row, bytes 0..7; r0 += 1 */
     54     SMULBB   r12,r8,r9                    /* r12 = (8-a)*(8-b) */
     55     SMULBB   r9,r6,r9                     /* r9  = a*(8-b) */
     56     VLD1.8   {d1},[r0],lr                 /* d1 = first row, bytes 1..8; r0 now points at the second row */
     57     SMULBB   r8,r8,r7                     /* r8  = (8-a)*b */
     58     SMULBB   r6,r6,r7                     /* r6  = a*b */
     59     VDUP.8   d12,r12                      /* replicate each weight into all 8 byte lanes */
     60     VDUP.8   d13,r9
     61     VDUP.8   d14,r8
     62     VDUP.8   d15,r6
     63     LDR      r11,[r11, r4, lsl #1]        /* r11 = table word at byte offset r4*2 (handler - (P0+8)) */
     64 P0: ADD      pc,r11                       /* pc reads as P0+8 here, so this jumps to the handler */
     65 
     66 WidthIs8MVIsNotZero:
        /*
         * 8-byte-wide weighted filter, four output rows per iteration.
         *
         * State on entry and at every loop-back:
         *   d0 / d1    8 bytes of the current source row at byte offset 0 / 1
         *   r0         points at the next source row
         *   r10 = 1, lr = r1 - 1
         *              post-increments: each VLD1 pair loads one row at
         *              offsets 0 and 1 and leaves r0 advanced by r1
         *   d12..d15   byte weights replicated by the dispatch code:
         *              (8-a)(8-b), a(8-b), (8-a)b, ab   (a, b = values loaded
         *              into r6, r7 at function entry)
         *
         * For every output row:
         *   acc = cur[x]*d12 + cur[x+1]*d13 + next[x]*d14 + next[x+1]*d15
         *   out = acc shifted right by 6 with rounding, saturated to 8 bits
         *
         * q2, q3, q11, q12 accumulate output rows 0..3.  Source rows 1..3 are
         * held in d2/d3, d16/d17, d18/d19; row 4 is loaded into d0/d1 and
         * becomes the current row of the next iteration.
         */
     67     VLD1.8   {d2},[r0],r10                /* row 1, offset 0 */
     68     VMULL.U8 q2,d0,d12                    /* out0  = row0[x]   * w0 */
     69     VLD1.8   {d3},[r0],lr                 /* row 1, offset 1 */
     70     VMULL.U8 q3,d2,d12                    /* out1  = row1[x]   * w0 */
     71     VLD1.8   {d16},[r0],r10               /* row 2, offset 0 */
     72     VMLAL.U8 q2,d1,d13                    /* out0 += row0[x+1] * w1 */
     73     VLD1.8   {d17},[r0],lr                /* row 2, offset 1 */
     74     VMULL.U8 q11,d16,d12                  /* out2  = row2[x]   * w0 */
     75     VMLAL.U8 q3,d3,d13                    /* out1 += row1[x+1] * w1 */
     76     VLD1.8   {d18},[r0],r10               /* row 3, offset 0 */
     77     VMLAL.U8 q2,d2,d14                    /* out0 += row1[x]   * w2 */
     78     VMLAL.U8 q11,d17,d13                  /* out2 += row2[x+1] * w1 */
     79     VMULL.U8 q12,d18,d12                  /* out3  = row3[x]   * w0 */
     80     VLD1.8   {d19},[r0],lr                /* row 3, offset 1 */
     81     VMLAL.U8 q3,d16,d14                   /* out1 += row2[x]   * w2 */
     82     VLD1.8   {d0},[r0],r10                /* row 4, offset 0 (current row of the next iteration) */
     83     VMLAL.U8 q12,d19,d13                  /* out3 += row3[x+1] * w1 */
     84     VMLAL.U8 q11,d18,d14                  /* out2 += row3[x]   * w2 */
     85     VMLAL.U8 q2,d3,d15                    /* out0 += row1[x+1] * w3 */
     86     VLD1.8   {d1},[r0],lr                 /* row 4, offset 1 */
     87     VMLAL.U8 q12,d0,d14                   /* out3 += row4[x]   * w2 */
     88     VMLAL.U8 q3,d17,d15                   /* out1 += row2[x+1] * w3 */
     89     VMLAL.U8 q11,d19,d15                  /* out2 += row3[x+1] * w3 */
     90     SUBS     r5,r5,#4                     /* four rows done; flags drive the BGT below */
     91     VMLAL.U8 q12,d1,d15                   /* out3 += row4[x+1] * w3 */
     92     VQRSHRN.U16 d8,q2,#6                  /* round, shift right 6, saturate to u8 */
     93     VQRSHRN.U16 d9,q3,#6
     94     VQRSHRN.U16 d20,q11,#6
     95     VST1.64  {d8},[r2],r3                 /* store 8 bytes, r2 += r3 */
     96     VQRSHRN.U16 d21,q12,#6
     97     VST1.64  {d9},[r2],r3
     98     VST1.64  {d20},[r2],r3
     99     VST1.64  {d21},[r2],r3
    100     BGT      WidthIs8MVIsNotZero          /* loop while the remaining row count is > 0 */
    101     MOV      r0,#0                        /* return value 0 */
    102     VPOP     {d8-d15}                     /* restore what the prologue saved */
    103     POP      {r4-r12,pc}                  /* restore core registers and return */
    104 
    105 WidthIs4MVIsNotZero:
        /*
         * 4-byte-wide weighted filter, two output rows per iteration.
         *
         * State on entry and at every loop-back:
         *   d0 / d1    8 bytes of the current source row at byte offset 0 / 1
         *   r0         points at the next source row
         *   r10 = 1, lr = r1 - 1   post-increments of the row loads
         *   d12..d15   replicated weights (8-a)(8-b), a(8-b), (8-a)b, ab
         *
         * All 8 lanes are filtered, but only the low 32 bits (4 bytes) of
         * each result are stored.  q2 / q3 accumulate output rows 0 / 1;
         * row 2 is loaded into d0/d1 for the next iteration.
         */
    106     VLD1.8   {d2},[r0],r10                /* row 1, offset 0 */
    107     VMULL.U8 q2,d0,d12                    /* out0  = row0[x]   * w0 */
    108     VMULL.U8 q3,d2,d12                    /* out1  = row1[x]   * w0 */
    109     VLD1.8   {d3},[r0],lr                 /* row 1, offset 1 */
    110     VMLAL.U8 q2,d1,d13                    /* out0 += row0[x+1] * w1 */
    111     VMLAL.U8 q3,d3,d13                    /* out1 += row1[x+1] * w1 */
    112     VLD1.8   {d0},[r0],r10                /* row 2, offset 0 (current row of the next iteration) */
    113     VMLAL.U8 q2,d2,d14                    /* out0 += row1[x]   * w2 */
    114     VMLAL.U8 q3,d0,d14                    /* out1 += row2[x]   * w2 */
    115     VLD1.8   {d1},[r0],lr                 /* row 2, offset 1 */
    116     SUBS     r5,r5,#2                     /* two rows done; flags drive the BGT below */
    117     VMLAL.U8 q3,d1,d15                    /* out1 += row2[x+1] * w3 */
    118     VMLAL.U8 q2,d3,d15                    /* out0 += row1[x+1] * w3 */
    119     VQRSHRN.U16 d9,q3,#6                  /* round, shift right 6, saturate to u8 */
    120     VQRSHRN.U16 d8,q2,#6
    121     VST1.32  {d8[0]},[r2],r3              /* store the low 4 bytes, r2 += r3 */
    122     VST1.32  {d9[0]},[r2],r3
    123     BGT      WidthIs4MVIsNotZero          /* loop while the remaining row count is > 0 */
    124     MOV      r0,#0                        /* return value 0 */
    125     VPOP     {d8-d15}                     /* restore what the prologue saved */
    126     POP      {r4-r12,pc}                  /* restore core registers and return */
    127 
    128 WidthIs2MVIsNotZero:
        /*
         * 2-byte-wide weighted filter, two output rows per iteration.
         *
         * State on entry and at every loop-back:
         *   d0 / d1    8 bytes of the current source row at byte offset 0 / 1
         *   r0         points at the next source row
         *   r10 = 1, lr = r1 - 1   post-increments of the row loads
         *   d12..d15   replicated weights (8-a)(8-b), a(8-b), (8-a)b, ab
         *
         * The arithmetic is the same as in the 4-wide handler; only the low
         * 16 bits (2 bytes) of each result are stored.
         */
    129     VLD1.8   {d2},[r0],r10                /* row 1, offset 0 */
    130     VMULL.U8 q2,d0,d12                    /* out0  = row0[x]   * w0 */
    131     VMULL.U8 q3,d2,d12                    /* out1  = row1[x]   * w0 */
    132     VLD1.8   {d3},[r0],lr                 /* row 1, offset 1 */
    133     VMLAL.U8 q2,d1,d13                    /* out0 += row0[x+1] * w1 */
    134     VMLAL.U8 q3,d3,d13                    /* out1 += row1[x+1] * w1 */
    135     VLD1.8   {d0},[r0],r10                /* row 2, offset 0 (current row of the next iteration) */
    136     VMLAL.U8 q2,d2,d14                    /* out0 += row1[x]   * w2 */
    137     VMLAL.U8 q3,d0,d14                    /* out1 += row2[x]   * w2 */
    138     VLD1.8   {d1},[r0],lr                 /* row 2, offset 1 */
    139     SUBS     r5,r5,#2                     /* two rows done; flags drive the BGT below */
    140     VMLAL.U8 q3,d1,d15                    /* out1 += row2[x+1] * w3 */
    141     VMLAL.U8 q2,d3,d15                    /* out0 += row1[x+1] * w3 */
    142     VQRSHRN.U16 d9,q3,#6                  /* round, shift right 6, saturate to u8 */
    143     VQRSHRN.U16 d8,q2,#6
    144     VST1.16  {d8[0]},[r2],r3              /* store the low 2 bytes, r2 += r3 */
    145     VST1.16  {d9[0]},[r2],r3
    146     BGT      WidthIs2MVIsNotZero          /* loop while the remaining row count is > 0 */
    147     MOV      r0,#0                        /* return value 0 */
    148     VPOP     {d8-d15}                     /* restore what the prologue saved */
    149     POP      {r4-r12,pc}                  /* restore core registers and return */
    150 
    151 WidthIs8MVIsZero:
        /*
         * 8-byte-wide copy with no filtering, two rows per iteration.
         * The rows preloaded into d0/d1 by the dispatch code are not used
         * here; the source pointer is rewound and every row is reloaded.
         */
    152     SUB      r0,r0,r1                     /* the dispatch loads advanced r0 by 1 + (r1-1) = r1; go back to the first row */
    153 WidthIs8LoopMVIsZero:
    154     VLD1.8   {d0},[r0],r1                 /* load 8 bytes of a row, r0 += r1 */
    155     SUBS     r5,r5,#2                     /* two rows per pass; flags drive the BGT below */
    156     VLD1.8   {d1},[r0],r1                 /* load the following row */
    157     VST1.64  {d0},[r2],r3                 /* store 8 bytes, r2 += r3 */
    158     VST1.64  {d1},[r2],r3
    159     BGT      WidthIs8LoopMVIsZero         /* loop while the remaining row count is > 0 */
    160     MOV      r0,#0                        /* return value 0 */
    161     VPOP     {d8-d15}                     /* restore what the prologue saved */
    162     POP      {r4-r12,pc}                  /* restore core registers and return */
    163 
    164 WidthIs4MVIsZero:
        /*
         * 4-byte-wide copy with no filtering, two rows per iteration.
         *
         * State on entry and at every loop-back:
         *   d0   current source row (first loaded by the dispatch code)
         *   r0   points at the next source row
         * Each pass loads the next row into d1, stores the low 4 bytes of
         * d0, loads the row after that into d0 for the following pass, and
         * stores the low 4 bytes of d1.
         *
         * NOTE(review): every load fetches 8 bytes although only 4 are
         * stored, and the d0 reload in the final pass reads a row that is
         * never stored -- confirm callers provide readable memory there.
         */
    165     VLD1.8   {d1},[r0],r1                 /* d1 = next row, r0 += r1 */
    166     SUBS     r5,r5,#2                     /* two rows per pass; flags drive the BGT below */
    167     VST1.32  {d0[0]},[r2],r3              /* store low 4 bytes of the current row, r2 += r3 */
    168     VLD1.8   {d0},[r0],r1                 /* d0 = row for the next pass */
    169     VST1.32  {d1[0]},[r2],r3              /* store low 4 bytes of the row loaded into d1 */
    170     BGT      WidthIs4MVIsZero             /* loop while the remaining row count is > 0 */
    171     MOV      r0,#0                        /* return value 0 */
    172     VPOP     {d8-d15}                     /* restore what the prologue saved */
    173     POP      {r4-r12,pc}                  /* restore core registers and return */
    174 
    175 WidthIs2MVIsZero:
        /*
         * 2-byte-wide copy with no filtering, two rows per iteration.
         *
         * State on entry and at every loop-back:
         *   d0   current source row (first loaded by the dispatch code)
         *   r0   points at the next source row
         * Each pass loads the next row into d1, stores the low 2 bytes of
         * d0, loads the row after that into d0 for the following pass, and
         * stores the low 2 bytes of d1.
         *
         * NOTE(review): every load fetches 8 bytes although only 2 are
         * stored, and the d0 reload in the final pass reads a row that is
         * never stored -- confirm callers provide readable memory there.
         */
    176     VLD1.8   {d1},[r0],r1                 /* d1 = next row, r0 += r1 */
    177     SUBS     r5,r5,#2                     /* two rows per pass; flags drive the BGT below */
    178     VST1.16  {d0[0]},[r2],r3              /* store low 2 bytes of the current row, r2 += r3 */
    179     VLD1.8   {d0},[r0],r1                 /* d0 = row for the next pass */
    180     VST1.16  {d1[0]},[r2],r3              /* store low 2 bytes of the row loaded into d1 */
    181     BGT      WidthIs2MVIsZero             /* loop while the remaining row count is > 0 */
    182     MOV      r0,#0                        /* return value 0 */
    183     VPOP     {d8-d15}                     /* restore what the prologue saved */
    184     POP      {r4-r12,pc}                  /* restore core registers and return */
    185 
    186     .end                                  /* end of the assembly source */
    187 
    188