Home | History | Annotate | Download | only in src_gcc
      1 /*
      2  * Copyright (C) 2007-2008 ARM Limited
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  *
     16  */
     17 /*
     18  *
     19  */
     20 
     21     .eabi_attribute 24, 1    /* build attribute tag 24 = 1; presumably Tag_ABI_align_needed -- confirm against the ARM EABI build-attribute list */
     22     .eabi_attribute 25, 1    /* build attribute tag 25 = 1; presumably Tag_ABI_align_preserved -- confirm */
     23 
     24     .arm                     /* assemble what follows as ARM (not Thumb) instructions */
     25     .fpu neon                /* accept NEON mnemonics (VLD1, VRHADD, VQRSHRUN, ...) */
     26     .text                    /* place the code in the text section */
     27 
     28     .global omxVCM4P10_InterpolateLuma    /* export the entry point */
     29     .func   omxVCM4P10_InterpolateLuma    /* debug marker, paired with .endfunc at the end */
        /*
         * omxVCM4P10_InterpolateLuma
         *
         * Walks a rectangle in 4x4-byte tiles and, for every tile, runs one of
         * 16 code paths. The path is chosen once at entry as
         *     selector = (1st stack word) + 4 * (2nd stack word)
         * and reused for every tile.
         *
         * Inputs, as the visible code uses them:
         *   r0   pointer the tiles are read from
         *   r1   byte step between rows at r0
         *   r2   pointer the tiles are written to
         *   r3   byte step between rows at r2
         *   1st stack word   low part of the selector
         *   2nd stack word   high part of the selector (scaled by 4)
         *   3rd stack word   column count, consumed 4 per tile
         *   4th stack word   row count, consumed 4 per row of tiles
         * NOTE(review): these presumably correspond to pSrc, srcStep, pDst,
         * dstStep, dx, dy, width and height of the OpenMAX DL prototype, with
         * dx/dy in 0..3 and width/height multiples of 4 -- confirm against the
         * header. Nothing in this routine validates them, and both loops test
         * their counters only after a tile has been processed.
         *
         * Returns 0 in r0.
         *
         * The armVCM4P10_InterpolateLuma_*_unsafe helpers are defined elsewhere.
         * Which NEON registers they leave results in is inferred here only from
         * how each case consumes them -- TODO confirm against the helpers.
         */
     30 omxVCM4P10_InterpolateLuma:
     31     PUSH     {r4-r12,lr}              /* save 10 core registers (40 bytes) */
     32     VPUSH    {d8-d15}                 /* save d8-d15 (64 bytes) */
     33     SUB      sp,sp,#0x10              /* 16 bytes of scratch, used to park r0-r3 per tile */
     34     LDR      r6,[sp,#0x78]            /* 0x78 = 40 + 64 + 16: first word the caller left on the stack */
     35     LDR      r7,[sp,#0x7c]            /* second caller stack word */
     36     LDR      r5,[sp,#0x80]            /* third caller stack word: column counter */
     37     LDR      r4,[sp,#0x84]            /* fourth caller stack word: row counter */
     38     ADD      r6,r6,r7,LSL #2          /* r6 = selector = first + 4 * second */
     39     ADD      r11,sp,#0                /* r11 -> scratch area at sp */
     40     VMOV.I16 d31,#0x14                /* every 16-bit lane of d31 = 20; not read in this routine, presumably a constant for the helpers -- confirm */
     41     VMOV.I16 d30,#0x5                 /* every 16-bit lane of d30 = 5; same remark */
     42 L0x2c:                                /* ---- per-tile loop head ---- */
     43     STM      r11,{r0-r3}              /* park the tile's r0-r3; the cases below modify r0 and r2 */
     44     ADD      pc,pc,r6,LSL #2          /* computed jump: pc reads as this instruction + 8, so selector 0 lands two slots below */
     45     B        L0x3f0                   /* pad slot directly after the ADD; the computed jump never lands here */
     46     B        L0x78                    /* selector 0 */
     47     B        L0xa8                    /* selector 1 */
     48     B        L0xdc                    /* selector 2 */
     49     B        L0x100                   /* selector 3 */
     50     B        L0x134                   /* selector 4 */
     51     B        L0x168                   /* selector 5 */
     52     B        L0x1a8                   /* selector 6 */
     53     B        L0x1f0                   /* selector 7 */
     54     B        L0x234                   /* selector 8 */
     55     B        L0x258                   /* selector 9 */
     56     B        L0x2b0                   /* selector 10 */
     57     B        L0x2d8                   /* selector 11 */
     58     B        L0x330                   /* selector 12 */
     59     B        L0x364                   /* selector 13 */
     60     B        L0x3a8                   /* selector 14 */
     61     B        L0x3f0                   /* selector 15 */
     62 L0x78:                                /* selector 0: plain copy of the 4x4 tile */
     63     ADD      r12,r0,r1,LSL #1         /* r12 -> row 2 of the input; r0 covers rows 0/1, r12 rows 2/3 */
     64     VLD1.8   {d9},[r0],r1             /* row 0 (8 bytes loaded, only 4 stored below) */
     65     VLD1.8   {d11},[r12],r1           /* row 2 */
     66     VLD1.8   {d10},[r0]               /* row 1 */
     67     VLD1.8   {d12},[r12]              /* row 3 */
     68     ADD      r12,r2,r3,LSL #1         /* r12 -> row 2 of the output */
     69     VST1.32  {d9[0]},[r2],r3          /* store low 4 bytes as output row 0 */
     70     VST1.32  {d11[0]},[r12],r3        /* output row 2 */
     71     VST1.32  {d10[0]},[r2]            /* output row 1 */
     72     VST1.32  {d12[0]},[r12]           /* output row 3 */
     73     ADD      r11,sp,#0                /* r11 -> parked r0-r3 for the LDM at L0x434 */
     74     B        L0x434
     75 L0xa8:                                /* selector 1: HalfHor result averaged with d14/d18/d16/d20 */
     76     SUB      r0,r0,#2                 /* hand the helper a pointer 2 bytes to the left */
     77     BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
     78     VRHADD.U8 d22,d22,d14             /* rounding average; d14/d18/d16/d20 are not written in this routine -- presumably left by the helper, confirm */
     79     VRHADD.U8 d26,d26,d18
     80     VRHADD.U8 d24,d24,d16
     81     VRHADD.U8 d28,d28,d20
     82     ADD      r12,r2,r3,LSL #1         /* r12 -> output row 2 */
     83     VST1.32  {d22[0]},[r2],r3         /* output row 0 */
     84     VST1.32  {d26[0]},[r12],r3        /* output row 2 */
     85     VST1.32  {d24[0]},[r2]            /* output row 1 */
     86     VST1.32  {d28[0]},[r12]           /* output row 3 */
     87     ADD      r11,sp,#0                /* r11 -> parked r0-r3 */
     88     B        L0x434
     89 L0xdc:                                /* selector 2: HalfHor result stored unmodified */
     90     SUB      r0,r0,#2                 /* pointer 2 bytes to the left for the helper */
     91     BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
     92     ADD      r12,r2,r3,LSL #1         /* r12 -> output row 2 */
     93     VST1.32  {d22[0]},[r2],r3         /* rows are taken from d22, d24, d26, d28 (rows 0, 1, 2, 3) */
     94     VST1.32  {d26[0]},[r12],r3
     95     VST1.32  {d24[0]},[r2]
     96     VST1.32  {d28[0]},[r12]
     97     ADD      r11,sp,#0                /* r11 -> parked r0-r3 */
     98     B        L0x434
     99 L0x100:                               /* selector 3: HalfHor result averaged with d15/d19/d17/d21 */
    100     SUB      r0,r0,#2                 /* pointer 2 bytes to the left for the helper */
    101     BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    102     VRHADD.U8 d22,d22,d15             /* same as selector 1 but with the odd-numbered partner registers -- presumably helper outputs, confirm */
    103     VRHADD.U8 d26,d26,d19
    104     VRHADD.U8 d24,d24,d17
    105     VRHADD.U8 d28,d28,d21
    106     ADD      r12,r2,r3,LSL #1         /* r12 -> output row 2 */
    107     VST1.32  {d22[0]},[r2],r3
    108     VST1.32  {d26[0]},[r12],r3
    109     VST1.32  {d24[0]},[r2]
    110     VST1.32  {d28[0]},[r12]
    111     ADD      r11,sp,#0                /* r11 -> parked r0-r3 */
    112     B        L0x434
    113 L0x134:                               /* selector 4: HalfVer result averaged with d9/d11/d10/d12 */
    114     SUB      r0,r0,r1,LSL #1          /* hand the helper a pointer 2 rows up */
    115     BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    116     VRHADD.U8 d0,d0,d9                /* d9/d11/d10/d12 are not written on this path -- presumably left by the helper, confirm */
    117     VRHADD.U8 d4,d4,d11
    118     VRHADD.U8 d2,d2,d10
    119     VRHADD.U8 d6,d6,d12
    120     ADD      r12,r2,r3,LSL #1         /* r12 -> output row 2 */
    121     VST1.32  {d0[0]},[r2],r3          /* output row 0 */
    122     VST1.32  {d4[0]},[r12],r3         /* output row 2 */
    123     VST1.32  {d2[0]},[r2]             /* output row 1 */
    124     VST1.32  {d6[0]},[r12]            /* output row 3 */
    125     ADD      r11,sp,#0                /* r11 -> parked r0-r3 */
    126     B        L0x434
    127 L0x168:                               /* selector 5: average of HalfVer and HalfHor results */
    128     MOV      r8,r0                    /* keep the tile's read pointer for the second helper call */
    129     SUB      r0,r0,r1,LSL #1          /* 2 rows up for HalfVer */
    130     BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    131     SUB      r0,r8,#2                 /* original pointer, 2 bytes left, for HalfHor */
    132     BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    133     VRHADD.U8 d22,d22,d0              /* d22.. from HalfHor, d0.. from HalfVer (as consumed here) */
    134     VRHADD.U8 d26,d26,d4
    135     VRHADD.U8 d24,d24,d2
    136     VRHADD.U8 d28,d28,d6
    137     ADD      r12,r2,r3,LSL #1         /* r12 -> output row 2 */
    138     VST1.32  {d22[0]},[r2],r3
    139     VST1.32  {d26[0]},[r12],r3
    140     VST1.32  {d24[0]},[r2]
    141     VST1.32  {d28[0]},[r12]
    142     ADD      r11,sp,#0                /* r11 -> parked r0-r3 */
    143     B        L0x434
    144 L0x1a8:                               /* selector 6: HalfDiagHorVer result averaged with narrowed q7..q10 */
    145     SUB      r0,r0,r1,LSL #1          /* 2 rows up ... */
    146     SUB      r0,r0,#2                 /* ... and 2 bytes left for the helper */
    147     BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    148     VQRSHRUN.S16 d14,q7,#5            /* round, shift right by 5, saturate to unsigned 8 bit; q7..q10 presumably hold 16-bit intermediates from the helper -- confirm */
    149     VQRSHRUN.S16 d16,q8,#5
    150     VQRSHRUN.S16 d18,q9,#5
    151     VQRSHRUN.S16 d20,q10,#5
    152     VRHADD.U8 d0,d0,d14               /* average the helper's d0/d4/d2/d6 with the narrowed rows */
    153     VRHADD.U8 d4,d4,d18
    154     VRHADD.U8 d2,d2,d16
    155     VRHADD.U8 d6,d6,d20
    156     ADD      r12,r2,r3,LSL #1         /* r12 -> output row 2 */
    157     VST1.32  {d0[0]},[r2],r3
    158     VST1.32  {d4[0]},[r12],r3
    159     VST1.32  {d2[0]},[r2]
    160     VST1.32  {d6[0]},[r12]
    161     ADD      r11,sp,#0                /* r11 -> parked r0-r3 */
    162     B        L0x434
    163 L0x1f0:                               /* selector 7: like selector 5, HalfVer taken one byte to the right */
    164     MOV      r8,r0                    /* keep the tile's read pointer */
    165     ADD      r0,r0,#1                 /* one byte right ... */
    166     SUB      r0,r0,r1,LSL #1          /* ... and 2 rows up for HalfVer */
    167     BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    168     SUB      r0,r8,#2                 /* original pointer, 2 bytes left, for HalfHor */
    169     BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    170     VRHADD.U8 d22,d22,d0              /* average the two helper results */
    171     VRHADD.U8 d26,d26,d4
    172     VRHADD.U8 d24,d24,d2
    173     VRHADD.U8 d28,d28,d6
    174     ADD      r12,r2,r3,LSL #1         /* r12 -> output row 2 */
    175     VST1.32  {d22[0]},[r2],r3
    176     VST1.32  {d26[0]},[r12],r3
    177     VST1.32  {d24[0]},[r2]
    178     VST1.32  {d28[0]},[r12]
    179     ADD      r11,sp,#0                /* r11 -> parked r0-r3 */
    180     B        L0x434
    181 L0x234:                               /* selector 8: HalfVer result stored unmodified */
    182     SUB      r0,r0,r1,LSL #1          /* 2 rows up for the helper */
    183     BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    184     ADD      r12,r2,r3,LSL #1         /* r12 -> output row 2 */
    185     VST1.32  {d0[0]},[r2],r3          /* rows are taken from d0, d2, d4, d6 (rows 0, 1, 2, 3) */
    186     VST1.32  {d4[0]},[r12],r3
    187     VST1.32  {d2[0]},[r2]
    188     VST1.32  {d6[0]},[r12]
    189     ADD      r11,sp,#0                /* r11 -> parked r0-r3 */
    190     B        L0x434
    191 L0x258:                               /* selector 9: HalfDiagVerHor result averaged with q9..q12 shifted by 4 bytes */
    192     SUB      r0,r0,r1,LSL #1          /* 2 rows up ... */
    193     SUB      r0,r0,#2                 /* ... and 2 bytes left for the helper */
    194     BL       armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    195     VEXT.8   d18,d18,d19,#4           /* d18 = bytes 4..11 of d19:d18, i.e. skip two 16-bit lanes */
    196     VEXT.8   d20,d20,d21,#4
    197     VEXT.8   d22,d22,d23,#4
    198     VEXT.8   d24,d24,d25,#4
    199     VQRSHRUN.S16 d14,q9,#5            /* narrow with rounding shift by 5 and unsigned saturation */
    200     VQRSHRUN.S16 d16,q10,#5
    201     VQRSHRUN.S16 d18,q11,#5           /* overwrites d18 only after q9 was narrowed above; keep this order */
    202     VQRSHRUN.S16 d20,q12,#5           /* likewise overwrites d20 after q10 was consumed */
    203     VRHADD.U8 d0,d0,d14               /* average with the helper's d0/d4/d2/d6 */
    204     VRHADD.U8 d4,d4,d18
    205     VRHADD.U8 d2,d2,d16
    206     VRHADD.U8 d6,d6,d20
    207     ADD      r12,r2,r3,LSL #1         /* r12 -> output row 2 */
    208     VST1.32  {d0[0]},[r2],r3
    209     VST1.32  {d4[0]},[r12],r3
    210     VST1.32  {d2[0]},[r2]
    211     VST1.32  {d6[0]},[r12]
    212     ADD      r11,sp,#0                /* r11 -> parked r0-r3 */
    213     B        L0x434
    214 L0x2b0:                               /* selector 10: HalfDiagHorVer result stored unmodified */
    215     SUB      r0,r0,r1,LSL #1          /* 2 rows up ... */
    216     SUB      r0,r0,#2                 /* ... and 2 bytes left for the helper */
    217     BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    218     ADD      r12,r2,r3,LSL #1         /* r12 -> output row 2 */
    219     VST1.32  {d0[0]},[r2],r3
    220     VST1.32  {d4[0]},[r12],r3
    221     VST1.32  {d2[0]},[r2]
    222     VST1.32  {d6[0]},[r12]
    223     ADD      r11,sp,#0                /* r11 -> parked r0-r3 */
    224     B        L0x434
    225 L0x2d8:                               /* selector 11: like selector 9, but q9..q12 shifted by 6 bytes */
    226     SUB      r0,r0,r1,LSL #1          /* 2 rows up ... */
    227     SUB      r0,r0,#2                 /* ... and 2 bytes left for the helper */
    228     BL       armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
    229     VEXT.8   d18,d18,d19,#6           /* d18 = bytes 6..13 of d19:d18, i.e. skip three 16-bit lanes */
    230     VEXT.8   d20,d20,d21,#6
    231     VEXT.8   d22,d22,d23,#6
    232     VEXT.8   d24,d24,d25,#6
    233     VQRSHRUN.S16 d14,q9,#5            /* narrow with rounding shift by 5 and unsigned saturation */
    234     VQRSHRUN.S16 d16,q10,#5
    235     VQRSHRUN.S16 d18,q11,#5           /* overwrites d18 only after q9 was narrowed above; keep this order */
    236     VQRSHRUN.S16 d20,q12,#5           /* likewise overwrites d20 after q10 was consumed */
    237     VRHADD.U8 d0,d0,d14               /* average with the helper's d0/d4/d2/d6 */
    238     VRHADD.U8 d4,d4,d18
    239     VRHADD.U8 d2,d2,d16
    240     VRHADD.U8 d6,d6,d20
    241     ADD      r12,r2,r3,LSL #1         /* r12 -> output row 2 */
    242     VST1.32  {d0[0]},[r2],r3
    243     VST1.32  {d4[0]},[r12],r3
    244     VST1.32  {d2[0]},[r2]
    245     VST1.32  {d6[0]},[r12]
    246     ADD      r11,sp,#0                /* r11 -> parked r0-r3 */
    247     B        L0x434
    248 L0x330:                               /* selector 12: HalfVer result averaged with d10/d12/d11/d13 */
    249     SUB      r0,r0,r1,LSL #1          /* 2 rows up for the helper */
    250     BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    251     VRHADD.U8 d0,d0,d10               /* same as selector 4 but each partner register is one higher -- presumably the next input row, confirm */
    252     VRHADD.U8 d4,d4,d12
    253     VRHADD.U8 d2,d2,d11
    254     VRHADD.U8 d6,d6,d13
    255     ADD      r12,r2,r3,LSL #1         /* r12 -> output row 2 */
    256     VST1.32  {d0[0]},[r2],r3
    257     VST1.32  {d4[0]},[r12],r3
    258     VST1.32  {d2[0]},[r2]
    259     VST1.32  {d6[0]},[r12]
    260     ADD      r11,sp,#0                /* r11 -> parked r0-r3 */
    261     B        L0x434
    262 L0x364:                               /* selector 13: like selector 5, HalfHor taken one row down */
    263     MOV      r8,r0                    /* keep the tile's read pointer */
    264     SUB      r0,r0,r1,LSL #1          /* 2 rows up for HalfVer */
    265     BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    266     ADD      r0,r8,r1                 /* original pointer, one row down ... */
    267     SUB      r0,r0,#2                 /* ... and 2 bytes left, for HalfHor */
    268     BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    269     VRHADD.U8 d22,d22,d0              /* average the two helper results */
    270     VRHADD.U8 d26,d26,d4
    271     VRHADD.U8 d24,d24,d2
    272     VRHADD.U8 d28,d28,d6
    273     ADD      r12,r2,r3,LSL #1         /* r12 -> output row 2 */
    274     VST1.32  {d22[0]},[r2],r3
    275     VST1.32  {d26[0]},[r12],r3
    276     VST1.32  {d24[0]},[r2]
    277     VST1.32  {d28[0]},[r12]
    278     ADD      r11,sp,#0                /* r11 -> parked r0-r3 */
    279     B        L0x434
    280 L0x3a8:                               /* selector 14: like selector 6, but narrowing q8..q11 instead of q7..q10 */
    281     SUB      r0,r0,r1,LSL #1          /* 2 rows up ... */
    282     SUB      r0,r0,#2                 /* ... and 2 bytes left for the helper */
    283     BL       armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    284     VQRSHRUN.S16 d14,q8,#5            /* narrow with rounding shift by 5 and unsigned saturation */
    285     VQRSHRUN.S16 d16,q9,#5            /* overwrites d16 only after q8 was narrowed above; keep this order */
    286     VQRSHRUN.S16 d18,q10,#5           /* overwrites d18 after q9 was consumed */
    287     VQRSHRUN.S16 d20,q11,#5           /* overwrites d20 after q10 was consumed */
    288     VRHADD.U8 d0,d0,d14               /* average with the helper's d0/d4/d2/d6 */
    289     VRHADD.U8 d4,d4,d18
    290     VRHADD.U8 d2,d2,d16
    291     VRHADD.U8 d6,d6,d20
    292     ADD      r12,r2,r3,LSL #1         /* r12 -> output row 2 */
    293     VST1.32  {d0[0]},[r2],r3
    294     VST1.32  {d4[0]},[r12],r3
    295     VST1.32  {d2[0]},[r2]
    296     VST1.32  {d6[0]},[r12]
    297     ADD      r11,sp,#0                /* r11 -> parked r0-r3 */
    298     B        L0x434
    299 L0x3f0:                               /* selector 15: HalfVer one byte right, HalfHor one row down, averaged */
    300     MOV      r8,r0                    /* keep the tile's read pointer */
    301     ADD      r0,r0,#1                 /* one byte right ... */
    302     SUB      r0,r0,r1,LSL #1          /* ... and 2 rows up for HalfVer */
    303     BL       armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
    304     ADD      r0,r8,r1                 /* original pointer, one row down ... */
    305     SUB      r0,r0,#2                 /* ... and 2 bytes left, for HalfHor */
    306     BL       armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
    307     VRHADD.U8 d22,d22,d0              /* average the two helper results */
    308     VRHADD.U8 d26,d26,d4
    309     VRHADD.U8 d24,d24,d2
    310     VRHADD.U8 d28,d28,d6
    311     ADD      r12,r2,r3,LSL #1         /* r12 -> output row 2 */
    312     VST1.32  {d22[0]},[r2],r3
    313     VST1.32  {d26[0]},[r12],r3
    314     VST1.32  {d24[0]},[r2]
    315     VST1.32  {d28[0]},[r12]
    316     ADD      r11,sp,#0                /* r11 -> parked r0-r3; falls through into the loop tail */
    317 L0x434:                               /* ---- loop tail shared by every case ---- */
    318     LDM      r11,{r0-r3}              /* restore the tile's original r0-r3 */
    319     SUBS     r5,r5,#4                 /* 4 columns done; sets the flags tested by the BGT below */
    320     ADD      r0,r0,#4                 /* next tile to the right (ADD without S leaves the flags alone) */
    321     ADD      r2,r2,#4
    322     BGT      L0x2c                    /* more columns left in this row of tiles */
    323     SUBS     r4,r4,#4                 /* 4 rows done; sets the flags tested by the final BGT */
    324     LDR      r5,[sp,#0x80]            /* reload the column count for the next row of tiles */
    325     ADD      r11,sp,#0                /* r11 -> scratch area */
    326     ADD      r0,r0,r1,LSL #2          /* down 4 rows on the input side */
    327     ADD      r2,r2,r3,LSL #2          /* down 4 rows on the output side */
    328     SUB      r0,r0,r5                 /* step back by the column count; returns to the row start when that count is a multiple of 4 */
    329     SUB      r2,r2,r5
    330     BGT      L0x2c                    /* more rows left; none of the instructions since SUBS r4 change the flags */
    331     MOV      r0,#0                    /* return value 0 */
    332     ADD      sp,sp,#0x10              /* drop the 16-byte scratch area */
    333     VPOP     {d8-d15}                 /* restore d8-d15 */
    334     POP      {r4-r12,pc}              /* restore core registers and return by loading the saved lr into pc */
    335     .endfunc
    336 
    337     .end                              /* end of assembly input */
    338 
    339