Home | History | Annotate | Download | only in src_gcc
      1 /*
      2  * Copyright (C) 2007-2008 ARM Limited
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  *
     16  */
     17 /*
     18  *
     19  */
     20 
            /*
             * Assembler setup for this file.
             * Build-attribute tags 24 and 25 are Tag_ABI_align_needed and
             * Tag_ABI_align_preserved in the ARM EABI build-attributes
             * addenda (8-byte alignment required / preserved) - confirm
             * against the toolchain documentation.
             */
     21     .eabi_attribute 24, 1
     22     .eabi_attribute 25, 1
     23 
     24     .arm                        /* assemble as ARM (not Thumb) instructions */
     25     .fpu neon                   /* allow NEON (Advanced SIMD) mnemonics */
     26     .text                       /* emit into the code section */
     27 
     28     .global omxVCM4P10_FilterDeblockingLuma_VerEdge_I
     29     .func   omxVCM4P10_FilterDeblockingLuma_VerEdge_I
            /*
             * omxVCM4P10_FilterDeblockingLuma_VerEdge_I
             *
             * Filters four vertical edges, spaced 4 bytes apart, over 16 rows
             * in place. Work is done one tile of 8 rows by 8 bytes at a time:
             * the tile is loaded, transposed so that each D register holds
             * one pixel column, filtered by an external helper, transposed
             * back and stored.
             *
             * Inputs as used by this code (the API names are presumed from
             * the OpenMAX DL prototype - confirm against the header):
             *   r0        byte pointer to the first edge (presumably pSrcDst);
             *             each tile starts 4 bytes to the left of its edge
             *   r1        row stride in bytes (presumably srcdstStep)
             *   r2        two bytes: [0] is used for the first edge of each
             *             pass, [1] for the other three (presumably pAlpha)
             *   r3        same layout as r2 (presumably pBeta)
             *   1st stack argument   pointer kept in r5, advanced 16 bytes
             *             per pass (presumably pThresholds)
             *   2nd stack argument   pointer kept in r4 to flag bytes read
             *             two at a time (presumably pBS)
             * Returns r0 = 0.
             * r4-r12 and d8-d15 are saved on entry and restored on exit.
             *
             * Loop control: r9 starts at 0x11000000 and is doubled (ADDS)
             * once per edge. Carry is set on the 4th and 8th doubling, which
             * ends a pass over four edges; the result is zero only on the
             * 8th, which ends the function. That gives two passes of four
             * edges, the second pass 8 rows below the first.
             */
     30 omxVCM4P10_FilterDeblockingLuma_VerEdge_I:
     31     PUSH     {r4-r12,lr}        /* 10 core registers = 40 bytes */
     32     VPUSH    {d8-d15}           /* 8 D registers = 64 bytes; stack args now start at sp+0x68 */
     33     ADD      r7,r2,#1           /* r7 -> second byte of the r2 array */
     34     ADD      r8,r3,#1           /* r8 -> second byte of the r3 array */
     35     VLD1.8   {d0[]},[r2]        /* d0 = byte [r2] replicated to all 8 lanes */
     36     SUB      r0,r0,#4           /* tile starts 4 bytes left of the edge */
     37     VLD1.8   {d2[]},[r3]        /* d2 = byte [r3] replicated to all 8 lanes */
     38     LDR      r4,[sp,#0x6c]      /* r4 = second stack argument */
     39     LDR      r5,[sp,#0x68]      /* r5 = first stack argument */
     40     MOV      r6,#0              /* zero source for clearing mask halves below */
     41     VMOV.I8  d14,#0             /* d14/d15 are not read in this file; presumably constants for the helpers */
     42     VMOV.I8  d15,#0x1
     43     MOV      r9,#0x11000000     /* loop control word, see header */
     44     ADD      r11,r1,r1          /* r11 = 2 * stride */
            /* Per-edge loop: one 8-row tile around one vertical edge. */
     45 L0x38:
     46     LDRH     r12,[r4],#4        /* r12 = two flag bytes for this tile; r4 += 4 */
     47     CMP      r12,#0
     48     BEQ      L0x160             /* both flag bytes zero: nothing to filter */
            /*
             * Load 8 rows of 8 bytes. Even rows go through r0, odd rows
             * through r10, each stepping by two strides. The VZIP/VTRN
             * sequence interleaved with the loads is an 8x8 byte transpose;
             * afterwards the columns, left to right, are in
             * d7, d6, d5, d4 | d8, d9, d10, d11 with the edge at the bar.
             */
     49     ADD      r10,r0,r1          /* r10 -> row 1 */
     50     VLD1.8   {d7},[r0],r11      /* row 0 */
     51     VLD1.8   {d8},[r10],r11     /* row 1 */
     52     VLD1.8   {d5},[r0],r11      /* row 2 */
     53     VZIP.8   d7,d8
     54     VLD1.8   {d10},[r10],r11    /* row 3 */
     55     VLD1.8   {d6},[r0],r11      /* row 4 */
     56     VZIP.8   d5,d10
     57     VLD1.8   {d9},[r10],r11     /* row 5 */
     58     VLD1.8   {d4},[r0],r11      /* row 6 */
     59     VLD1.8   {d11},[r10],r11    /* row 7 */
     60     VZIP.8   d6,d9
     61     VZIP.16  d8,d10
     62     VZIP.8   d4,d11
     63     SUB      r0,r0,r1,LSL #3    /* rewind r0 by 8 rows to the top of the tile */
     64     VZIP.16  d7,d5
     65     VZIP.16  d9,d11
     66     VZIP.16  d6,d4
     67     VTRN.32  d8,d9
     68     VTRN.32  d5,d4
     69     VTRN.32  d10,d11
     70     VTRN.32  d7,d6
            /*
             * Build per-row masks. With p0..p2 = d4, d5, d6 (left of the
             * edge, nearest first) and q0..q2 = d8, d9, d10 (right of it):
             *   d16 = (d0 > |p0-q0|) and (d2 > max(|p1-p0|, |q1-q0|)),
             *         with rows 0-3 / 4-7 cleared when the matching flag
             *         byte in r12 is zero
             *   d12 = d16 and (d2 > |q2-q0|)
             *   d17 = d16 and (d2 > |p2-p0|)
             */
     71     VABD.U8  d13,d4,d8          /* |p0 - q0| */
     72     VABD.U8  d12,d5,d4          /* |p1 - p0| */
     73     VABD.U8  d18,d9,d8          /* |q1 - q0| */
     74     VABD.U8  d19,d6,d4          /* |p2 - p0| */
     75     TST      r12,#0xff          /* is the first flag byte zero? */
     76     VCGT.U8  d16,d0,d13
     77     VMAX.U8  d12,d18,d12
     78     VABD.U8  d17,d10,d8         /* |q2 - q0| */
     79     VMOVEQ.32 d16[0],r6         /* first flag byte zero: clear mask for rows 0-3 */
     80     TST      r12,#0xff00        /* is the second flag byte zero? */
     81     VCGT.U8  d19,d2,d19
     82     VCGT.U8  d12,d2,d12
     83     VMOVEQ.32 d16[1],r6         /* second flag byte zero: clear mask for rows 4-7 */
     84     VCGT.U8  d17,d2,d17
     85     VAND     d16,d16,d12
     86     TST      r12,#4             /* bit 2 of the first flag byte selects the helper */
     87     VAND     d12,d16,d17
     88     VAND     d17,d16,d19
     89     BNE      L0x17c             /* bit set: use the SGE4 helper (flags still from TST above) */
            /*
             * Bit clear: SLT4 helper (defined elsewhere). Its results are
             * expected in d30, d29, d24, d25; this file reads them without
             * writing them. The inverse transpose below takes the columns in
             * the order d7, d6, d30, d29, d24, d25, d10, d11.
             */
     90     BL       armVCM4P10_DeblockingLumabSLT4_unsafe
     91     VZIP.8   d7,d6
     92     VZIP.8   d30,d29
     93     VZIP.8   d24,d25
     94     VZIP.8   d10,d11
     95     VZIP.16  d7,d30
     96     ADD      r10,r0,r1          /* r10 -> row 1 for the stores */
     97     VZIP.16  d24,d10
     98     VZIP.16  d25,d11
     99     VZIP.16  d6,d29
    100     VTRN.32  d7,d24
    101     VTRN.32  d30,d10
    102     VTRN.32  d6,d25
    103     VTRN.32  d29,d11
    104     VST1.8   {d7},[r0],r11      /* row 0 */
    105     VST1.8   {d24},[r10],r11    /* row 1 */
    106     VST1.8   {d30},[r0],r11     /* row 2 */
    107     VST1.8   {d10},[r10],r11    /* row 3 */
    108     VST1.8   {d6},[r0],r11      /* row 4 */
    109     VST1.8   {d25},[r10],r11    /* row 5 */
    110     ADDS     r9,r9,r9           /* advance loop control; C and Z are consumed by BCC/BNE below */
    111     VST1.8   {d29},[r0],r11     /* row 6 */
    112     ADD      r5,r5,#2           /* only +2 here versus +4 on the other paths; presumably the SLT4 helper advances r5 by 2 itself - confirm in the helper */
    113     VST1.8   {d11},[r10],r1     /* row 7; post-increment is r1 here, not r11, but r10 is recomputed before its next use */
    114     SUB      r0,r0,r1,LSL #3    /* rewind r0 by 8 rows */
    115     VLD1.8   {d0[]},[r7]        /* later edges of the pass use the second byte of each array */
    116     ADD      r0,r0,#4           /* next edge is 4 bytes to the right */
    117     VLD1.8   {d2[]},[r8]
    118     BCC      L0x38              /* carry clear: more edges in this pass */
    119     B        L0x1f0
            /* Skip path: both flag bytes were zero, so only advance the pointers. */
    120 L0x160:
    121     ADD      r0,r0,#4           /* next edge */
    122     ADDS     r9,r9,r9           /* advance loop control */
    123     VLD1.8   {d0[]},[r7]
    124     ADD      r5,r5,#4
    125     VLD1.8   {d2[]},[r8]
    126     BCC      L0x38
    127     B        L0x1f0
            /*
             * Bit 2 set: SGE4 helper (defined elsewhere). Its results are
             * expected in d31, d30, d29, d24, d25, d28. The inverse transpose
             * takes the columns in the order
             * d7, d31, d30, d29, d24, d25, d28, d11.
             */
    128 L0x17c:
    129     BL       armVCM4P10_DeblockingLumabSGE4_unsafe
    130     VZIP.8   d7,d31
    131     VZIP.8   d30,d29
    132     VZIP.8   d24,d25
    133     VZIP.8   d28,d11
    134     VZIP.16  d7,d30
    135     ADD      r10,r0,r1          /* r10 -> row 1 for the stores */
    136     VZIP.16  d24,d28
    137     VZIP.16  d25,d11
    138     VZIP.16  d31,d29
    139     VTRN.32  d7,d24
    140     VTRN.32  d30,d28
    141     VTRN.32  d31,d25
    142     VTRN.32  d29,d11
    143     VST1.8   {d7},[r0],r11      /* row 0 */
    144     VST1.8   {d24},[r10],r11    /* row 1 */
    145     VST1.8   {d30},[r0],r11     /* row 2 */
    146     VST1.8   {d28},[r10],r11    /* row 3 */
    147     VST1.8   {d31},[r0],r11     /* row 4 */
    148     VST1.8   {d25},[r10],r11    /* row 5 */
    149     ADDS     r9,r9,r9           /* advance loop control */
    150     VST1.8   {d29},[r0],r11     /* row 6 */
    151     ADD      r5,r5,#4
    152     VST1.8   {d11},[r10],r11    /* row 7 */
    153     SUB      r0,r0,r1,LSL #3    /* rewind r0 by 8 rows */
    154     VLD1.8   {d0[]},[r7]
    155     ADD      r0,r0,#4           /* next edge */
    156     VLD1.8   {d2[]},[r8]
    157     BCC      L0x38              /* carry clear: more edges in this pass */
            /*
             * End of a pass over four edges. r4 and r5 have each moved 16
             * bytes (r5 assuming the SLT4 helper adds 2, see above);
             * subtracting 0xe leaves a net +2. r0 moves back 16 bytes and
             * down 8 rows. None of the instructions between the last ADDS and
             * the BNE set flags, so BNE tests the ADDS result.
             */
    158 L0x1f0:
    159     SUB      r4,r4,#0xe
    160     SUB      r5,r5,#0xe
    161     SUB      r0,r0,#0x10
    162     VLD1.8   {d0[]},[r2]        /* first edge of the next pass uses byte [0] again */
    163     ADD      r0,r0,r1,LSL #3
    164     VLD1.8   {d2[]},[r3]
    165     BNE      L0x38              /* r9 nonzero: run the second pass */
    166     MOV      r0,#0              /* return value 0 */
    167     VPOP     {d8-d15}
    168     POP      {r4-r12,pc}        /* restore registers and return */
    169     .endfunc
    170 
    171     .end                        /* end of assembly input; the assembler ignores anything after this */
    172 
    173