Home | History | Annotate | Download | only in src_gcc
      1 /*
      2  * Copyright (C) 2007-2008 ARM Limited
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  *
     16  */
     17 /*
     18  *
     19  */
     20 
     21     .eabi_attribute 24, 1
     22     .eabi_attribute 25, 1
     23 
     24     .arm
     25     .fpu neon
     26     .text
     27 
     28     .global omxVCM4P10_FilterDeblockingLuma_VerEdge_I
     29 omxVCM4P10_FilterDeblockingLuma_VerEdge_I:
     30     PUSH     {r4-r12,lr}
     31     VPUSH    {d8-d15}
     32     ADD      r7,r2,#1
     33     ADD      r8,r3,#1
     34     VLD1.8   {d0[]},[r2]
     35     SUB      r0,r0,#4
     36     VLD1.8   {d2[]},[r3]
     37     LDR      r4,[sp,#0x6c]
     38     LDR      r5,[sp,#0x68]
     39     MOV      r6,#0
     40     VMOV.I8  d14,#0
     41     VMOV.I8  d15,#0x1
     42     MOV      r9,#0x11000000
     43     ADD      r11,r1,r1
     44 L0x38:
     45     LDRH     r12,[r4],#4
     46     CMP      r12,#0
     47     BEQ      L0x160
     48     ADD      r10,r0,r1
     49     VLD1.8   {d7},[r0],r11
     50     VLD1.8   {d8},[r10],r11
     51     VLD1.8   {d5},[r0],r11
     52     VZIP.8   d7,d8
     53     VLD1.8   {d10},[r10],r11
     54     VLD1.8   {d6},[r0],r11
     55     VZIP.8   d5,d10
     56     VLD1.8   {d9},[r10],r11
     57     VLD1.8   {d4},[r0],r11
     58     VLD1.8   {d11},[r10],r11
     59     VZIP.8   d6,d9
     60     VZIP.16  d8,d10
     61     VZIP.8   d4,d11
     62     SUB      r0,r0,r1,LSL #3
     63     VZIP.16  d7,d5
     64     VZIP.16  d9,d11
     65     VZIP.16  d6,d4
     66     VTRN.32  d8,d9
     67     VTRN.32  d5,d4
     68     VTRN.32  d10,d11
     69     VTRN.32  d7,d6
     70     VABD.U8  d13,d4,d8
     71     VABD.U8  d12,d5,d4
     72     VABD.U8  d18,d9,d8
     73     VABD.U8  d19,d6,d4
     74     TST      r12,#0xff
     75     VCGT.U8  d16,d0,d13
     76     VMAX.U8  d12,d18,d12
     77     VABD.U8  d17,d10,d8
     78     VMOVEQ.32 d16[0],r6
     79     TST      r12,#0xff00
     80     VCGT.U8  d19,d2,d19
     81     VCGT.U8  d12,d2,d12
     82     VMOVEQ.32 d16[1],r6
     83     VCGT.U8  d17,d2,d17
     84     VAND     d16,d16,d12
     85     TST      r12,#4
     86     VAND     d12,d16,d17
     87     VAND     d17,d16,d19
     88     BNE      L0x17c
     89     BL       armVCM4P10_DeblockingLumabSLT4_unsafe
     90     VZIP.8   d7,d6
     91     VZIP.8   d30,d29
     92     VZIP.8   d24,d25
     93     VZIP.8   d10,d11
     94     VZIP.16  d7,d30
     95     ADD      r10,r0,r1
     96     VZIP.16  d24,d10
     97     VZIP.16  d25,d11
     98     VZIP.16  d6,d29
     99     VTRN.32  d7,d24
    100     VTRN.32  d30,d10
    101     VTRN.32  d6,d25
    102     VTRN.32  d29,d11
    103     VST1.8   {d7},[r0],r11
    104     VST1.8   {d24},[r10],r11
    105     VST1.8   {d30},[r0],r11
    106     VST1.8   {d10},[r10],r11
    107     VST1.8   {d6},[r0],r11
    108     VST1.8   {d25},[r10],r11
    109     ADDS     r9,r9,r9
    110     VST1.8   {d29},[r0],r11
    111     ADD      r5,r5,#2
    112     VST1.8   {d11},[r10],r1
    113     SUB      r0,r0,r1,LSL #3
    114     VLD1.8   {d0[]},[r7]
    115     ADD      r0,r0,#4
    116     VLD1.8   {d2[]},[r8]
    117     BCC      L0x38
    118     B        L0x1f0
    119 L0x160:
    120     ADD      r0,r0,#4
    121     ADDS     r9,r9,r9
    122     VLD1.8   {d0[]},[r7]
    123     ADD      r5,r5,#4
    124     VLD1.8   {d2[]},[r8]
    125     BCC      L0x38
    126     B        L0x1f0
    127 L0x17c:
    128     BL       armVCM4P10_DeblockingLumabSGE4_unsafe
    129     VZIP.8   d7,d31
    130     VZIP.8   d30,d29
    131     VZIP.8   d24,d25
    132     VZIP.8   d28,d11
    133     VZIP.16  d7,d30
    134     ADD      r10,r0,r1
    135     VZIP.16  d24,d28
    136     VZIP.16  d25,d11
    137     VZIP.16  d31,d29
    138     VTRN.32  d7,d24
    139     VTRN.32  d30,d28
    140     VTRN.32  d31,d25
    141     VTRN.32  d29,d11
    142     VST1.8   {d7},[r0],r11
    143     VST1.8   {d24},[r10],r11
    144     VST1.8   {d30},[r0],r11
    145     VST1.8   {d28},[r10],r11
    146     VST1.8   {d31},[r0],r11
    147     VST1.8   {d25},[r10],r11
    148     ADDS     r9,r9,r9
    149     VST1.8   {d29},[r0],r11
    150     ADD      r5,r5,#4
    151     VST1.8   {d11},[r10],r11
    152     SUB      r0,r0,r1,LSL #3
    153     VLD1.8   {d0[]},[r7]
    154     ADD      r0,r0,#4
    155     VLD1.8   {d2[]},[r8]
    156     BCC      L0x38
    157 L0x1f0:
    158     SUB      r4,r4,#0xe
    159     SUB      r5,r5,#0xe
    160     SUB      r0,r0,#0x10
    161     VLD1.8   {d0[]},[r2]
    162     ADD      r0,r0,r1,LSL #3
    163     VLD1.8   {d2[]},[r3]
    164     BNE      L0x38
    165     MOV      r0,#0
    166     VPOP     {d8-d15}
    167     POP      {r4-r12,pc}
    168 
    169     .end
    170 
    171