Home | History | Annotate | Download | only in arm
      1 @/*****************************************************************************
      2 @*
      3 @* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 @*
      5 @* Licensed under the Apache License, Version 2.0 (the "License");
      6 @* you may not use this file except in compliance with the License.
      7 @* You may obtain a copy of the License at:
      8 @*
      9 @* http://www.apache.org/licenses/LICENSE-2.0
     10 @*
     11 @* Unless required by applicable law or agreed to in writing, software
     12 @* distributed under the License is distributed on an "AS IS" BASIS,
     13 @* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @* See the License for the specific language governing permissions and
     15 @* limitations under the License.
     16 @*
     17 @*****************************************************************************/
     18 @/*******************************************************************************
     19 @* @file
     20 @*  ihevc_deblk_chroma_horz.s
     21 @*
     22 @* @brief
     23 @*  contains the function definition for chroma deblocking across a
     24 @* horizontal edge. the function is coded in arm neon assembly and can be
     25 
     26 @* compiled using rvct
     27 @*
     28 @* @author
     29 @*  anand s
     30 @*
     31 @* @par list of functions:
     32 @*  - ihevc_deblk_chroma_horz_a9q()
     33 @*
     34 @* @remarks
     35 @*  none
     36 @*
     37 @*******************************************************************************/
     38 
     39 .text
     40 .align 4                        @ GNU as for ARM treats the operand as a power of two (2^4 = 16 bytes)
     41 
     42 
     43 
     44 
        @ Lookup tables defined in another translation unit. Both are indexed
        @ below with "lsl #2", i.e. as arrays of 4-byte entries.
     45 .extern gai4_ihevc_qp_table
     46 .extern gai4_ihevc_tc_table
     47 .globl ihevc_deblk_chroma_horz_a9q
     48 
        @ Position-independent table addressing: each word below stores the
        @ distance from (ulblN + 8) to the table. The function loads the word
        @ and executes "add r3, r3, pc" exactly at label ulblN; the "- 8"
        @ matches pc reading 8 bytes past that instruction in ARM state, so the
        @ sum is the table's run-time address.
        @ NOTE(review): the "- 8" bias is only correct when this code is
        @ assembled as ARM (not Thumb) - confirm the build flags.
     49 gai4_ihevc_qp_table_addr:
     50 .long gai4_ihevc_qp_table - ulbl1 - 8
     51 
     52 gai4_ihevc_tc_table_addr:
     53 .long gai4_ihevc_tc_table - ulbl2 - 8
     54 
     55 .type ihevc_deblk_chroma_horz_a9q, %function
     56 
        @-----------------------------------------------------------------------
        @ ihevc_deblk_chroma_horz_a9q
        @
        @ Reads 8 bytes from each of four rows (r0-2*r1, r0-r1, r0, r0+r1),
        @ computes a clipped per-lane correction "delta" and conditionally
        @ rewrites the two middle rows (r0-r1 and r0) with 8 filtered bytes.
        @
        @   delta = clamp( (4*(row0 - rowm1) + rowm2 - rowp1 + 4) >> 3, -tc, +tc )
        @   rowm1 = sat_u8(rowm1 + delta)      (stored only when r8 != 0)
        @   row0  = sat_u8(row0  - delta)      (stored only when r9 != 0)
        @
        @ where row0 = [r0], rowm1 = [r0-r1], rowm2 = [r0-2*r1], rowp1 = [r0+r1].
        @ Two tc values are computed and interleaved lane by lane (even 16-bit
        @ lanes use the first, odd lanes the second).
        @ NOTE(review): presumably even/odd bytes are interleaved U/V samples
        @ and the two tc values are tc_u / tc_v - confirm against the C model.
        @
        @ In (registers):
        @   r0          base address of row0
        @   r1          byte distance between consecutive rows
        @   r2, r3      two values averaged with rounding: avg = (r2 + r3 + 1) >> 1
        @ In (stack, offsets valid after the 40-byte push below):
        @   [sp,#0x28]  offset added to avg for the first qp index   (-> r10)
        @   [sp,#0x2c]  offset added to avg for the second qp index  (-> r7)
        @   [sp,#0x30]  value doubled and added to both tc indices   (-> r4)
        @   [sp,#0x34]  non-zero enables the store to row r0-r1      (-> r8)
        @   [sp,#0x38]  non-zero enables the store to row r0         (-> r9)
        @ NOTE(review): these look like pu1_src, src_strd, quant_param_p,
        @ quant_param_q, qp_offset_u, qp_offset_v, tc_offset_div2,
        @ filter_flag_p, filter_flag_q - verify against the C prototype.
        @
        @ Out:    none in registers; up to 16 bytes written to memory.
        @ Saved:  r4-r12, lr are pushed on entry and restored on exit.
        @ NEON:   writes q0-q3 and q8-q10 only (d8-d15 are never touched).
        @-----------------------------------------------------------------------
     57 ihevc_deblk_chroma_horz_a9q:
     58     push        {r4-r12,lr}             @ 10 registers = 40 bytes, so stack args start at sp+0x28
     59     sub         r12,r0,r1               @ r12 = address of rowm1 (r0 - r1)
     60     vld1.8      {d0},[r0]               @ d0 = 8 bytes of row0
     61     sub         r5,r12,r1               @ r5 = address of rowm2 (r0 - 2*r1)
     62     add         r6,r0,r1                @ r6 = address of rowp1 (r0 + r1)
     63     add         r1,r2,r3                @ r1 = r2 + r3 (row distance no longer needed)
     64     vmovl.u8    q0,d0                   @ widen row0 to 16-bit lanes
     65     ldr         r10,[sp,#0x28]          @ r10 = first qp index offset
     66     vld1.8      {d2},[r12]              @ d2 = 8 bytes of rowm1
     67     add         r2,r1,#1                @ r2 = r2 + r3 + 1 (rounding term for the average)
     68     ldr         r4,[sp,#0x30]           @ r4 = value later doubled into the tc indices
     69     vld1.8      {d4},[r5]               @ d4 = 8 bytes of rowm2
     70     ldr         r8,[sp,#0x34]           @ r8 = store-enable flag for rowm1
     71     vld1.8      {d16},[r6]              @ d16 = 8 bytes of rowp1
     72     ldr         r9,[sp,#0x38]           @ r9 = store-enable flag for row0
     73     adds        r1,r10,r2,asr #1        @ r1 = r10 + avg; N flag is consumed by the bmi below
     74     vmovl.u8    q1,d2                   @ widen rowm1 to 16-bit lanes
     75     ldr         r7,[sp,#0x2c]           @ r7 = second qp index offset
     76     ldr         r3,gai4_ihevc_qp_table_addr @ r3 = pc-relative offset of the qp table
     77 ulbl1:
     78     add         r3, r3, pc              @ r3 = &gai4_ihevc_qp_table (must stay at ulbl1; flags untouched)
     79     bmi         l1.3312                 @ negative index: keep r1 as is, no table lookup
     80     cmp         r1,#0x39                @ compare index with 57
     81     ldrle       r1,[r3,r1,lsl #2]       @ index <= 57: r1 = gai4_ihevc_qp_table[r1]
     82     subgt       r1,r1,#6                @ index >  57: r1 = r1 - 6
     83 l1.3312:
     84     adds        r2,r7,r2,asr #1         @ r2 = r7 + avg; N flag is consumed by the bmi below
     85     vmovl.u8    q2,d4                   @ widen rowm2 to 16-bit lanes
     86     bmi         l1.3332                 @ negative index: keep r2 as is, no table lookup
     87     cmp         r2,#0x39                @ compare index with 57
     88     ldrle       r2,[r3,r2,lsl #2]       @ index <= 57: r2 = gai4_ihevc_qp_table[r2]
     89     subgt       r2,r2,#6                @ index >  57: r2 = r2 - 6
     90 l1.3332:
        @ First tc index: r1 = clamp(r1 + 2*r4 + 2, 0, 53)
     91     add         r1,r1,r4,lsl #1         @ r1 += 2*r4
     92     vsub.i16    q3,q0,q1                @ q3 = row0 - rowm1
     93     add         r3,r1,#2                @ r3 = candidate index (r1 + 2)
     94     cmp         r3,#0x35                @ compare candidate with 53
     95     movgt       r1,#0x35                @ above 53: clamp to 53
     96     vshl.i16    q3,q3,#2                @ q3 = 4*(row0 - rowm1)
     97     vmovl.u8    q8,d16                  @ widen rowp1 to 16-bit lanes (NEON ops leave flags intact)
     98     bgt         l1.3368                 @ already clamped high, skip the low clamp
     99     adds        r3,r1,#2                @ recompute candidate, setting N for the pair below
    100     addpl       r1,r1,#2                @ candidate >= 0: r1 = r1 + 2
    101     movmi       r1,#0                   @ candidate <  0: clamp to 0
    102 l1.3368:
    103     ldr         r3,gai4_ihevc_tc_table_addr @ r3 = pc-relative offset of the tc table
    104 ulbl2:
    105     add         r3, r3, pc              @ r3 = &gai4_ihevc_tc_table (must stay at ulbl2)
    106     vadd.i16    q2,q3,q2                @ q2 = 4*(row0 - rowm1) + rowm2
        @ Second tc index: r2 = clamp(r2 + 2*r4 + 2, 0, 53)
    107     add         r2,r2,r4,lsl #1         @ r2 += 2*r4
    108     vsub.i16    q3,q2,q8                @ q3 = 4*(row0 - rowm1) + rowm2 - rowp1
    109     add         r4,r2,#2                @ r4 = candidate index (r2 + 2); old r4 is dead
    110     ldr         r1,[r3,r1,lsl #2]       @ r1 = first tc = gai4_ihevc_tc_table[r1]
    111     cmp         r4,#0x35                @ compare candidate with 53
    112     movgt       r2,#0x35                @ above 53: clamp to 53
    113     bgt         l1.3412                 @ already clamped high, skip the low clamp
    114     adds        r4,r2,#2                @ recompute candidate, setting N for the pair below
    115     addpl       r2,r2,#2                @ candidate >= 0: r2 = r2 + 2
    116     movmi       r2,#0                   @ candidate <  0: clamp to 0
    117 l1.3412:
    118 
    119 
    120     ldr         r2,[r3,r2,lsl #2]       @ r2 = second tc = gai4_ihevc_tc_table[r2]
    121     cmp         r8,#0                   @ Z flag is consumed by "beq l1.3528"; nothing in between writes flags
    122     vdup.16     q8,r2                   @ q8 = second tc in every lane
    123     vdup.16     q2,r1                   @ q2 = first tc in every lane
    124     rsb         r1,r1,#0                @ r1 = -(first tc)
    125     vrshr.s16   q3,q3,#3                @ q3 = (q3 + 4) >> 3, rounding arithmetic shift
    126     vdup.16     q9,r1                   @ q9 = -(first tc) in every lane
    127     rsb         r1,r2,#0                @ r1 = -(second tc)
    128     vzip.16     q2,q8                   @ q2 = first, second, first, second, ... (upper bounds)
    129     vdup.16     q10,r1                  @ q10 = -(second tc) in every lane
    130 
    131     vzip.16     q9,q10                  @ q9 = -first, -second, -first, ... (lower bounds)
    132 
    133     vmin.s16    q8,q3,q2                @ apply upper bound: min(q3, +tc)
    134     vmax.s16    q2,q9,q8                @ apply lower bound: q2 = delta = max(-tc, ...)
    135     vadd.i16    q1,q1,q2                @ rowm1 + delta
    136     vsub.i16    q0,q0,q2                @ row0  - delta
    137     vqmovun.s16 d2,q1                   @ narrow rowm1 result to u8 with saturation
    138     vqmovun.s16 d0,q0                   @ narrow row0 result to u8 with saturation
    139     beq         l1.3528                 @ r8 == 0: leave rowm1 untouched
    140     vst1.8      {d2},[r12]              @ write 8 filtered bytes to rowm1
    141 l1.3528:
    142     cmp         r9,#0
    143     beq         l1.3540                 @ r9 == 0: leave row0 untouched
    144     vst1.8      {d0},[r0]               @ write 8 filtered bytes to row0
    145 l1.3540:
    146     pop         {r4-r12,pc}             @ restore saved registers and return
    147 
    148 
    149