Home | History | Annotate | Download | only in arm
      1 @/*****************************************************************************
      2 @*
      3 @* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 @*
      5 @* Licensed under the Apache License, Version 2.0 (the "License");
      6 @* you may not use this file except in compliance with the License.
      7 @* You may obtain a copy of the License at:
      8 @*
      9 @* http://www.apache.org/licenses/LICENSE-2.0
     10 @*
     11 @* Unless required by applicable law or agreed to in writing, software
     12 @* distributed under the License is distributed on an "AS IS" BASIS,
     13 @* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @* See the License for the specific language governing permissions and
     15 @* limitations under the License.
     16 @*
     17 @*****************************************************************************/
     18 @/*******************************************************************************
     19 @* @file
     20 @*  ihevc_deblk_chroma_horz.s
     21 @*
     22 @* @brief
     23 @*  contains the function definition for deblocking a horizontal chroma
     24 @* edge. the function is coded in arm neon assembly. the original note
     25 @* says it can be compiled using rvct; the directives used in this file
     26 @* follow gnu assembler syntax
     27 @*
     28 @* @author
     29 @*  anand s
     30 @*
     31 @* @par list of functions:
     32 @*  - ihevc_deblk_chroma_horz_a9q()
     33 @*
     34 @* @remarks
     35 @*  none
     36 @*
     37 @*******************************************************************************/
     38 
     39 .equ    qp_offset_u_offset,     40      @ [sp,#40] after the function's push: qp_offset_u
     40 .equ    qp_offset_v_offset,     44      @ [sp,#44]: qp_offset_v
     41 .equ    tc_offset_div2_offset,  48      @ [sp,#48]: tc_offset_div2
     42 .equ    filter_p_offset,        52      @ [sp,#52]: flag gating the store to the row above the edge
     43 .equ    filter_q_offset,        56      @ [sp,#56]: flag gating the store to the row below the edge
        @ The offsets above are only valid after `push {r4-r12,lr}` in
        @ ihevc_deblk_chroma_horz_a9q: 10 registers * 4 bytes = 40 bytes, so the
        @ first stack-passed argument sits at sp+40. Changing the register list of
        @ that push requires changing these values as well.
     44 
     45 .text
     46 .align 4                                @ NOTE(review): GNU as on ARM treats this as 2^4 = 16-byte alignment - confirm for the assembler in use
     47 
     48 
     49 
     50 
     51 .extern gai4_ihevc_qp_table             @ lookup table indexed by the qp index (4-byte entries, see `lsl #2` at the use site)
     52 .extern gai4_ihevc_tc_table             @ lookup table indexed by the clamped tc index (4-byte entries)
     53 .globl ihevc_deblk_chroma_horz_a9q
     54 
        @ The two words below hold the distance from the pc value read at labels
        @ ulbl1 / ulbl2 to the corresponding table, so the function can form the
        @ table address with `add r3, r3, pc` without an absolute address.
        @ The "- 8" cancels the +8 that reading pc adds to the address of the
        @ `add` instruction (this assumes the code is assembled as ARM-state
        @ instructions, not Thumb).
     55 gai4_ihevc_qp_table_addr:
     56 .long gai4_ihevc_qp_table - ulbl1 - 8
     57 
     58 gai4_ihevc_tc_table_addr:
     59 .long gai4_ihevc_tc_table - ulbl2 - 8
     60 
     61 .type ihevc_deblk_chroma_horz_a9q, %function
     62 
        @-----------------------------------------------------------------------
        @ ihevc_deblk_chroma_horz_a9q
        @
        @ Filters 8 bytes on each side of a horizontal edge. Four rows of 8
        @ bytes are read; in the comments below they are called
        @     P1 = [r0 - 2*r1]   P0 = [r0 - r1]   Q0 = [r0]   Q1 = [r0 + r1]
        @ For every byte position (arithmetic done in 16-bit lanes):
        @     delta = ((4*(Q0 - P0) + P1 - Q1) + 4) >> 3
        @     delta = max(-tc, min(delta, tc))
        @     P0'   = saturate_u8(P0 + delta)
        @     Q0'   = saturate_u8(Q0 - delta)
        @ Even lanes use tc_u and odd lanes use tc_v (the two thresholds are
        @ interleaved with vzip), so the row data is presumably interleaved
        @ U/V samples - confirm against the caller.
        @
        @ tc_u / tc_v derivation (same steps for both, using qp_offset_u or
        @ qp_offset_v):
        @     idx = qp_offset + ((r2 + r3 + 1) >> 1)
        @     qp  = idx                       if idx < 0
        @         = gai4_ihevc_qp_table[idx]  if 0 <= idx <= 0x39
        @         = idx - 6                   if idx > 0x39
        @     tc  = gai4_ihevc_tc_table[clamp(qp + 2 + 2*tc_offset_div2, 0, 0x35)]
        @
        @ In:    r0 = address of row Q0 (first row below the edge)
        @        r1 = byte distance between consecutive rows
        @        r2, r3 = two qp values; only their sum is used
        @                 (presumably the qp of the P side and of the Q side)
        @        stack (offsets valid after the push below):
        @          qp_offset_u, qp_offset_v, tc_offset_div2,
        @          filter_p flag, filter_q flag
        @ Out:   no value is returned in a register by this code; row P0 is
        @        rewritten when the filter_p flag is non-zero, row Q0 when the
        @        filter_q flag is non-zero
        @ Clobb: r1-r3, flags, q0-q3 (d0-d7), q8-q10 (d16-d21);
        @        r4-r12 are saved and restored; r0 is left unchanged
        @
        @ Scalar and NEON instructions are interleaved throughout. Several
        @ conditional instructions consume flags that were set many lines
        @ earlier; this works because the instructions in between (NEON ops,
        @ ldr, and add/rsb without the "s" suffix) do not write the flags.
        @ Keep that in mind before reordering anything.
        @-----------------------------------------------------------------------
     63 ihevc_deblk_chroma_horz_a9q:
     64     push        {r4-r12,lr}                     @ 10 words = 40 bytes; the *_offset equates depend on this
     65     sub         r12,r0,r1                       @ r12 = address of row P0
     66     vld1.8      {d0},[r0]                       @ d0 = 8 bytes of row Q0
     67     sub         r5,r12,r1                       @ r5 = address of row P1
     68     add         r6,r0,r1                        @ r6 = address of row Q1
     69     add         r1,r2,r3                        @ r1 = r2 + r3 (row distance no longer needed)
     70     vmovl.u8    q0,d0                           @ q0 = Q0 zero-extended to 16-bit lanes
     71     ldr         r10,[sp,#qp_offset_u_offset]    @ r10 = qp_offset_u
     72     vld1.8      {d2},[r12]                      @ d2 = 8 bytes of row P0
     73     add         r2,r1,#1                        @ r2 = (qp sum) + 1, shared by the u and v index
     74     ldr         r4,[sp,#tc_offset_div2_offset]  @ r4 = tc_offset_div2
     75     vld1.8      {d4},[r5]                       @ d4 = 8 bytes of row P1
     76     ldr         r8,[sp,#filter_p_offset]        @ r8 = filter_p flag
     77     vld1.8      {d16},[r6]                      @ d16 = 8 bytes of row Q1
     78     ldr         r9,[sp,#filter_q_offset]        @ r9 = filter_q flag
     79     adds        r1,r10,r2,asr #1                @ r1 = idx_u = qp_offset_u + ((sum + 1) >> 1); N flag used by bmi below
     80     vmovl.u8    q1,d2                           @ q1 = P0 zero-extended to 16-bit lanes
     81     ldr         r7,[sp,#qp_offset_v_offset]     @ r7 = qp_offset_v
     82     ldr         r3,gai4_ihevc_qp_table_addr     @ r3 = pc-relative offset of gai4_ihevc_qp_table
     83 ulbl1:
     84     add         r3, r3, pc                      @ r3 = address of gai4_ihevc_qp_table
     85     bmi         l1.3312                         @ idx_u < 0 (flags from adds above): keep idx_u as qp_u
     86     cmp         r1,#0x39
     87     ldrle       r1,[r3,r1,lsl #2]               @ 0 <= idx_u <= 0x39: qp_u = table[idx_u]
     88     subgt       r1,r1,#6                        @ idx_u > 0x39: qp_u = idx_u - 6
     89 l1.3312:
     90     adds        r2,r7,r2,asr #1                 @ r2 = idx_v = qp_offset_v + ((sum + 1) >> 1)
     91     vmovl.u8    q2,d4                           @ q2 = P1 zero-extended to 16-bit lanes
     92     bmi         l1.3332                         @ idx_v < 0: keep idx_v as qp_v
     93     cmp         r2,#0x39
     94     ldrle       r2,[r3,r2,lsl #2]               @ 0 <= idx_v <= 0x39: qp_v = table[idx_v]
     95     subgt       r2,r2,#6                        @ idx_v > 0x39: qp_v = idx_v - 6
     96 l1.3332:
     97     add         r1,r1,r4,lsl #1                 @ r1 = qp_u + 2*tc_offset_div2
     98     vsub.i16    q3,q0,q1                        @ q3 = Q0 - P0
     99     add         r3,r1,#2                        @ r3 = unclamped tc index for u (qp table address no longer needed)
    100     cmp         r3,#0x35
    101     movgt       r1,#0x35                        @ index above 0x35: clamp to 0x35
    102     vshl.i16    q3,q3,#2                        @ q3 = 4*(Q0 - P0)
    103     vmovl.u8    q8,d16                          @ q8 = Q1 zero-extended to 16-bit lanes
    104     bgt         l1.3368                         @ still the flags of the cmp above; already clamped, skip low check
    105     adds        r3,r1,#2                        @ recompute index to get its sign
    106     addpl       r1,r1,#2                        @ index >= 0: r1 = index
    107     movmi       r1,#0                           @ index < 0: clamp to 0
    108 l1.3368:
    109     ldr         r3,gai4_ihevc_tc_table_addr     @ r3 = pc-relative offset of gai4_ihevc_tc_table
    110 ulbl2:
    111     add         r3, r3, pc                      @ r3 = address of gai4_ihevc_tc_table
    112     vadd.i16    q2,q3,q2                        @ q2 = 4*(Q0 - P0) + P1
    113     add         r2,r2,r4,lsl #1                 @ r2 = qp_v + 2*tc_offset_div2
    114     vsub.i16    q3,q2,q8                        @ q3 = 4*(Q0 - P0) + P1 - Q1
    115     add         r4,r2,#2                        @ r4 = unclamped tc index for v (tc_offset_div2 no longer needed)
    116     ldr         r1,[r3,r1,lsl #2]               @ r1 = tc_u = tc_table[clamped u index]
    117     cmp         r4,#0x35
    118     movgt       r2,#0x35                        @ index above 0x35: clamp to 0x35
    119     bgt         l1.3412
    120     adds        r4,r2,#2                        @ recompute index to get its sign
    121     addpl       r2,r2,#2                        @ index >= 0: r2 = index
    122     movmi       r2,#0                           @ index < 0: clamp to 0
    123 l1.3412:
    124 
    125 
    126     ldr         r2,[r3,r2,lsl #2]               @ r2 = tc_v = tc_table[clamped v index]
    127     cmp         r8,#0                           @ Z flag for the filter_p test; consumed by `beq l1.3528` below
    128     vdup.16     q8,r2                           @ q8 = tc_v in every lane
    129     vdup.16     q2,r1                           @ q2 = tc_u in every lane
    130     rsb         r1,r1,#0                        @ r1 = -tc_u (no "s": flags from cmp r8 stay intact)
    131     vrshr.s16   q3,q3,#3                        @ q3 = delta = (q3 + 4) >> 3, rounding arithmetic shift
    132     vdup.16     q9,r1                           @ q9 = -tc_u in every lane
    133     rsb         r1,r2,#0                        @ r1 = -tc_v
    134     vzip.16     q2,q8                           @ q2 = {tc_u, tc_v, tc_u, tc_v, ...} (q8 gets the same pattern)
    135     vdup.16     q10,r1                          @ q10 = -tc_v in every lane
    136 
    137     vzip.16     q9,q10                          @ q9 = {-tc_u, -tc_v, -tc_u, -tc_v, ...}
    138 
    139     vmin.s16    q8,q3,q2                        @ q8 = min(delta, +tc)
    140     vmax.s16    q2,q9,q8                        @ q2 = max(-tc, q8) = clipped delta
    141     vadd.i16    q1,q1,q2                        @ q1 = P0 + delta
    142     vsub.i16    q0,q0,q2                        @ q0 = Q0 - delta
    143     vqmovun.s16 d2,q1                           @ d2 = new P0 row, saturated to unsigned 8-bit
    144     vqmovun.s16 d0,q0                           @ d0 = new Q0 row, saturated to unsigned 8-bit
    145     beq         l1.3528                         @ filter_p flag == 0 (flags from `cmp r8,#0`): leave row P0 untouched
    146     vst1.8      {d2},[r12]                      @ write back row P0
    147 l1.3528:
    148     cmp         r9,#0
    149     beq         l1.3540                         @ filter_q flag == 0: leave row Q0 untouched
    150     vst1.8      {d0},[r0]                       @ write back row Q0
    151 l1.3540:
    152     pop         {r4-r12,pc}                     @ restore saved registers and return (pc loaded from saved lr)
    153 
    154 
    155