Home | History | Annotate | Download | only in arm64
      1 ///*****************************************************************************
      2 //*
      3 //* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 //*
      5 //* Licensed under the Apache License, Version 2.0 (the "License");
      6 //* you may not use this file except in compliance with the License.
      7 //* You may obtain a copy of the License at:
      8 //*
      9 //* http://www.apache.org/licenses/LICENSE-2.0
     10 //*
     11 //* Unless required by applicable law or agreed to in writing, software
     12 //* distributed under the License is distributed on an "AS IS" BASIS,
     13 //* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 //* See the License for the specific language governing permissions and
     15 //* limitations under the License.
     16 //*
     17 //*****************************************************************************/
     18 ///*******************************************************************************
     19 //* @file
     20 //*  ihevc_deblk_chroma_horz.s
     21 //*
     22 //* @brief
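     23 //*  contains the function definition for the chroma deblocking filter
     24 //*  applied across a horizontal edge. the function is hand-written
     25 //*  aarch64 neon assembly in gnu assembler syntax (it does not use
     26 //*  intrinsics and is not written in rvct syntax)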
     27 //*
     28 //* @author
     29 //*  anand s
     30 //*
     31 //* @par list of functions:
     32 //*  - ihevc_deblk_chroma_horz_av8()
     33 //*
     34 //* @remarks
     35 //*  none
     36 //*
     37 //void ihevc_deblk_chroma_horz(UWORD8 *pu1_src,
     38 //                             WORD32 src_strd,
     39 //                             WORD32 quant_param_p,
     40 //                             WORD32 quant_param_q,
     41 //                             WORD32 qp_offset_u,
     42 //                             WORD32 qp_offset_v,
     43 //                             WORD32 tc_offset_div2,
     44 //                             WORD32 filter_flag_p,
     45 //                             WORD32 filter_flag_q)
     46 //
     47 
     // ---------------------------------------------------------------------
     // Assembler set-up for this translation unit (GNU as, AArch64).
     // ---------------------------------------------------------------------

     // Emit into the code section, aligned to 2^4 = 16 bytes.
     .text
     .p2align 4

     // Project macro file. The routine in this file invokes push_v_regs and
     // pop_v_regs, which are not defined here, so they are expected to come
     // from this include.
     .include "ihevc_neon_macros.s"

     // Exported entry point, marked as a function symbol.
     .global ihevc_deblk_chroma_horz_av8
     .type   ihevc_deblk_chroma_horz_av8, %function

     // Lookup tables defined in another object; reached through the GOT.
     .extern gai4_ihevc_qp_table
     .extern gai4_ihevc_tc_table
     59 
     //-----------------------------------------------------------------------
     // ihevc_deblk_chroma_horz_av8
     //
     // C prototype (the entry symbol carries the _av8 suffix):
     //   void ihevc_deblk_chroma_horz(UWORD8 *pu1_src,         // x0
     //                                WORD32  src_strd,        // w1
     //                                WORD32  quant_param_p,   // w2
     //                                WORD32  quant_param_q,   // w3
     //                                WORD32  qp_offset_u,     // w4
     //                                WORD32  qp_offset_v,     // w5
     //                                WORD32  tc_offset_div2,  // w6
     //                                WORD32  filter_flag_p,   // w7
     //                                WORD32  filter_flag_q);  // [sp]
     //
     // ABI: AAPCS64. The upper 32 bits of an X register that carries a 32-bit
     // argument are unspecified on entry, so every WORD32 that takes part in
     // 64-bit arithmetic is sign-extended first, and the two flags are tested
     // as W registers.
     //
     // What the code does:
     //   * Loads 8 bytes from each of four rows:
     //       pu1_src - 2*src_strd, pu1_src - src_strd, pu1_src,
     //       pu1_src + src_strd
     //     (going by the function name these are the p1, p0 | q0, q1 rows on
     //     either side of a horizontal edge).
     //   * qp_avg = (quant_param_p + quant_param_q + 1) >> 1
     //     For each of qp_offset_u / qp_offset_v:
     //       idx = qp_avg + qp_offset
     //       qp  = idx                        if idx < 0
     //           = gai4_ihevc_qp_table[idx]   if 0 <= idx <= 57
     //           = idx - 6                    if idx > 57
     //       tc  = gai4_ihevc_tc_table[clamp(qp + 2*tc_offset_div2 + 2, 0, 53)]
     //   * delta = clamp((((row0 - row-1) << 2) + row-2 - row+1 + 4) >> 3,
     //                   -tc, +tc)
     //     with tc taken from the qp_offset_u path in even lanes and from the
     //     qp_offset_v path in odd lanes (presumably interleaved U/V samples).
     //   * If filter_flag_p != 0: row-1 <- sat_u8(row-1 + delta)
     //     If filter_flag_q != 0: row0  <- sat_u8(row0  - delta)
     //
     // Clobbers: x1-x6, x9-x14, v0, v2, v4, v6, v16, v18, v30, v31, NZCV,
     //           plus whatever push_v_regs / pop_v_regs touch.
     // Stack:    only what push_v_regs / pop_v_regs use.
     //-----------------------------------------------------------------------
     ihevc_deblk_chroma_horz_av8:
         // Widen the 32-bit integer arguments used in 64-bit arithmetic.
         sxtw        x1, w1                      // src_strd
         sxtw        x2, w2                      // quant_param_p
         sxtw        x3, w3                      // quant_param_q
         sxtw        x4, w4                      // qp_offset_u
         sxtw        x5, w5                      // qp_offset_v
         sxtw        x6, w6                      // tc_offset_div2

         // filter_flag_q is the first stack argument. Read it before the
         // macro below, which may move sp.
         ldr         w9, [sp]

         // Macro from ihevc_neon_macros.s (not visible in this file).
         // NOTE(review): only v0-v7 / v16-v31 are written below; if the macro
         // merely preserves v8-v15 it could be dropped - confirm.
         push_v_regs

         // Row addresses and pixel loads (8 bytes per row).
         sub         x12, x0, x1                 // x12 = pu1_src - src_strd
         sub         x13, x12, x1                // x13 = pu1_src - 2*src_strd
         add         x14, x0, x1                 // x14 = pu1_src + src_strd
         ld1         {v0.8b}, [x0]               // row 0
         ld1         {v2.8b}, [x12]              // row -1
         ld1         {v4.8b}, [x13]              // row -2
         ld1         {v16.8b}, [x14]             // row +1

         // x10 = (quant_param_p + quant_param_q + 1) >> 1
         add         x10, x2, x3
         add         x10, x10, #1
         asr         x10, x10, #1

         // x3 = &gai4_ihevc_qp_table (via the GOT)
         adrp        x3, :got:gai4_ihevc_qp_table
         ldr         x3, [x3, #:got_lo12:gai4_ihevc_qp_table]

         // Widen the pixels to 16 bit while the scalar path continues.
         uxtl        v0.8h, v0.8b
         uxtl        v2.8h, v2.8b
         uxtl        v4.8h, v4.8b
         uxtl        v16.8h, v16.8b

         // QP for the qp_offset_u path -> x1
         adds        x1, x4, x10                 // idx = qp_offset_u + qp_avg
         b.mi        .Lqp_u_ready                // idx < 0: keep idx
         cmp         x1, #0x39
         b.gt        .Lqp_u_above_table          // idx > 57
         ldr         w1, [x3, x1, lsl #2]        // 0..57: table lookup
         b           .Lqp_u_ready
     .Lqp_u_above_table:
         sub         x1, x1, #6
     .Lqp_u_ready:

         // QP for the qp_offset_v path -> x2
         adds        x2, x5, x10                 // idx = qp_offset_v + qp_avg
         b.mi        .Lqp_v_ready
         cmp         x2, #0x39
         b.gt        .Lqp_v_above_table
         ldr         w2, [x3, x2, lsl #2]
         b           .Lqp_v_ready
     .Lqp_v_above_table:
         sub         x2, x2, #6
     .Lqp_v_ready:

         // tc table index = clamp(qp + 2*tc_offset_div2 + 2, 0, 53)
         mov         x11, #0x35                  // upper bound, 53

         add         x1, x1, x6, lsl #1
         adds        x1, x1, #2
         csel        x1, xzr, x1, mi             // negative -> 0
         cmp         x1, x11
         csel        x1, x11, x1, gt             // above 53 -> 53

         add         x2, x2, x6, lsl #1
         adds        x2, x2, #2
         csel        x2, xzr, x2, mi
         cmp         x2, x11
         csel        x2, x11, x2, gt

         // x3 = &gai4_ihevc_tc_table (via the GOT), then fetch both tc values.
         adrp        x3, :got:gai4_ihevc_tc_table
         ldr         x3, [x3, #:got_lo12:gai4_ihevc_tc_table]
         ldr         w1, [x3, x1, lsl #2]        // tc, qp_offset_u path
         ldr         w2, [x3, x2, lsl #2]        // tc, qp_offset_v path

         // delta = (((row0 - row-1) << 2) + row-2 - row+1 + 4) >> 3
         sub         v6.8h, v0.8h, v2.8h
         shl         v6.8h, v6.8h, #2
         add         v6.8h, v6.8h, v4.8h
         sub         v6.8h, v6.8h, v16.8h
         srshr       v6.8h, v6.8h, #3            // rounding shift supplies the +4

         // Per-lane clamp bounds: even lanes use w1, odd lanes use w2.
         dup         v30.8h, w1
         dup         v31.8h, w2
         zip1        v4.8h, v30.8h, v31.8h       // +tc
         neg         v18.8h, v4.8h               // -tc

         smin        v6.8h, v6.8h, v4.8h         // delta = min(delta, +tc)
         smax        v6.8h, v6.8h, v18.8h        // delta = max(delta, -tc)

         // Apply delta and saturate back to unsigned 8 bit.
         add         v2.8h, v2.8h, v6.8h         // row -1 + delta
         sub         v0.8h, v0.8h, v6.8h         // row  0 - delta
         sqxtun      v2.8b, v2.8h
         sqxtun      v0.8b, v0.8h

         // Conditional write-back; flags are compared as 32-bit values.
         cbz         w7, .Lskip_store_p          // filter_flag_p == 0
         st1         {v2.8b}, [x12]
     .Lskip_store_p:
         cbz         w9, .Lskip_store_q          // filter_flag_q == 0
         st1         {v0.8b}, [x0]
     .Lskip_store_q:

         pop_v_regs
         ret

     .size ihevc_deblk_chroma_horz_av8, .-ihevc_deblk_chroma_horz_av8
    170 
    171 
    172 
    173 
    174