Home | History | Annotate | Download | only in arm64
      1 ///*****************************************************************************
      2 //*
      3 //* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 //*
      5 //* Licensed under the Apache License, Version 2.0 (the "License");
      6 //* you may not use this file except in compliance with the License.
      7 //* You may obtain a copy of the License at:
      8 //*
      9 //* http://www.apache.org/licenses/LICENSE-2.0
     10 //*
     11 //* Unless required by applicable law or agreed to in writing, software
     12 //* distributed under the License is distributed on an "AS IS" BASIS,
     13 //* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 //* See the License for the specific language governing permissions and
     15 //* limitations under the License.
     16 //*
     17 //*****************************************************************************/
     18 ///**
     19 //*******************************************************************************
     20 //* @file
     21 //*  ihevc_intra_pred_luma_mode_18_34_neon.s
     22 //*
     23 //* @brief
     24 //*  contains the function definition for chroma intra prediction
     25 //*  modes 18 and 34. the function is written in armv8 (aarch64) neon
     26 //*  assembly using gnu assembler directives.
     27 //*
     28 //*
     29 //* @author
     30 //*  yogeswaran rs
     31 //*
     32 //* @par list of functions:
     33 //*
     34 //*
     35 //* @remarks
     36 //*  none
     37 //*
     38 //*******************************************************************************
     39 //*/
     40 ///**
     41 //*******************************************************************************
     42 //*
     43 //* @brief
     44 //*    chroma intra prediction for modes 18 and 34
     45 //*
     46 //* @par description:
     47 //*
     48 //* @param[in] pu1_ref
     49 //*  uword8 pointer to the source
     50 //*
     51 //* @param[out] pu1_dst
     52 //*  uword8 pointer to the destination
     53 //*
     54 //* @param[in] src_strd
     55 //*  integer source stride
     56 //*
     57 //* @param[in] dst_strd
     58 //*  integer destination stride
     59 //*
     60 //* @param[in] pi1_coeff
     61 //*  word8 pointer to the planar coefficients
     62 //*
     63 //* @param[in] nt
     64 //*  size of transform block
     65 //*
     66 //* @param[in] mode
     67 //*  type of filtering
     68 //*
     69 //* @returns
     70 //*
     71 //* @remarks
     72 //*  none
     73 //*
     74 //*******************************************************************************
     75 //*/
     76 
     77 //void ihevc_intra_pred_chroma_mode_18_34(uword8 *pu1_ref,
     78 //                                      word32 src_strd,
     79 //                                      uword8 *pu1_dst,
     80 //                                      word32 dst_strd,
     81 //                                      word32 nt,
     82 //                                      word32 mode)
     83 //
     84 //**************variables vs registers*****************************************
     85 //x0 => *pu1_ref
     86 //x1 => src_strd
     87 //x2 => *pu1_dst
     88 //x3 => dst_strd
     89 
     90 //x4 => nt
     91 //x5 => mode
     92 //(no arguments are passed on the stack; pi1_coeff is not a
     93 // parameter of this function)
     94 
     95 .text
     96 .align 4
     97 .include "ihevc_neon_macros.s"
     98 
     99 
    100 
    101 .globl ihevc_intra_pred_chroma_mode_18_34_av8
    102 
    103 .type ihevc_intra_pred_chroma_mode_18_34_av8, %function
    104 
    105 ihevc_intra_pred_chroma_mode_18_34_av8:
    106 
    107     // stmfd sp!, {x4-x12, x14}    //stack stores the values of the arguments
    108 
    109     stp         x19, x20,[sp,#-16]!
    110 
    111 
    112     cmp         x4,#4
    113     beq         mode2_4
    114 
    115     mov         x12,x4
    116     mov         x11,x4
    117     add         x0,x0,x4,lsl #2
    118 
    119     cmp         x5,#0x22
    120     mov         x10,x2
    121 
    122     add         x0,x0,#4
    123 
    124     sub         x20,x0,#4
    125     csel        x0, x20, x0,ne
    126     mov         x20,#2
    127     csel        x6, x20, x6,eq
    128     mov         x20,#-2
    129     csel        x6, x20, x6,ne
    130     mov         x8,x0
    131 
    132 
    133 kernel:
    134 
    135 
    136     ld1         {v0.8b, v1.8b},[x8],x6
    137     st1         {v0.8b, v1.8b},[x10],x3
    138     ld1         {v2.8b, v3.8b},[x8],x6
    139     st1         {v2.8b, v3.8b},[x10],x3
    140     ld1         {v4.8b, v5.8b},[x8],x6
    141     st1         {v4.8b, v5.8b},[x10],x3
    142     ld1         {v6.8b, v7.8b},[x8],x6
    143     st1         {v6.8b, v7.8b},[x10],x3
    144     ld1         {v16.8b, v17.8b},[x8],x6
    145     st1         {v16.8b, v17.8b},[x10],x3
    146     ld1         {v18.8b, v19.8b},[x8],x6
    147     st1         {v18.8b, v19.8b},[x10],x3
    148     ld1         {v20.8b, v21.8b},[x8],x6
    149     st1         {v20.8b, v21.8b},[x10],x3
    150     ld1         {v22.8b, v23.8b},[x8],x6
    151     st1         {v22.8b, v23.8b},[x10],x3
    152 
    153     subs        x12,x12,#8
    154     bne         kernel
    155 
    156     cmp         x11,#16
    157     add         x8,x0,#16
    158     add         x10,x2,#16
    159     sub         x11, x11,#16
    160     mov         x12,#16
    161     beq         kernel
    162     b           end_func
    163 
    164 mode2_4:
    165 
    166     add         x0,x0,#20
    167     cmp         x5,#0x22
    168     sub         x20,x0,#4
    169     csel        x0, x20, x0,ne
    170 
    171     mov         x20,#2
    172     csel        x8, x20, x8,eq
    173     mov         x20,#-2
    174     csel        x8, x20, x8,ne
    175 
    176     ld1         {v0.8b},[x0],x8
    177     st1         {v0.2s},[x2],x3
    178 
    179     ld1         {v0.8b},[x0],x8
    180     st1         {v0.2s},[x2],x3
    181 
    182     ld1         {v0.8b},[x0],x8
    183     st1         {v0.2s},[x2],x3
    184 
    185     ld1         {v0.8b},[x0],x8
    186     st1         {v0.2s},[x2],x3
    187 
    188 end_func:
    189     // ldmfd sp!,{x4-x12,x15}                  //reload the registers from sp
    190     ldp         x19, x20,[sp],#16
    191 
    192     ret
    193 
    194 
    195 
    196 
    197 
    198 
    199