Home | History | Annotate | Download | only in arm
      1 @/*****************************************************************************
      2 @*
      3 @* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 @*
      5 @* Licensed under the Apache License, Version 2.0 (the "License");
      6 @* you may not use this file except in compliance with the License.
      7 @* You may obtain a copy of the License at:
      8 @*
      9 @* http://www.apache.org/licenses/LICENSE-2.0
     10 @*
     11 @* Unless required by applicable law or agreed to in writing, software
     12 @* distributed under the License is distributed on an "AS IS" BASIS,
     13 @* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @* See the License for the specific language governing permissions and
     15 @* limitations under the License.
     16 @*
     17 @*****************************************************************************/
     18 @/**
     19 @*******************************************************************************
     20 @* @file
     21 @*  ihevc_intra_pred_luma_mode_18_34_neon.s
     22 @*
     23 @* @brief
@*  contains the function definition for chroma intra prediction modes 18
@*  and 34. the function is written in arm neon assembly (gnu assembler
@*  syntax)
@*
     28 @*
     29 @* @author
     30 @*  yogeswaran rs
     31 @*
     32 @* @par list of functions:
     33 @*
     34 @*
     35 @* @remarks
     36 @*  none
     37 @*
     38 @*******************************************************************************
     39 @*/
     40 @/**
     41 @*******************************************************************************
     42 @*
     43 @* @brief
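@*    chroma intra prediction for modes 18 and 34
@*
@* @par description:
@*    each destination row is a straight copy of 2 * nt bytes of the
@*    reference array; the copy window moves by one pair of bytes (2 bytes)
@*    per row, forward for mode 34 and backward otherwise
@*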
     48 @* @param[in] pu1_ref
     49 @*  uword8 pointer to the source
     50 @*
     51 @* @param[out] pu1_dst
     52 @*  uword8 pointer to the destination
     53 @*
     54 @* @param[in] src_strd
     55 @*  integer source stride
     56 @*
     57 @* @param[in] dst_strd
     58 @*  integer destination stride
     59 @*
     60 @* @param[in] pi1_coeff
     61 @*  word8 pointer to the planar coefficients
     62 @*
     63 @* @param[in] nt
@*  size of transform block
     65 @*
     66 @* @param[in] mode
     67 @*  type of filtering
     68 @*
     69 @* @returns
     70 @*
     71 @* @remarks
     72 @*  none
     73 @*
     74 @*******************************************************************************
     75 @*/
     76 
     77 @void ihevc_intra_pred_chroma_mode_18_34(uword8 *pu1_ref,
     78 @                                      word32 src_strd,
     79 @                                      uword8 *pu1_dst,
     80 @                                      word32 dst_strd,
     81 @                                      word32 nt,
     82 @                                      word32 mode)
     83 @
     84 @**************variables vs registers*****************************************
     85 @r0 => *pu1_ref
     86 @r1 => src_strd
     87 @r2 => *pu1_dst
     88 @r3 => dst_strd
     89 
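@stack contents from #40 (i.e. after the 40-byte register save in the prologue)
@   nt      => [sp,#40]
@   mode    => [sp,#44]
@   (pi1_coeff is not an argument of this function and is never loaded)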
     94 
     95 .text
     96 .align 4
     97 
     98 
     99 
    100 
    .globl ihevc_intra_pred_chroma_mode_18_34_a9q

    .type ihevc_intra_pred_chroma_mode_18_34_a9q, %function

@-----------------------------------------------------------------------------
@ ihevc_intra_pred_chroma_mode_18_34_a9q
@
@ purpose:
@   fills nt destination rows of 2 * nt bytes each. every row is an
@   unmodified copy of 2 * nt consecutive bytes of the reference array; the
@   source window moves by 2 bytes per row.
@     mode == 34 (0x22): row 0 starts at pu1_ref + 4 * nt + 4, step = +2
@     any other mode   : row 0 starts at pu1_ref + 4 * nt,     step = -2
@
@ in:
@   r0          pu1_ref
@   r1          src_strd   (never read)
@   r2          pu1_dst
@   r3          dst_strd   (bytes between destination rows)
@   [sp,#0]     nt         (found at [sp,#40] after the prologue)
@   [sp,#4]     mode       (found at [sp,#44] after the prologue)
@
@ supported nt:
@   4      -> dedicated path, 8 bytes per row, 4 rows
@   8      -> one pass of the kernel, 16 bytes per row
@   16     -> two passes of the kernel (left and right 16-byte column)
@   the row counter is decremented by 8 and tested for zero, so any other
@   nt must at least be a multiple of 8; only nt == 16 gets a second column.
@
@ register roles (nt != 4 path):
@   r4  nt            r5  mode          r6  source step (+2 / -2)
@   r8  source ptr    r10 dest ptr      r11 nt copy used for the column test
@   r12 rows left in the current column pass
@
@ clobbers:
@   r0, r2, flags, d0-d7, d16-d23. only caller-saved neon registers are
@   written; d8-d15 are callee-saved under the aapcs and are left untouched,
@   so no vpush/vpop is needed and the stack offsets above stay valid.
@-----------------------------------------------------------------------------
ihevc_intra_pred_chroma_mode_18_34_a9q:

    stmfd       sp!, {r4-r12, r14}          @save 10 core registers (40 bytes)


    ldr         r4,[sp,#40]                 @r4 = nt
    ldr         r5,[sp,#44]                 @r5 = mode

    cmp         r4,#4
    beq         mode2_4                     @nt == 4 has its own 8-byte-wide path

    mov         r12,r4                      @r12 = rows left in this column pass
    mov         r11,r4                      @r11 = nt, tested after the first column pass
    add         r0,r0,r4,lsl #2             @r0 = pu1_ref + 4 * nt

    cmp         r5,#0x22                    @mode == 34 ? (flags consumed by the
                                            @conditional instructions below)
    mov         r10,r2                      @r10 = destination row pointer

    add         r0,r0,#4                    @assume mode 34: start 4 bytes further on

    subne       r0,r0,#4                    @other mode: undo the +4
    moveq       r6,#2                       @mode 34: window moves forward 2 bytes per row
    movne       r6,#-2                      @other mode: window moves back 2 bytes per row
    mov         r8,r0                       @r8 = source row pointer (r0 keeps the column base)


kernel:

    @eight rows per iteration; each row is a 16-byte load post-incremented by
    @the step in r6, then a 16-byte store post-incremented by dst_strd.
    @only d0-d7 and d16-d23 are used so that callee-saved d8-d15 survive.

    vld1.8      {d0,d1},[r8],r6
    vst1.8      {d0,d1},[r10],r3
    vld1.8      {d2,d3},[r8],r6
    vst1.8      {d2,d3},[r10],r3
    vld1.8      {d4,d5},[r8],r6
    vst1.8      {d4,d5},[r10],r3
    vld1.8      {d6,d7},[r8],r6
    vst1.8      {d6,d7},[r10],r3
    vld1.8      {d16,d17},[r8],r6
    vst1.8      {d16,d17},[r10],r3
    vld1.8      {d18,d19},[r8],r6
    vst1.8      {d18,d19},[r10],r3
    vld1.8      {d20,d21},[r8],r6
    vst1.8      {d20,d21},[r10],r3
    vld1.8      {d22,d23},[r8],r6
    vst1.8      {d22,d23},[r10],r3

    subs        r12,r12,#8                  @8 rows done
    bne         kernel

    @column pass finished. if r11 is still 16 the right-hand 16 bytes of each
    @row remain: restart the kernel 16 bytes further into source and
    @destination. none of the instructions between the cmp and the beq set
    @flags, so the beq acts on the cmp result.
    cmp         r11,#16
    add         r8,r0,#16                   @source: same first row, next 16 bytes
    add         r10,r2,#16                  @destination: same first row, next 16 bytes
    sub         r11,r11,#16                 @r11 != 16 afterwards, so no third pass
    mov         r12,#16                     @16 rows for the second column pass
    beq         kernel
    b           end_func

mode2_4:

    add         r0,r0,#20                   @r0 = pu1_ref + 4 * nt + 4 with nt = 4
    cmp         r5,#0x22                    @mode == 34 ?
    subne       r0,r0,#4                    @other mode: start at pu1_ref + 4 * nt

    moveq       r8,#2                       @mode 34: step forward 2 bytes per row
    movne       r8,#-2                      @other mode: step back 2 bytes per row

    @four rows of 8 bytes each, copied through d0

    vld1.8      {d0},[r0],r8
    vst1.32     {d0},[r2],r3

    vld1.8      {d0},[r0],r8
    vst1.32     {d0},[r2],r3

    vld1.8      {d0},[r0],r8
    vst1.32     {d0},[r2],r3

    vld1.8      {d0},[r0],r8
    vst1.32     {d0},[r2],r3

end_func:
    ldmfd       sp!,{r4-r12,r15}            @restore registers; loading r15 (pc) returns
    185 
    186 
    187 
    188 
    189 
    190 
    191