Home | History | Annotate | Download | only in arm
      1 @/*****************************************************************************
      2 @*
      3 @* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 @*
      5 @* Licensed under the Apache License, Version 2.0 (the "License");
      6 @* you may not use this file except in compliance with the License.
      7 @* You may obtain a copy of the License at:
      8 @*
      9 @* http://www.apache.org/licenses/LICENSE-2.0
     10 @*
     11 @* Unless required by applicable law or agreed to in writing, software
     12 @* distributed under the License is distributed on an "AS IS" BASIS,
     13 @* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @* See the License for the specific language governing permissions and
     15 @* limitations under the License.
     16 @*
     17 @*****************************************************************************/
     18 @/**
     19 @*******************************************************************************
     20 @* @file
     21 @*  ihevc_intra_pred_luma_mode_18_34_neon.s
     22 @*
     23 @* @brief
     24 @*  contains the function definition for chroma intra prediction in
     25 @*  angular modes 18 and 34. the function is coded in arm neon assembly
     26 @*  (gnu assembler syntax)
     27 @*
     28 @*
     29 @* @author
     30 @*  yogeswaran rs
     31 @*
     32 @* @par list of functions:
     33 @*
     34 @*
     35 @* @remarks
     36 @*  none
     37 @*
     38 @*******************************************************************************
     39 @*/
     40 @/**
     41 @*******************************************************************************
     42 @*
     43 @* @brief
     44 @*    chroma intra prediction for angular modes 18 and 34
     45 @*
     46 @* @par description:
     47 @*
     48 @* @param[in] pu1_ref
     49 @*  uword8 pointer to the source
     50 @*
     51 @* @param[out] pu1_dst
     52 @*  uword8 pointer to the destination
     53 @*
     54 @* @param[in] src_strd
     55 @*  integer source stride
     56 @*
     57 @* @param[in] dst_strd
     58 @*  integer destination stride
     59 @*
     60 @* @param[in] pi1_coeff
     61 @*  word8 pointer to the planar coefficients (not in the signature below; never read by this function)
     62 @*
     63 @* @param[in] nt
     64 @*  size of transform block
     65 @*
     66 @* @param[in] mode
     67 @*  intra prediction mode: 34 (0x22) selects mode 34, any other value is handled as mode 18
     68 @*
     69 @* @returns
     70 @*
     71 @* @remarks
     72 @*  none
     73 @*
     74 @*******************************************************************************
     75 @*/
     76 
     77 @void ihevc_intra_pred_chroma_mode_18_34(uword8 *pu1_ref,
     78 @                                      word32 src_strd,
     79 @                                      uword8 *pu1_dst,
     80 @                                      word32 dst_strd,
     81 @                                      word32 nt,
     82 @                                      word32 mode)
     83 @
     84 @**************variables vs registers*****************************************
     85 @r0 => *pu1_ref
     86 @r1 => src_strd
     87 @r2 => *pu1_dst
     88 @r3 => dst_strd
     89 
     90 @stack contents from #104 (offset after the prologue has pushed r4-r12, r14 = 40 bytes and d8-d15 = 64 bytes)
     91 @   nt
     92 @   mode
     93 @   pi1_coeff
     94 
     95 .equ    nt_offset,          104
     96 .equ    mode_offset,        108
     97 
     98 .text
     99 .align 4
    100 
    101 
    102 
    103 
    104 .globl ihevc_intra_pred_chroma_mode_18_34_a9q
    105 
    106 .type ihevc_intra_pred_chroma_mode_18_34_a9q, %function
    107 
    108 ihevc_intra_pred_chroma_mode_18_34_a9q:
    109 
    110     stmfd       sp!, {r4-r12, r14}          @stack stores the values of the arguments
    111     vpush       {d8 - d15}
    112 
    113     ldr         r4,[sp,#nt_offset]
    114     ldr         r5,[sp,#mode_offset]
    115 
    116     cmp         r4,#4
    117     beq         mode2_4
    118 
    119     mov         r12,r4
    120     mov         r11,r4
    121     add         r0,r0,r4,lsl #2
    122 
    123     cmp         r5,#0x22
    124     mov         r10,r2
    125 
    126     add         r0,r0,#4
    127 
    128     subne       r0,r0,#4
    129     moveq       r6,#2
    130     movne       r6,#-2
    131     mov         r8,r0
    132 
    133 
    134 kernel:
    135 
    136 
    137     vld1.8      {d0,d1},[r8],r6
    138     vst1.8      {d0,d1},[r10],r3
    139     vld1.8      {d2,d3},[r8],r6
    140     vst1.8      {d2,d3},[r10],r3
    141     vld1.8      {d4,d5},[r8],r6
    142     vst1.8      {d4,d5},[r10],r3
    143     vld1.8      {d6,d7},[r8],r6
    144     vst1.8      {d6,d7},[r10],r3
    145     vld1.8      {d8,d9},[r8],r6
    146     vst1.8      {d8,d9},[r10],r3
    147     vld1.8      {d10,d11},[r8],r6
    148     vst1.8      {d10,d11},[r10],r3
    149     vld1.8      {d12,d13},[r8],r6
    150     vst1.8      {d12,d13},[r10],r3
    151     vld1.8      {d14,d15},[r8],r6
    152     vst1.8      {d14,d15},[r10],r3
    153 
    154     subs        r12,r12,#8
    155     bne         kernel
    156 
    157     cmp         r11,#16
    158     add         r8,r0,#16
    159     add         r10,r2,#16
    160     sub         r11,#16
    161     mov         r12,#16
    162     beq         kernel
    163     b           end_func
    164 
    165 mode2_4:
    166 
    167     add         r0,r0,#20
    168     cmp         r5,#0x22
    169     subne       r0,r0,#4
    170 
    171     moveq       r8,#2
    172     movne       r8,#-2
    173 
    174     vld1.8      {d0},[r0],r8
    175     vst1.32     {d0},[r2],r3
    176 
    177     vld1.8      {d0},[r0],r8
    178     vst1.32     {d0},[r2],r3
    179 
    180     vld1.8      {d0},[r0],r8
    181     vst1.32     {d0},[r2],r3
    182 
    183     vld1.8      {d0},[r0],r8
    184     vst1.32     {d0},[r2],r3
    185 
    186 end_func:
    187     vpop        {d8 - d15}
    188     ldmfd       sp!,{r4-r12,r15}            @reload the registers from sp
    189 
    190 
    191 
    192 
    193 
    194 
    195