Home | History | Annotate | Download | only in arm
      1 @/*****************************************************************************
      2 @*
      3 @* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 @*
      5 @* Licensed under the Apache License, Version 2.0 (the "License");
      6 @* you may not use this file except in compliance with the License.
      7 @* You may obtain a copy of the License at:
      8 @*
      9 @* http://www.apache.org/licenses/LICENSE-2.0
     10 @*
     11 @* Unless required by applicable law or agreed to in writing, software
     12 @* distributed under the License is distributed on an "AS IS" BASIS,
     13 @* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @* See the License for the specific language governing permissions and
     15 @* limitations under the License.
     16 @*
     17 @*****************************************************************************/
     18 @/**
     19 @*******************************************************************************
     20 @* @file
     21 @*  ihevc_intra_pred_luma_mode_18_34_neon.s
     22 @*
     23 @* @brief
     24 @*  contains the function definition for luma intra prediction modes 18 and 34.
     25 @* the function is coded in neon assembly and can be compiled using
     26 
     27 @* rvct
     28 @*
     29 @* @author
     30 @*  yogeswaran rs
     31 @*
     32 @* @par list of functions:
     33 @*  - ihevc_intra_pred_luma_mode_18_34_a9q()
     34 @*
     35 @* @remarks
     36 @*  none
     37 @*
     38 @*******************************************************************************
     39 @*/
     40 @/**
     41 @*******************************************************************************
     42 @*
     43 @* @brief
     44 @*    luma intra prediction for modes 18 and 34
     45 @*
     46 @* @par description:
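     47 @*    each destination row is a run of consecutive reference bytes; the start of the run moves by one byte per row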
     48 @* @param[in] pu1_ref
     49 @*  uword8 pointer to the source
     50 @*
     51 @* @param[out] pu1_dst
     52 @*  uword8 pointer to the destination
     53 @*
     54 @* @param[in] src_strd
     55 @*  integer source stride
     56 @*
     57 @* @param[in] dst_strd
     58 @*  integer destination stride
     59 @*
     60 @* @param[in] pi1_coeff
     61 @*  word8 pointer to the planar coefficients (not part of the prototype below and never read by this function)
     62 @*
     63 @* @param[in] nt
     64 @*  size of transform block
     65 @*
     66 @* @param[in] mode
     67 @*  intra prediction mode: 34 (0x22) selects the mode 34 path, any other value is handled as mode 18
     68 @*
     69 @* @returns
     70 @*
     71 @* @remarks
     72 @*  none
     73 @*
     74 @*******************************************************************************
     75 @*/
     76 
     77 @void ihevc_intra_pred_luma_mode_18_34(uword8 *pu1_ref,
     78 @                                      word32 src_strd,
     79 @                                      uword8 *pu1_dst,
     80 @                                      word32 dst_strd,
     81 @                                      word32 nt,
     82 @                                      word32 mode)
     83 @
     84 @**************variables vs registers*****************************************
     85 @r0 => *pu1_ref
     86 @r1 => src_strd
     87 @r2 => *pu1_dst
     88 @r3 => dst_strd
     89 
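     90 @stack contents from #40 (offset measured after the 40-byte register save at function entry)
     91 @   nt          (read from [sp,#40])
     92 @   mode        (read from [sp,#44])
     93 @   pi1_coeff   (listed here, but absent from the prototype above and never read by this function)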
     94 
     95 .text
     96 .align 4
     97 
     98 
     99 
    100 
    101 .globl ihevc_intra_pred_luma_mode_18_34_a9q
    102 
    103 .type ihevc_intra_pred_luma_mode_18_34_a9q, %function
    104 
        @------------------------------------------------------------------------------
        @ ihevc_intra_pred_luma_mode_18_34_a9q
        @
        @ fills an nt x nt block at pu1_dst. every destination row is a run of
        @ consecutive bytes read from pu1_ref, and the start of the run moves by one
        @ byte from row to row:
        @   mode == 0x22 (34) : row r, column c  <-  pu1_ref[2*nt + 2 + r + c]
        @   any other mode    : row r, column c  <-  pu1_ref[2*nt     - r + c]
        @ (the code only tests for 34; every other value takes the "mode 18" path)
        @
        @ register usage inside the function:
        @   r0      source base of the tile being fetched
        @   r1      src_strd on entry (never read); reused as the tile loop counter,
        @           set to nt*(nt>>3) and decremented by 8 per 8x8 tile
        @   r2      destination address of the next tile to be stored
        @   r3      dst_strd
        @   r4      nt
        @   r5      mode
        @   r6      source step per row, +1 or -1 (r8 holds it in mode2_4)
        @   r8      running source pointer
        @   r10     running destination pointer
        @   r11     columns of the current 8-row band still to be fetched
        @   r12     columns of the current 8-row band still to be stored
        @   r14     nt - 8
        @   d0-d7   the eight 8-byte rows of the tile in flight
        @
        @ nt == 4 is handled by mode2_4. nt == 8 needs only the prologue and epilogue.
        @ larger sizes run a software-pipelined kernel that stores one 8x8 tile while
        @ fetching the next; tiles are visited left to right inside a band of 8 rows,
        @ then the next band down.
        @ NOTE(review): the loop arithmetic assumes nt is 4 or a multiple of 8 -
        @ confirm against callers.
        @
        @ vld1/vst1 leave the condition flags untouched, so every conditional
        @ instruction below uses the flags of the nearest preceding cmp/subs.
        @------------------------------------------------------------------------------
    105 ihevc_intra_pred_luma_mode_18_34_a9q:
    106 
    107     stmfd       sp!, {r4-r12, r14}          @save r4-r12 and lr: 10 registers, 40 bytes
    108 
    109 
    110     ldr         r4,[sp,#40]                 @r4 = nt   (first stack argument, just above the saved registers)
    111     ldr         r5,[sp,#44]                 @r5 = mode (second stack argument)
    112 
    113     cmp         r4,#4
    114     beq         mode2_4                     @4x4 block takes the short dedicated path
    115 
    116     mov         r11,r4                      @fetch-side column counter = nt
    117     mov         r12,r4                      @store-side column counter = nt
    118     sub         r14,r4,#8                   @r14 = nt - 8, used when wrapping to the next band
    119 
    120     add         r0,r0,r4,lsl #1             @r0 = pu1_ref + 2*nt
    121 
    122     cmp         r5,#0x22                    @mode == 34 ? (flags stay valid across the mov/add below)
    123     mov         r10,r2                      @r10 = working copy of pu1_dst
    124 
    125     add         r0,r0,#2
    126     subne       r0,r0,#2                    @mode 34: pu1_ref + 2*nt + 2, otherwise pu1_ref + 2*nt
    127     moveq       r6,#1                       @mode 34: source moves forward one byte per row
    128     movne       r6,#-1                      @otherwise: source moves back one byte per row
    129     mov         r8,r0
    130 
    131 prologue_cpy_32:
        @fetch the eight rows of the first tile into d0-d7; the scalar instructions
        @interleaved with the loads compute the tile counter r1 = nt * (nt >> 3)
    132 
    133     vld1.8      {d0},[r8],r6
    134     lsr         r1, r4, #3
    135     vld1.8      {d1},[r8],r6
    136     mul         r1, r4, r1
    137     vld1.8      {d2},[r8],r6
    138     vld1.8      {d3},[r8],r6
    139     subs        r1,r1,#8                    @account for the first tile; result 0 means it is the only one
    140     vld1.8      {d4},[r8],r6
    141     vld1.8      {d5},[r8],r6
    142     vld1.8      {d6},[r8],r6
    143 
    144     vld1.8      {d7},[r8],r6
    145 
    146 
    147     beq         epilogue_mode2              @single tile (flags from the subs above): just store it
    148     sub         r11,r11,#8                  @first tile of the band has been fetched
    149 
    150     cmp         r5,#0x22
    151     addne       r0,r0,#8                    @not mode 34: tile 8 columns to the right starts 8 bytes further on
    152     movne       r8,r0
    153     bne         kernel_mode18
        @mode 34 falls through: after eight +1 post-increments r8 already equals r0 + 8,
        @which is why the add below is left commented out
    154     @add        r8,r0,#8
    155 
        @mode 34 steady state: store the tile held in d0-d7 and fetch the next one
    156 kernel_mode2:
    157     vst1.8      {d0},[r10],r3
    158     vst1.8      {d1},[r10],r3
    159     subs        r12,r12,#8                  @zero when the tile being stored is the last of its band
    160     vst1.8      {d2},[r10],r3
    161     addne       r2,r2,#8                    @same band: next tile goes 8 columns to the right
    162     vst1.8      {d3},[r10],r3
    163 
    164     vld1.8      {d0},[r8],r6
    165     vst1.8      {d4},[r10],r3
    166 
    167     vst1.8      {d5},[r10],r3
    168     vld1.8      {d1},[r8],r6
    169     vst1.8      {d6},[r10],r3
    170     vld1.8      {d2},[r8],r6
    171     vst1.8      {d7},[r10],r3
    172 
    173     vld1.8      {d3},[r8],r6
    174     subeq       r2,r10,r14                  @band done: r10 is now 8 rows below the last tile; step back nt-8 columns
    175     vld1.8      {d4},[r8],r6
    176     mov         r10,r2                      @r10 = start of the next tile to store
    177     vld1.8      {d5},[r8],r6
    178     moveq       r12,r4                      @band done: re-arm the store-side counter
    179     vld1.8      {d6},[r8],r6
    180     subs        r11,r11,#8                  @zero when the tile just fetched is the last of its band
    181 
    182     vld1.8      {d7},[r8],r6
    183 
    184     addeq       r0,r0,#8                    @next band's first tile starts 8 bytes after this band's first tile
    185     moveq       r11,r4                      @re-arm the fetch-side counter
    186     moveq       r8,r0                       @(inside a band r8 simply keeps advancing)
    187 
    188     subs        r1, r1, #8                  @one more tile fetched
    189 
    190     bne         kernel_mode2
    191 
    192     b           epilogue_mode2              @all tiles fetched: store the last one
    193 
        @steady state for every mode other than 34: same store/fetch pipeline,
        @but r8 runs backwards (r6 = -1) so it is reloaded from r0 for every tile
    194 kernel_mode18:
    195     vst1.8      {d0},[r10],r3
    196     vst1.8      {d1},[r10],r3
    197     subs        r12,r12,#8                  @zero when the tile being stored is the last of its band
    198     vst1.8      {d2},[r10],r3
    199     addne       r2,r2,#8                    @same band: next tile goes 8 columns to the right
    200     vst1.8      {d3},[r10],r3
    201 
    202     vld1.8      {d0},[r8],r6
    203     vst1.8      {d4},[r10],r3
    204 
    205     vst1.8      {d5},[r10],r3
    206     vld1.8      {d1},[r8],r6
    207 
    208     vst1.8      {d6},[r10],r3
    209     vld1.8      {d2},[r8],r6
    210     vst1.8      {d7},[r10],r3
    211 
    212     vld1.8      {d3},[r8],r6
    213     subeq       r2,r10,r14                  @band done: r10 is now 8 rows below the last tile; step back nt-8 columns
    214     vld1.8      {d4},[r8],r6
    215     mov         r10,r2                      @r10 = start of the next tile to store
    216     vld1.8      {d5},[r8],r6
    217     moveq       r12,r4                      @band done: re-arm the store-side counter
    218     vld1.8      {d6},[r8],r6
    219     subs        r11,r11,#8                  @zero when the tile just fetched is the last of its band
    220     vld1.8      {d7},[r8],r6
    221 
    222     addne       r0,r0,#8                    @same band: next tile's source starts 8 bytes further on
    223     moveq       r11,r4                      @re-arm the fetch-side counter
    224     subeq       r0,r8,r14                   @band done: r8 = r0 - 8 after eight -1 steps; minus nt-8 gives the next band's first tile
    225     subs        r1, r1, #8                  @one more tile fetched (sets the flags for the bne below)
    226     mov         r8,r0
    227 
    228     bne         kernel_mode18
    229 
    230 
        @store the last tile fetched; reached from the prologue (nt == 8) and from both kernels
    231 epilogue_mode2:
    232 
    233     vst1.8      {d0},[r10],r3
    234     vst1.8      {d1},[r10],r3
    235     vst1.8      {d2},[r10],r3
    236     vst1.8      {d3},[r10],r3
    237     vst1.8      {d4},[r10],r3
    238     vst1.8      {d5},[r10],r3
    239     vst1.8      {d6},[r10],r3
    240     vst1.8      {d7},[r10],r3
    241 
    242     b           end_func
    243 
        @nt == 4: four rows of four bytes, one load and one store per row
        @NOTE(review): each vld1.8 {d0} reads 8 bytes although only 4 are stored; for the
        @last mode-34 row that is pu1_ref+13 .. pu1_ref+20 - confirm the reference
        @buffer is readable that far.
    244 mode2_4:
    245 
    246     add         r0,r0,#10                   @pu1_ref + 2*nt + 2 with nt = 4
    247     cmp         r5,#0x22
    248     subne       r0,r0,#2                    @not mode 34: pu1_ref + 2*nt
    249 
    250     moveq       r8,#1                       @mode 34: source moves forward one byte per row
    251     movne       r8,#-1                      @otherwise: source moves back one byte per row
    252 
    253     vld1.8      {d0},[r0],r8
    254     vst1.32     {d0[0]},[r2],r3             @write the low 4 bytes as one row, advance by dst_strd
    255 
    256     vld1.8      {d0},[r0],r8
    257     vst1.32     {d0[0]},[r2],r3
    258 
    259     vld1.8      {d0},[r0],r8
    260     vst1.32     {d0[0]},[r2],r3
    261 
    262     vld1.8      {d0},[r0],r8
    263     vst1.32     {d0[0]},[r2],r3
    264 
    265 end_func:
    266     ldmfd       sp!,{r4-r12,r15}            @restore r4-r12 and return by loading the saved lr into pc
    267 
    268 
    269 
    270 
    271 
    272 
    273 
    274