///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///**
//*******************************************************************************
//* @file
//*  ihevc_intra_pred_luma_mode_18_34_neon.s
//*
//* @brief
//*  contains function definitions for chroma intra prediction modes 18 and 34.
//*  functions are coded in armv8 (aarch64) neon assembly, gnu assembler syntax
//*
//* @author
//*  yogeswaran rs
//*
//* @par list of functions:
//*  - ihevc_intra_pred_chroma_mode_18_34_av8()
//*
//* @remarks
//*  none
//*
//*******************************************************************************
//*/
///**
//*******************************************************************************
//*
//* @brief
//*  chroma intra prediction for mode 34 and its counterpart (mode 18, going by
//*  the function name; the code only tests mode == 34)
//*
//* @par description:
//*  every output row is a plain copy of 2 * nt bytes taken from the reference
//*  array; no filtering or arithmetic is applied to the samples.
//*      mode == 34 : row r is copied from pu1_ref + 4 * nt + 4 + 2 * r
//*      otherwise  : row r is copied from pu1_ref + 4 * nt     - 2 * r
//*  the 2 byte step per row is presumably one interleaved chroma sample pair
//*  (verify against the c reference implementation).
//*
//* @param[in] pu1_ref
//*  uword8 pointer to the reference samples
//*
//* @param[in] src_strd
//*  integer source stride (not read by this routine)
//*
//* @param[out] pu1_dst
//*  uword8 pointer to the destination
//*
//* @param[in] dst_strd
//*  integer destination stride, in bytes
//*
//* @param[in] nt
//*  size of transform block; the code handles 4, 8 and 16. any other value
//*  that is not a multiple of 8 would never terminate the row loop.
//*
//* @param[in] mode
//*  prediction mode; 34 (0x22) selects the forward walk, anything else the
//*  backward walk
//*
//* @returns
//*  none
//*
//* @remarks
//*  leaf routine: only caller saved registers are used (x0, x2-x6, x8-x12,
//*  v0-v7, v16-v23) and the stack is not touched.
//*
//*******************************************************************************
//*/

//void ihevc_intra_pred_chroma_mode_18_34(uword8 *pu1_ref,
//                                        word32 src_strd,
//                                        uword8 *pu1_dst,
//                                        word32 dst_strd,
//                                        word32 nt,
//                                        word32 mode)
//
//**************variables vs registers*****************************************
//x0 => *pu1_ref
//x1 => src_strd (unused)
//x2 => *pu1_dst
//w3 => dst_strd (sign extended into x3 on entry)
//w4 => nt       (sign extended into x4 on entry)
//w5 => mode

.text
.align 4
.include "ihevc_neon_macros.s"



.globl ihevc_intra_pred_chroma_mode_18_34_av8

.type ihevc_intra_pred_chroma_mode_18_34_av8, %function

ihevc_intra_pred_chroma_mode_18_34_av8:

    // word32 arguments arrive in w3/w4/w5; aapcs64 leaves bits [63:32] of the
    // x registers unspecified, so widen before any 64 bit use.
    sxtw        x3, w3                      // x3 = dst_strd, used as post-index
    sxtw        x4, w4                      // x4 = nt

    mov         x6, #2
    cmp         w5, #0x22                   // mode == 34 ?
    cneg        x6, x6, ne                  // x6 = per-row source step: +2 (34) / -2

    add         x0, x0, x4, lsl #2          // x0 = pu1_ref + 4 * nt
    add         x9, x0, #4
    csel        x0, x9, x0, eq              // mode 34 starts 4 bytes further on

    cmp         x4, #4
    beq         mode2_4                     // 4x4 block has its own 8 byte row path

    mov         x12, x4                     // x12 = rows left in this column pass
    mov         x11, x4                     // x11 = nt, decides on a second pass
    mov         x10, x2                     // x10 = running destination pointer
    mov         x8, x0                      // x8  = running source pointer


kernel:                                     // 8 rows x 16 bytes per iteration

    ld1         {v0.8b, v1.8b}, [x8], x6
    st1         {v0.8b, v1.8b}, [x10], x3
    ld1         {v2.8b, v3.8b}, [x8], x6
    st1         {v2.8b, v3.8b}, [x10], x3
    ld1         {v4.8b, v5.8b}, [x8], x6
    st1         {v4.8b, v5.8b}, [x10], x3
    ld1         {v6.8b, v7.8b}, [x8], x6
    st1         {v6.8b, v7.8b}, [x10], x3
    ld1         {v16.8b, v17.8b}, [x8], x6
    st1         {v16.8b, v17.8b}, [x10], x3
    ld1         {v18.8b, v19.8b}, [x8], x6
    st1         {v18.8b, v19.8b}, [x10], x3
    ld1         {v20.8b, v21.8b}, [x8], x6
    st1         {v20.8b, v21.8b}, [x10], x3
    ld1         {v22.8b, v23.8b}, [x8], x6
    st1         {v22.8b, v23.8b}, [x10], x3

    subs        x12, x12, #8
    bne         kernel

    // rows are 2 * nt bytes wide: nt == 16 needs a second pass over the right
    // hand 16 bytes of every row, nt == 8 is already complete.
    cmp         x11, #16
    bne         end_func

    add         x8, x0, #16                 // restart source, 16 bytes to the right
    add         x10, x2, #16                // restart destination likewise
    mov         x11, #0                     // mark the second pass as taken
    mov         x12, #16                    // 16 rows again
    b           kernel

mode2_4:                                    // nt == 4: four rows of 8 bytes

    ld1         {v0.8b}, [x0], x6
    st1         {v0.2s}, [x2], x3

    ld1         {v0.8b}, [x0], x6
    st1         {v0.2s}, [x2], x3

    ld1         {v0.8b}, [x0], x6
    st1         {v0.2s}, [x2], x3

    ld1         {v0.8b}, [x0], x6
    st1         {v0.2s}, [x2], x3

end_func:
    ret