@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@*******************************************************************************
@* @file
@*  ihevc_intra_pred_chroma_ver_neon.s
@*
@* @brief
@*  contains the function definition for chroma intra prediction in vertical mode.
@*  the function is hand-coded in arm neon assembly (gnu assembler syntax).
@*
@* @author
@*  yogeswaran rs
@*
@* @par list of functions:
@*  - ihevc_intra_pred_chroma_ver_a9q()
@*
@* @remarks
@*  none
@*
@*******************************************************************************
@*/
@/**
@*******************************************************************************
@*
@* @brief
@*  chroma intra prediction, vertical mode
@*
@* @par description:
@*  reads one row of reference bytes starting at pu1_ref + 4*nt + 2 and copies
@*  it unchanged into consecutive rows of the destination block:
@*    nt <  8 : 8 bytes  per row, 4 rows
@*    nt == 8 : 16 bytes per row, 8 rows
@*    nt >  8 : 32 bytes per row, nt rows (nt rounded up to a multiple of 4)
@*  the callers are presumably expected to pass nt = 4, 8 or 16 only
@*  (todo: confirm against the call sites).
@*
@* @param[in] pu1_ref
@*  uword8 pointer to the reference array
@*
@* @param[in] src_strd
@*  integer source stride (not read by this routine)
@*
@* @param[out] pu1_dst
@*  uword8 pointer to the destination
@*
@* @param[in] dst_strd
@*  integer destination stride, in bytes
@*
@* @param[in] nt
@*  size of transform block
@*
@* @param[in] mode
@*  prediction mode (not read by this routine)
@*
@* @returns
@*  none
@*
@* @remarks
@*  modifies d0, d20-d23 and the condition flags; r4-r12 are restored on exit.
@*
@*******************************************************************************
@*/

@void ihevc_intra_pred_chroma_ver(uword8 *pu1_ref,
@                                 word32 src_strd,
@                                 uword8 *pu1_dst,
@                                 word32 dst_strd,
@                                 word32 nt,
@                                 word32 mode)
@**************variables vs registers*****************************************
@r0 => *pu1_ref
@r1 => src_strd
@r2 => *pu1_dst
@r3 => dst_strd

@stack contents from #40 (after the 10-register push below)
@   nt
@   mode

.text
.align 4

.globl ihevc_intra_pred_chroma_ver_a9q

.type ihevc_intra_pred_chroma_ver_a9q, %function

ihevc_intra_pred_chroma_ver_a9q:

    stmfd       sp!, {r4-r12, r14}          @save r4-r12 and lr: 10 words = 40 bytes

    ldr         r4, [sp, #40]               @r4 = nt (first stack argument, just above the saved registers)
    lsl         r5, r4, #2                  @r5 = 4*nt

    cmp         r4, #8
    beq         blk_8                       @nt == 8
    blt         blk_4                       @nt <  8
                                            @nt >  8 falls through

copy_16:
    add         r5, r5, #2                  @r5 = 4*nt + 2
    add         r6, r0, r5                  @r6 = pu1_ref + 4*nt + 2 (start of the row to replicate)

    add         r5, r2, r3                  @r5  = pu1_dst + 1*dst_strd
    vld2.8      {d20,d21}, [r6]!            @row bytes 0..15, r6 += 16
    add         r8, r5, r3                  @r8  = pu1_dst + 2*dst_strd

    add         r10, r8, r3                 @r10 = pu1_dst + 3*dst_strd
    vld2.8      {d22,d23}, [r6]             @row bytes 16..31
    lsl         r11, r3, #2                 @r11 = 4*dst_strd

    sub         r11, r11, #16               @undo the +16 writeback of the first half-row store,
                                            @so each pointer ends up exactly 4 rows further down

    @each pass writes 4 rows of 32 bytes. the row counter is tested after
    @every pass (signed >), so the loop terminates for any nt > 8.
kernel_copy_16:
    vst2.8      {d20,d21}, [r2]!            @bytes 0..15 of rows 0..3 of this group
    vst2.8      {d20,d21}, [r5]!
    vst2.8      {d20,d21}, [r8]!
    vst2.8      {d20,d21}, [r10]!

    vst2.8      {d22,d23}, [r2], r11        @bytes 16..31, then step to the next group of 4 rows
    vst2.8      {d22,d23}, [r5], r11
    vst2.8      {d22,d23}, [r8], r11
    vst2.8      {d22,d23}, [r10], r11

    subs        r4, r4, #4                  @4 rows done
    bgt         kernel_copy_16

    b           end_func

blk_8:

    add         r5, r5, #2                  @r5 = 4*nt + 2
    add         r6, r0, r5                  @r6 = pu1_ref + 4*nt + 2

    add         r5, r2, r3                  @r5  = pu1_dst + 1*dst_strd
    vld2.8      {d20,d21}, [r6]             @the 16 bytes of the row
    add         r8, r5, r3                  @r8  = pu1_dst + 2*dst_strd

    add         r10, r8, r3                 @r10 = pu1_dst + 3*dst_strd

    lsl         r11, r3, #2                 @r11 = 4*dst_strd

    vst2.8      {d20,d21}, [r2], r11        @rows 0..3, pointers then move to rows 4..7
    vst2.8      {d20,d21}, [r5], r11
    vst2.8      {d20,d21}, [r8], r11
    vst2.8      {d20,d21}, [r10], r11

    vst2.8      {d20,d21}, [r2]             @rows 4..7
    vst2.8      {d20,d21}, [r5]
    vst2.8      {d20,d21}, [r8]
    vst2.8      {d20,d21}, [r10]

    b           end_func

blk_4:

    add         r5, r5, #2                  @r5 = 4*nt + 2 (r5 still holds 4*nt from the entry code)
    add         r6, r0, r5                  @r6 = pu1_ref + 4*nt + 2

    vld1.8      {d0}, [r6]                  @the 8 bytes of the row
    add         r5, r2, r3                  @r5  = pu1_dst + 1*dst_strd

    vst1.8      {d0}, [r2]                  @row 0
    add         r8, r5, r3                  @r8  = pu1_dst + 2*dst_strd
    vst1.8      {d0}, [r5]                  @row 1
    add         r10, r8, r3                 @r10 = pu1_dst + 3*dst_strd
    vst1.8      {d0}, [r8]                  @row 2
    vst1.8      {d0}, [r10]                 @row 3

end_func:
    ldmfd       sp!, {r4-r12, r15}          @restore r4-r12 and return by loading the saved lr into pc