///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///**
// *******************************************************************************
// * @file
// *  ihevc_mem_fns_neon.s
// *
// * @brief
// *  Contains function definitions for memory manipulation
// *
// * @author
// *  Naveen SR
// *
// * @par List of Functions:
// *  - ihevc_memcpy()
// *  - ihevc_memset_mul_8()
// *  - ihevc_memset_16bit_mul_8()
// *
// * @remarks
// *  None
// *
// *******************************************************************************
// */

///**
//*******************************************************************************
//*
//* @brief
//*  memcpy of a 1d array
//*
//* @par Description:
//*  Does memcpy of 8bit data from source to destination for 8, 16 or 32 number
//*  of bytes
//*
//* @param[in] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] num_bytes
//*  number of bytes to copy
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//void ihevc_memcpy_mul_8(UWORD8 *pu1_dst,
//                        UWORD8 *pu1_src,
//                        UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    x1 => *pu1_src
//    x2 => num_bytes
//
// Target : AArch64, GNU as syntax, ELF.
// Copies num_bytes bytes from pu1_src to pu1_dst, 8 bytes per iteration.
// Precondition: num_bytes is a multiple of 8. The loop counts down in steps
// of 8 and only stops when the counter reaches exactly zero. A count of 0
// returns immediately without touching memory.
// Clobbers: x0, x1, x2, v0, condition flags.
//
// Note common to every routine in this file: the count is declared narrower
// than 64 bits in the prototype comments, so bits 63:32 of x2 are not
// guaranteed by the procedure call standard. Each routine therefore clears
// them with "mov w2, w2" before doing 64-bit arithmetic on x2.
// NOTE(review): the prototype comments say UWORD8, yet the code has always
// counted in a full register; the low 32 bits are trusted as passed. Confirm
// the declared width against the C header.

        .text
        .p2align 2

        .global ihevc_memcpy_mul_8_av8
        .type   ihevc_memcpy_mul_8_av8, %function

ihevc_memcpy_mul_8_av8:
        mov     w2, w2                      // zero-extend count into x2
        cbz     x2, .Lmemcpy_mul_8_done     // nothing to copy

.Lmemcpy_mul_8_loop:
        ld1     {v0.8b}, [x1], #8           // load 8 bytes, src += 8
        st1     {v0.8b}, [x0], #8           // store 8 bytes, dst += 8
        subs    x2, x2, #8                  // x2 = bytes remaining
        b.ne    .Lmemcpy_mul_8_loop

.Lmemcpy_mul_8_done:
        ret

        .size   ihevc_memcpy_mul_8_av8, . - ihevc_memcpy_mul_8_av8


//*******************************************************************************
//*/
//void ihevc_memcpy(UWORD8 *pu1_dst,
//                  UWORD8 *pu1_src,
//                  UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    x1 => *pu1_src
//    x2 => num_bytes
//
// Copies num_bytes bytes from pu1_src to pu1_dst for any count, including 0.
// Whole groups of 8 bytes go through a NEON register; the remaining 0..7
// bytes are copied one at a time.
// Clobbers: x0, x1, x2, w3, v0, condition flags.

        .p2align 2

        .global ihevc_memcpy_av8
        .type   ihevc_memcpy_av8, %function

ihevc_memcpy_av8:
        mov     w2, w2                      // zero-extend count into x2
        subs    x2, x2, #8                  // bias by one chunk: x2 = count - 8
        b.lt    .Lmemcpy_tail               // fewer than 8 bytes in total

.Lmemcpy_neon_loop:
        ld1     {v0.8b}, [x1], #8           // load 8 bytes, src += 8
        st1     {v0.8b}, [x0], #8           // store 8 bytes, dst += 8
        subs    x2, x2, #8
        b.ge    .Lmemcpy_neon_loop          // another full chunk is available

.Lmemcpy_tail:
        // Here x2 = remainder - 8 (range -8..-1). Undo the bias; a zero
        // remainder (which includes count == 0) must skip the byte loop,
        // because that loop stores before it tests the counter.
        adds    x2, x2, #8
        b.eq    .Lmemcpy_done

.Lmemcpy_byte_loop:
        ldrb    w3, [x1], #1
        strb    w3, [x0], #1
        subs    x2, x2, #1
        b.ne    .Lmemcpy_byte_loop

.Lmemcpy_done:
        ret

        .size   ihevc_memcpy_av8, . - ihevc_memcpy_av8


//void ihevc_memset_mul_8(UWORD8 *pu1_dst,
//                        UWORD8 value,
//                        UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    x1 => value
//    x2 => num_bytes
//
// Fills num_bytes bytes at pu1_dst with the low byte of value, 8 bytes per
// iteration.
// Assumptions: num_bytes is either 8, 16 or 32 (more generally a multiple
// of 8: the loop only stops when the counter reaches exactly zero). A count
// of 0 returns immediately without touching memory.
// Clobbers: x0, x2, v0, condition flags.

        .text
        .p2align 2

        .global ihevc_memset_mul_8_av8
        .type   ihevc_memset_mul_8_av8, %function

ihevc_memset_mul_8_av8:
        mov     w2, w2                      // zero-extend count into x2
        cbz     x2, .Lmemset_mul_8_done     // nothing to fill
        dup     v0.8b, w1                   // replicate the byte into 8 lanes

.Lmemset_mul_8_loop:
        st1     {v0.8b}, [x0], #8           // store 8 bytes, dst += 8
        subs    x2, x2, #8                  // x2 = bytes remaining
        b.ne    .Lmemset_mul_8_loop

.Lmemset_mul_8_done:
        ret

        .size   ihevc_memset_mul_8_av8, . - ihevc_memset_mul_8_av8


//void ihevc_memset(UWORD8 *pu1_dst,
//                  UWORD8 value,
//                  UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    x1 => value
//    x2 => num_bytes
//
// Fills num_bytes bytes at pu1_dst with the low byte of value for any count,
// including 0. Whole groups of 8 bytes are written from a NEON register; the
// remaining 0..7 bytes are written one at a time.
// Clobbers: x0, x2, v0, condition flags.

        .p2align 2

        .global ihevc_memset_av8
        .type   ihevc_memset_av8, %function

ihevc_memset_av8:
        mov     w2, w2                      // zero-extend count into x2
        subs    x2, x2, #8                  // bias by one chunk: x2 = count - 8
        b.lt    .Lmemset_tail               // fewer than 8 bytes in total
        dup     v0.8b, w1                   // replicate the byte into 8 lanes

.Lmemset_neon_loop:
        st1     {v0.8b}, [x0], #8           // store 8 bytes, dst += 8
        subs    x2, x2, #8
        b.ge    .Lmemset_neon_loop          // another full chunk is available

.Lmemset_tail:
        // Undo the bias; skip the byte loop when nothing is left, because
        // that loop stores before it tests the counter.
        adds    x2, x2, #8
        b.eq    .Lmemset_done

.Lmemset_byte_loop:
        strb    w1, [x0], #1
        subs    x2, x2, #1
        b.ne    .Lmemset_byte_loop

.Lmemset_done:
        ret

        .size   ihevc_memset_av8, . - ihevc_memset_av8


//void ihevc_memset_16bit_mul_8(UWORD16 *pu2_dst,
//                              UWORD16 value,
//                              UWORD8 num_words)
//**************Variables Vs Registers*************************
//    x0 => *pu2_dst
//    x1 => value
//    x2 => num_words
//
// Fills num_words 16-bit elements at pu2_dst with the low 16 bits of value,
// 8 elements (16 bytes) per iteration.
// Assumptions: num_words is either 8, 16 or 32 (more generally a multiple
// of 8: the loop only stops when the counter reaches exactly zero). A count
// of 0 returns immediately without touching memory.
// Clobbers: x0, x2, v0, condition flags.

        .text
        .p2align 2

        .global ihevc_memset_16bit_mul_8_av8
        .type   ihevc_memset_16bit_mul_8_av8, %function

ihevc_memset_16bit_mul_8_av8:
        mov     w2, w2                          // zero-extend count into x2
        cbz     x2, .Lmemset_16bit_mul_8_done   // nothing to fill
        dup     v0.8h, w1                       // replicate the halfword into 8 lanes

.Lmemset_16bit_mul_8_loop:
        st1     {v0.8h}, [x0], #16              // store 8 halfwords, dst += 16 bytes
        subs    x2, x2, #8                      // x2 = halfwords remaining
        b.ne    .Lmemset_16bit_mul_8_loop

.Lmemset_16bit_mul_8_done:
        ret

        .size   ihevc_memset_16bit_mul_8_av8, . - ihevc_memset_16bit_mul_8_av8


//void ihevc_memset_16bit(UWORD16 *pu2_dst,
//                        UWORD16 value,
//                        UWORD8 num_words)
//**************Variables Vs Registers*************************
//    x0 => *pu2_dst
//    x1 => value
//    x2 => num_words
//
// Fills num_words 16-bit elements at pu2_dst with the low 16 bits of value
// for any count, including 0. Whole groups of 8 elements are written from a
// NEON register; the remaining 0..7 elements are written one at a time.
// Clobbers: x0, x2, v0, condition flags.

        .p2align 2

        .global ihevc_memset_16bit_av8
        .type   ihevc_memset_16bit_av8, %function

ihevc_memset_16bit_av8:
        mov     w2, w2                      // zero-extend count into x2
        subs    x2, x2, #8                  // bias by one chunk: x2 = count - 8
        b.lt    .Lmemset_16bit_tail         // fewer than 8 halfwords in total
        dup     v0.8h, w1                   // replicate the halfword into 8 lanes

.Lmemset_16bit_neon_loop:
        st1     {v0.8h}, [x0], #16          // store 8 halfwords, dst += 16 bytes
        subs    x2, x2, #8
        b.ge    .Lmemset_16bit_neon_loop    // another full chunk is available

.Lmemset_16bit_tail:
        // Undo the bias; skip the halfword loop when nothing is left, because
        // that loop stores before it tests the counter.
        adds    x2, x2, #8
        b.eq    .Lmemset_16bit_done

.Lmemset_16bit_word_loop:
        strh    w1, [x0], #2
        subs    x2, x2, #1
        b.ne    .Lmemset_16bit_word_loop

.Lmemset_16bit_done:
        ret

        .size   ihevc_memset_16bit_av8, . - ihevc_memset_16bit_av8


        .section .note.GNU-stack,"",%progbits