//******************************************************************************
//*
//* Copyright (C) 2015 The Android Open Source Project
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
///**
// *******************************************************************************
// * @file
// *  ih264_mem_fns_neon.s
// *
// * @brief
// *  Contains function definitions for memory manipulation
// *
// * @author
// *  Naveen SR
// *
// * @par List of Functions:
// *  - ih264_memcpy_av8()
// *  - ih264_memcpy_mul_8_av8()
// *  - ih264_memset_av8()
// *  - ih264_memset_mul_8_av8()
// *  - ih264_memset_16bit_mul_8_av8()
// *  - ih264_memset_16bit_av8()
// *
// * @remarks
// *  Syntax : GNU assembler, AArch64 (A64), comments introduced by "//".
// *  All routines are leaf functions: they do not touch the stack and only
// *  modify the argument registers x0-x3, the vector register v0 and the
// *  condition flags.
// *
// *  NOTE(review): every routine reads its count from the full 64-bit x2
// *  although the C prototypes quoted below declare a narrower type. This
// *  assumes callers pass the count zero-extended - confirm against the C
// *  declarations.
// *
// *******************************************************************************
//*/

.text
.p2align 2
.include "ih264_neon_macros.s"

///**
//*******************************************************************************
//*
//* @brief
//*  memcpy of a 1d array whose length is a multiple of 8
//*
//* @par Description:
//*  Does memcpy of 8bit data from source to destination for 8,16 or 32 number
//*  of bytes, 8 bytes per iteration.
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] num_bytes
//*  number of bytes to copy
//*
//* @returns
//*  None
//*
//* @remarks
//*  The loop body runs before the count is tested, so one 8 byte chunk is
//*  always copied. The unsigned loop condition stops the loop as soon as the
//*  remaining count is 8 or less, so a count that is zero or not a multiple
//*  of 8 cannot make the loop run away.
//*
//*******************************************************************************
//*/
//void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
//                        UWORD8 *pu1_src,
//                        UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    x1 => *pu1_src
//    x2 => num_bytes
//    Clobbers: x0, x1, x2, v0, flags

        .global ih264_memcpy_mul_8_av8

ih264_memcpy_mul_8_av8:

loop_neon_memcpy_mul_8:
        // Memcpy 8 bytes
        ld1     {v0.8b}, [x1], #8
        st1     {v0.8b}, [x0], #8

        subs    x2, x2, #8              // flags compare old count against 8
        b.hi    loop_neon_memcpy_mul_8  // old count > 8 => more chunks remain
        ret


///**
//*******************************************************************************
//*
//* @brief
//*  memcpy of a 1d array of arbitrary length
//*
//* @par Description:
//*  Copies 8 bytes per iteration while at least 8 bytes remain, then copies
//*  the remaining 0..7 bytes one at a time. A count of zero copies nothing.
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] num_bytes
//*  number of bytes to copy
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
//void ih264_memcpy(UWORD8 *pu1_dst,
//                  UWORD8 *pu1_src,
//                  UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    x1 => *pu1_src
//    x2 => num_bytes
//    Clobbers: x0, x1, x2, x3, v0, flags

        .global ih264_memcpy_av8

ih264_memcpy_av8:
        subs    x2, x2, #8              // x2 = count - 8 (biased by one chunk)
        b.lo    arm_memcpy              // count < 8 => no full chunk to copy

loop_neon_memcpy:
        // Memcpy 8 bytes
        ld1     {v0.8b}, [x1], #8
        st1     {v0.8b}, [x0], #8

        subs    x2, x2, #8
        b.hs    loop_neon_memcpy        // no borrow => another full chunk left

arm_memcpy:
        adds    x2, x2, #8              // undo the bias: x2 = bytes left (0..7)
        b.eq    end_func1               // nothing left, also covers count == 0

loop_arm_memcpy:
        // Memcpy 1 byte
        ldrb    w3, [x1], #1
        strb    w3, [x0], #1
        subs    x2, x2, #1
        b.ne    loop_arm_memcpy

end_func1:
        ret


///**
//*******************************************************************************
//*
//* @brief
//*  memset of a 1d array whose length is a multiple of 8
//*
//* @par Description:
//*  Fills the destination with the low byte of value, 8 bytes per iteration.
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] value
//*  byte value to store
//*
//* @param[in] num_bytes
//*  number of bytes to set
//*
//* @returns
//*  None
//*
//* @remarks
//*  The loop body runs before the count is tested, so one 8 byte chunk is
//*  always written. The unsigned loop condition stops the loop as soon as the
//*  remaining count is 8 or less.
//*
//*******************************************************************************
//*/
//void ih264_memset_mul_8(UWORD8 *pu1_dst,
//                        UWORD8 value,
//                        UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    x1 => value
//    x2 => num_bytes
//    Clobbers: x0, x2, v0, flags

        .global ih264_memset_mul_8_av8

ih264_memset_mul_8_av8:

        // Assumptions: numbytes is either 8, 16 or 32
        dup     v0.8b, w1               // replicate low byte of value 8 times

loop_memset_mul_8:
        // Memset 8 bytes
        st1     {v0.8b}, [x0], #8

        subs    x2, x2, #8              // flags compare old count against 8
        b.hi    loop_memset_mul_8       // old count > 8 => more chunks remain

        ret


///**
//*******************************************************************************
//*
//* @brief
//*  memset of a 1d array of arbitrary length
//*
//* @par Description:
//*  Stores 8 bytes per iteration while at least 8 bytes remain, then stores
//*  the remaining 0..7 bytes one at a time. A count of zero stores nothing.
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] value
//*  byte value to store
//*
//* @param[in] num_bytes
//*  number of bytes to set
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
//void ih264_memset(UWORD8 *pu1_dst,
//                  UWORD8 value,
//                  UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    x1 => value
//    x2 => num_bytes
//    Clobbers: x0, x2, v0, flags

        .global ih264_memset_av8

ih264_memset_av8:
        subs    x2, x2, #8              // x2 = count - 8 (biased by one chunk)
        b.lo    arm_memset              // count < 8 => no full chunk to store
        dup     v0.8b, w1               // replicate low byte of value 8 times

loop_neon_memset:
        // Memset 8 bytes
        st1     {v0.8b}, [x0], #8

        subs    x2, x2, #8
        b.hs    loop_neon_memset        // no borrow => another full chunk left

arm_memset:
        adds    x2, x2, #8              // undo the bias: x2 = bytes left (0..7)
        b.eq    end_func2               // nothing left, also covers count == 0

loop_arm_memset:
        // Memset 1 byte
        strb    w1, [x0], #1
        subs    x2, x2, #1
        b.ne    loop_arm_memset

end_func2:
        ret


///**
//*******************************************************************************
//*
//* @brief
//*  memset of a 1d array of 16bit words whose length is a multiple of 8
//*
//* @par Description:
//*  Fills the destination with the low 16 bits of value, 8 words (16 bytes)
//*  per iteration.
//*
//* @param[out] pu2_dst
//*  UWORD16 pointer to the destination
//*
//* @param[in] value
//*  16bit value to store
//*
//* @param[in] num_words
//*  number of 16bit words to set
//*
//* @returns
//*  None
//*
//* @remarks
//*  The loop body runs before the count is tested, so one group of 8 words is
//*  always written. The unsigned loop condition stops the loop as soon as the
//*  remaining count is 8 or less.
//*
//*******************************************************************************
//*/
//void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
//                              UWORD16 value,
//                              UWORD8 num_words)
//**************Variables Vs Registers*************************
//    x0 => *pu2_dst
//    x1 => value
//    x2 => num_words
//    Clobbers: x0, x2, v0, flags

        .global ih264_memset_16bit_mul_8_av8

ih264_memset_16bit_mul_8_av8:

        // Assumptions: num_words is either 8, 16 or 32
        dup     v0.8h, w1               // replicate low 16 bits of value 8 times

loop_memset_16bit_mul_8:
        // Memset 8 words with a single 128 bit store
        st1     {v0.8h}, [x0], #16

        subs    x2, x2, #8              // flags compare old count against 8
        b.hi    loop_memset_16bit_mul_8 // old count > 8 => more groups remain

        ret


///**
//*******************************************************************************
//*
//* @brief
//*  memset of a 1d array of 16bit words of arbitrary length
//*
//* @par Description:
//*  Stores 8 words per iteration while at least 8 words remain, then stores
//*  the remaining 0..7 words one at a time. A count of zero stores nothing.
//*
//* @param[out] pu2_dst
//*  UWORD16 pointer to the destination
//*
//* @param[in] value
//*  16bit value to store
//*
//* @param[in] num_words
//*  number of 16bit words to set
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
//void ih264_memset_16bit(UWORD16 *pu2_dst,
//                        UWORD16 value,
//                        UWORD8 num_words)
//**************Variables Vs Registers*************************
//    x0 => *pu2_dst
//    x1 => value
//    x2 => num_words
//    Clobbers: x0, x2, v0, flags

        .global ih264_memset_16bit_av8

ih264_memset_16bit_av8:
        subs    x2, x2, #8              // x2 = count - 8 (biased by one group)
        b.lo    arm_memset_16bit        // count < 8 => no full group to store
        dup     v0.4h, w1               // replicate low 16 bits of value 4 times

loop_neon_memset_16bit:
        // Memset 8 words
        st1     {v0.4h}, [x0], #8
        st1     {v0.4h}, [x0], #8

        subs    x2, x2, #8
        b.hs    loop_neon_memset_16bit  // no borrow => another full group left

arm_memset_16bit:
        adds    x2, x2, #8              // undo the bias: x2 = words left (0..7)
        b.eq    end_func3               // nothing left, also covers count == 0

loop_arm_memset_16bit:
        // Memset 1 word
        strh    w1, [x0], #2
        subs    x2, x2, #1
        b.ne    loop_arm_memset_16bit

end_func3:
        ret