//******************************************************************************
//*
//* Copyright (C) 2015 The Android Open Source Project
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
///**
// *******************************************************************************
// * @file
// *  ih264_mem_fns_neon.s
// *
// * @brief
// *  Contains function definitions for memory manipulation
// *
// * @author
// *  Naveen SR
// *
// * @par List of Functions:
// *  - ih264_memcpy_av8()
// *  - ih264_memcpy_mul_8_av8()
// *  - ih264_memset_av8()
// *  - ih264_memset_mul_8_av8()
// *  - ih264_memset_16bit_mul_8_av8()
// *  - ih264_memset_16bit_av8()
// *
// * @remarks
// *  Syntax : GNU assembler, AArch64 (comments use "//").
// *  All routines are leaf functions that use only x0-x3, v0 and the
// *  condition flags, and never touch the stack.
// *
// *******************************************************************************
//*/

.text
.p2align 2
.include "ih264_neon_macros.s"

///**
//*******************************************************************************
//*
//* @brief
//*  memcpy of a 1d array, length a multiple of 8 bytes
//*
//* @par Description:
//*  Does memcpy of 8bit data from source to destination for 8, 16 or 32
//*  number of bytes
//*
//* @param[in] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] num_bytes
//*  number of bytes to copy
//*
//* @returns
//*  None
//*
//* @remarks
//*  num_bytes must be a non-zero multiple of 8: the loop copies 8 bytes
//*  before it tests the counter, and it only stops when the counter reaches
//*  exactly zero.
//*
//*******************************************************************************
//*/
//void ih264_memcpy_mul_8_av8(UWORD8 *pu1_dst,
//                            UWORD8 *pu1_src,
//                            UWORD32 num_bytes)
//**************Variables Vs Registers*************************
//  x0 => *pu1_dst
//  x1 => *pu1_src
//  w2 => num_bytes
//  Modifies: x0, x1, w2, v0, flags

    .global ih264_memcpy_mul_8_av8

ih264_memcpy_mul_8_av8:

loop_neon_memcpy_mul_8:
    // Copy 8 bytes; both pointers are post-incremented by 8
    ld1       {v0.8b}, [x1], #8
    st1       {v0.8b}, [x0], #8

    subs      w2, w2, #8                // w2 = bytes still to copy
    bne       loop_neon_memcpy_mul_8
    ret


///**
//*******************************************************************************
//*
//* @brief
//*  memcpy of a 1d array of any length
//*
//* @par Description:
//*  Copies num_bytes bytes from source to destination. Whole groups of
//*  8 bytes are moved through a NEON register; the remaining 1..7 bytes are
//*  moved one byte at a time. A num_bytes of zero copies nothing.
//*
//* @param[in] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] num_bytes
//*  number of bytes to copy
//*
//* @returns
//*  None
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//void ih264_memcpy_av8(UWORD8 *pu1_dst,
//                      UWORD8 *pu1_src,
//                      UWORD32 num_bytes)
//**************Variables Vs Registers*************************
//  x0 => *pu1_dst
//  x1 => *pu1_src
//  w2 => num_bytes
//  Modifies: x0, x1, w2, w3, v0, flags

    .global ih264_memcpy_av8

ih264_memcpy_av8:
    // Nothing to copy. Without this guard a zero count would enter the
    // byte loop below, which copies before it tests the counter.
    cbz       w2, end_func1

    subs      w2, w2, #8                // w2 = num_bytes - 8
    blt       arm_memcpy                // fewer than 8 bytes: byte loop only

loop_neon_memcpy:
    // Copy 8 bytes; both pointers are post-incremented by 8
    ld1       {v0.8b}, [x1], #8
    st1       {v0.8b}, [x0], #8

    subs      w2, w2, #8
    bge       loop_neon_memcpy          // another full group of 8 remains

    // Here w2 = (bytes left) - 8, i.e. -8 when nothing is left
    cmn       w2, #8
    beq       end_func1

arm_memcpy:
    add       w2, w2, #8                // w2 = bytes left (1..7)

loop_arm_memcpy:
    ldrb      w3, [x1], #1
    strb      w3, [x0], #1
    subs      w2, w2, #1
    bne       loop_arm_memcpy

end_func1:
    ret


///**
//*******************************************************************************
//*
//* @brief
//*  memset of a 1d array, length a multiple of 8 bytes
//*
//* @par Description:
//*  Fills num_bytes bytes at the destination with the given 8bit value
//*
//* @param[in] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] value
//*  byte value to store
//*
//* @param[in] num_bytes
//*  number of bytes to set
//*
//* @returns
//*  None
//*
//* @remarks
//*  Assumptions: num_bytes is either 8, 16 or 32. It must be a non-zero
//*  multiple of 8: the loop stores 8 bytes before it tests the counter, and
//*  it only stops when the counter reaches exactly zero.
//*
//*******************************************************************************
//*/
//void ih264_memset_mul_8_av8(UWORD8 *pu1_dst,
//                            UWORD8 value,
//                            UWORD32 num_bytes)
//**************Variables Vs Registers*************************
//  x0 => *pu1_dst
//  w1 => value
//  w2 => num_bytes
//  Modifies: x0, w2, v0, flags

    .global ih264_memset_mul_8_av8

ih264_memset_mul_8_av8:
    dup       v0.8b, w1                 // replicate the low byte of w1 x8

loop_memset_mul_8:
    // Store 8 bytes; the pointer is post-incremented by 8
    st1       {v0.8b}, [x0], #8

    subs      w2, w2, #8                // w2 = bytes still to set
    bne       loop_memset_mul_8

    ret


///**
//*******************************************************************************
//*
//* @brief
//*  memset of a 1d array of any length
//*
//* @par Description:
//*  Fills num_bytes bytes at the destination with the given 8bit value.
//*  Whole groups of 8 bytes are stored from a NEON register; the remaining
//*  1..7 bytes are stored one byte at a time. A num_bytes of zero stores
//*  nothing.
//*
//* @param[in] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] value
//*  byte value to store
//*
//* @param[in] num_bytes
//*  number of bytes to set
//*
//* @returns
//*  None
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//void ih264_memset_av8(UWORD8 *pu1_dst,
//                      UWORD8 value,
//                      UWORD32 num_bytes)
//**************Variables Vs Registers*************************
//  x0 => *pu1_dst
//  w1 => value
//  w2 => num_bytes
//  Modifies: x0, w2, v0, flags

    .global ih264_memset_av8

ih264_memset_av8:
    // Nothing to set. Without this guard a zero count would enter the
    // byte loop below, which stores before it tests the counter.
    cbz       w2, end_func2

    subs      w2, w2, #8                // w2 = num_bytes - 8
    blt       arm_memset                // fewer than 8 bytes: byte loop only
    dup       v0.8b, w1                 // replicate the low byte of w1 x8

loop_neon_memset:
    // Store 8 bytes; the pointer is post-incremented by 8
    st1       {v0.8b}, [x0], #8

    subs      w2, w2, #8
    bge       loop_neon_memset          // another full group of 8 remains

    // Here w2 = (bytes left) - 8, i.e. -8 when nothing is left
    cmn       w2, #8
    beq       end_func2

arm_memset:
    add       w2, w2, #8                // w2 = bytes left (1..7)

loop_arm_memset:
    strb      w1, [x0], #1
    subs      w2, w2, #1
    bne       loop_arm_memset

end_func2:
    ret


///**
//*******************************************************************************
//*
//* @brief
//*  memset of a 1d array of 16bit words, length a multiple of 8 words
//*
//* @par Description:
//*  Fills num_words 16bit words at the destination with the given value
//*
//* @param[in] pu2_dst
//*  UWORD16 pointer to the destination
//*
//* @param[in] value
//*  16bit value to store
//*
//* @param[in] num_words
//*  number of 16bit words to set
//*
//* @returns
//*  None
//*
//* @remarks
//*  Assumptions: num_words is either 8, 16 or 32. It must be a non-zero
//*  multiple of 8: the loop stores 8 words before it tests the counter, and
//*  it only stops when the counter reaches exactly zero.
//*
//*******************************************************************************
//*/
//void ih264_memset_16bit_mul_8_av8(UWORD16 *pu2_dst,
//                                  UWORD16 value,
//                                  UWORD32 num_words)
//**************Variables Vs Registers*************************
//  x0 => *pu2_dst
//  w1 => value
//  w2 => num_words
//  Modifies: x0, w2, v0, flags

    .global ih264_memset_16bit_mul_8_av8

ih264_memset_16bit_mul_8_av8:
    dup       v0.4h, w1                 // replicate the low halfword of w1 x4

loop_memset_16bit_mul_8:
    // Store 8 words as two 4-word (8-byte) stores
    st1       {v0.4h}, [x0], #8
    st1       {v0.4h}, [x0], #8

    subs      w2, w2, #8                // w2 = words still to set
    bne       loop_memset_16bit_mul_8

    ret


///**
//*******************************************************************************
//*
//* @brief
//*  memset of a 1d array of 16bit words of any length
//*
//* @par Description:
//*  Fills num_words 16bit words at the destination with the given value.
//*  Whole groups of 8 words are stored from a NEON register; the remaining
//*  1..7 words are stored one word at a time. A num_words of zero stores
//*  nothing.
//*
//* @param[in] pu2_dst
//*  UWORD16 pointer to the destination
//*
//* @param[in] value
//*  16bit value to store
//*
//* @param[in] num_words
//*  number of 16bit words to set
//*
//* @returns
//*  None
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//void ih264_memset_16bit_av8(UWORD16 *pu2_dst,
//                            UWORD16 value,
//                            UWORD32 num_words)
//**************Variables Vs Registers*************************
//  x0 => *pu2_dst
//  w1 => value
//  w2 => num_words
//  Modifies: x0, w2, v0, flags

    .global ih264_memset_16bit_av8

ih264_memset_16bit_av8:
    // Nothing to set. Without this guard a zero count would enter the
    // word loop below, which stores before it tests the counter.
    cbz       w2, end_func3

    subs      w2, w2, #8                // w2 = num_words - 8
    blt       arm_memset_16bit          // fewer than 8 words: word loop only
    dup       v0.4h, w1                 // replicate the low halfword of w1 x4

loop_neon_memset_16bit:
    // Store 8 words as two 4-word (8-byte) stores
    st1       {v0.4h}, [x0], #8
    st1       {v0.4h}, [x0], #8

    subs      w2, w2, #8
    bge       loop_neon_memset_16bit    // another full group of 8 remains

    // Here w2 = (words left) - 8, i.e. -8 when nothing is left
    cmn       w2, #8
    beq       end_func3

arm_memset_16bit:
    add       w2, w2, #8                // w2 = words left (1..7)

loop_arm_memset_16bit:
    strh      w1, [x0], #2
    subs      w2, w2, #1
    bne       loop_arm_memset_16bit

end_func3:
    ret