@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
@**
@ *******************************************************************************
@ * @file
@ *  ih264_mem_fns_neon.s
@ *
@ * @brief
@ *  Contains function definitions for memory manipulation
@ *
@ * @author
@ *  Naveen SR
@ *
@ * @par List of Functions:
@ *  - ih264_memcpy_mul_8_a9q()
@ *  - ih264_memcpy_a9q()
@ *  - ih264_memset_mul_8_a9q()
@ *  - ih264_memset_a9q()
@ *  - ih264_memset_16bit_mul_8_a9q()
@ *  - ih264_memset_16bit_a9q()
@ *
@ * @remarks
@ *  None
@ *
@ *******************************************************************************
@*

@**
@*******************************************************************************
@*
@* @brief
@*  memcpy of a 1d array
@*
@* @par Description:
@*  Copies 8-bit data from source to destination; the byte count is expected
@*  to be 8, 16 or 32
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[in] num_bytes
@*  number of bytes to copy
@* @returns
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@*
@void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
@                        UWORD8 *pu1_src,
@                        UWORD32 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => *pu1_src
@   r2 => num_bytes
@
@ Copies num_bytes bytes from pu1_src to pu1_dst, 8 bytes per iteration.
@ The loop stores before it tests the count and exits only when the count
@ reaches exactly zero, so num_bytes must be a non-zero multiple of 8.
@ Modifies r0, r1, r2, d0 and the condition flags.

    .text
    .p2align 2


    .global ih264_memcpy_mul_8_a9q

ih264_memcpy_mul_8_a9q:

.Lmemcpy_mul_8_loop:
    @ Copy 8 bytes and advance both pointers
    vld1.8      d0, [r1]!
    vst1.8      d0, [r0]!

    subs        r2, r2, #8              @ r2 = bytes still to copy
    bne         .Lmemcpy_mul_8_loop
    bx          lr



@*******************************************************************************
@*
@void ih264_memcpy(UWORD8 *pu1_dst,
@                  UWORD8 *pu1_src,
@                  UWORD32 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => *pu1_src
@   r2 => num_bytes
@
@ Copies num_bytes bytes from pu1_src to pu1_dst for any count, including 0:
@ whole 8-byte groups go through d0, the remaining 0..7 bytes are copied one
@ at a time.
@ Modifies r0, r1, r2, r3, d0 and the condition flags.



    .global ih264_memcpy_a9q

ih264_memcpy_a9q:
    subs        r2, r2, #8              @ r2 = num_bytes - 8
    blo         .Lmemcpy_tail           @ unsigned: fewer than 8 bytes in total

.Lmemcpy_neon_loop:
    @ Copy 8 bytes and advance both pointers
    vld1.8      d0, [r1]!
    vst1.8      d0, [r0]!

    subs        r2, r2, #8
    bhs         .Lmemcpy_neon_loop      @ at least 8 more bytes were left

.Lmemcpy_tail:
    @ Here r2 = (bytes left) - 8; undo the bias and return if nothing is
    @ left. This covers both "multiple of 8" and num_bytes == 0.
    adds        r2, r2, #8              @ r2 = bytes left, 0..7
    bxeq        lr

.Lmemcpy_byte_loop:
    ldrb        r3, [r1], #1
    strb        r3, [r0], #1
    subs        r2, r2, #1
    bne         .Lmemcpy_byte_loop
    bx          lr




@void ih264_memset_mul_8(UWORD8 *pu1_dst,
@                        UWORD8 value,
@                        UWORD32 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => value
@   r2 => num_bytes
@
@ Fills num_bytes bytes at pu1_dst with the low byte of value, 8 bytes per
@ iteration. The loop stores before it tests the count and exits only when
@ the count reaches exactly zero, so num_bytes must be a non-zero multiple
@ of 8.
@ Modifies r0, r2, d0 and the condition flags.





    .global ih264_memset_mul_8_a9q

ih264_memset_mul_8_a9q:

    @ Assumptions: num_bytes is either 8, 16 or 32
    vdup.8      d0, r1                  @ d0 = value replicated into 8 bytes
.Lmemset_mul_8_loop:
    @ Memset 8 bytes
    vst1.8      d0, [r0]!

    subs        r2, r2, #8
    bne         .Lmemset_mul_8_loop

    bx          lr




@void ih264_memset(UWORD8 *pu1_dst,
@                  UWORD8 value,
@                  UWORD8 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => value
@   r2 => num_bytes
@
@ Fills num_bytes bytes at pu1_dst with the low byte of value for any count,
@ including 0: whole 8-byte groups are stored from d0, the remaining 0..7
@ bytes are stored one at a time.
@ NOTE(review): the sibling routines declare the count as UWORD32 - confirm
@ the type of num_bytes against the C prototype.
@ Modifies r0, r2, d0 and the condition flags.



    .global ih264_memset_a9q

ih264_memset_a9q:
    subs        r2, r2, #8              @ r2 = num_bytes - 8
    blo         .Lmemset_tail           @ unsigned: fewer than 8 bytes in total
    vdup.8      d0, r1                  @ d0 = value replicated into 8 bytes

.Lmemset_neon_loop:
    @ Memset 8 bytes
    vst1.8      d0, [r0]!

    subs        r2, r2, #8
    bhs         .Lmemset_neon_loop      @ at least 8 more bytes were left

.Lmemset_tail:
    @ Here r2 = (bytes left) - 8; undo the bias and return if nothing is
    @ left. This covers both "multiple of 8" and num_bytes == 0.
    adds        r2, r2, #8              @ r2 = bytes left, 0..7
    bxeq        lr

.Lmemset_byte_loop:
    strb        r1, [r0], #1
    subs        r2, r2, #1
    bne         .Lmemset_byte_loop
    bx          lr




@void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
@                              UWORD16 value,
@                              UWORD32 num_words)
@**************Variables Vs Registers*************************
@   r0 => *pu2_dst
@   r1 => value
@   r2 => num_words
@
@ Fills num_words 16-bit words at pu2_dst with the low halfword of value,
@ 8 words per iteration. The loop stores before it tests the count and
@ exits only when the count reaches exactly zero, so num_words must be a
@ non-zero multiple of 8.
@ Modifies r0, r2, d0 and the condition flags.





    .global ih264_memset_16bit_mul_8_a9q

ih264_memset_16bit_mul_8_a9q:

    @ Assumptions: num_words is either 8, 16 or 32

    vdup.16     d0, r1                  @ d0 = value replicated into 4 halfwords
.Lmemset_16bit_mul_8_loop:
    @ Memset 8 words: two stores of 4 halfwords each
    vst1.16     d0, [r0]!
    vst1.16     d0, [r0]!

    subs        r2, r2, #8
    bne         .Lmemset_16bit_mul_8_loop

    bx          lr




@void ih264_memset_16bit(UWORD16 *pu2_dst,
@                        UWORD16 value,
@                        UWORD32 num_words)
@**************Variables Vs Registers*************************
@   r0 => *pu2_dst
@   r1 => value
@   r2 => num_words
@
@ Fills num_words 16-bit words at pu2_dst with the low halfword of value for
@ any count, including 0: whole groups of 8 words are stored from d0, the
@ remaining 0..7 words are stored one at a time.
@ Modifies r0, r2, d0 and the condition flags.



    .global ih264_memset_16bit_a9q

ih264_memset_16bit_a9q:
    subs        r2, r2, #8              @ r2 = num_words - 8
    blo         .Lmemset_16bit_tail     @ unsigned: fewer than 8 words in total
    vdup.16     d0, r1                  @ d0 = value replicated into 4 halfwords

.Lmemset_16bit_neon_loop:
    @ Memset 8 words: two stores of 4 halfwords each
    vst1.16     d0, [r0]!
    vst1.16     d0, [r0]!

    subs        r2, r2, #8
    bhs         .Lmemset_16bit_neon_loop    @ at least 8 more words were left

.Lmemset_16bit_tail:
    @ Here r2 = (words left) - 8; undo the bias and return if nothing is
    @ left. This covers both "multiple of 8" and num_words == 0.
    adds        r2, r2, #8              @ r2 = words left, 0..7
    bxeq        lr

.Lmemset_16bit_word_loop:
    strh        r1, [r0], #2
    subs        r2, r2, #1
    bne         .Lmemset_16bit_word_loop
    bx          lr