Home | History | Annotate | Download | only in arm
      1 @/******************************************************************************
      2 @ *
      3 @ * Copyright (C) 2015 The Android Open Source Project
      4 @ *
      5 @ * Licensed under the Apache License, Version 2.0 (the "License");
      6 @ * you may not use this file except in compliance with the License.
      7 @ * You may obtain a copy of the License at:
      8 @ *
      9 @ * http://www.apache.org/licenses/LICENSE-2.0
     10 @ *
     11 @ * Unless required by applicable law or agreed to in writing, software
     12 @ * distributed under the License is distributed on an "AS IS" BASIS,
     13 @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @ * See the License for the specific language governing permissions and
     15 @ * limitations under the License.
     16 @ *
     17 @ *****************************************************************************
     18 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 @*/
     20 @**
     21 @ *******************************************************************************
     22 @ * @file
     23 @ *  ih264_mem_fns_neon.s
     24 @ *
     25 @ * @brief
     26 @ *  Contains function definitions for memory manipulation
     27 @ *
     28 @ * @author
     29 @ *  Naveen SR
     30 @ *
     31 @ * @par List of Functions:
     32 @ *  - ih264_memcpy_mul_8_a9q()
     33 @ *  - ih264_memcpy_a9q()
     34 @ *  - ih264_memset_mul_8_a9q()
     35 @ *  - ih264_memset_a9q()
     36 @ *  - ih264_memset_16bit_mul_8_a9q()
@ *  - ih264_memset_16bit_a9q()
     38 @ *
     39 @ * @remarks
     40 @ *  None
     41 @ *
     42 @ *******************************************************************************
     43 @*
     44 
     45 @**
     46 @*******************************************************************************
     47 @*
     48 @* @brief
     49 @*   memcpy of a 1d array
     50 @*
     51 @* @par Description:
@*   Copies 8-bit data from source to destination for byte counts of 8, 16 or 32
     53 @*
@* @param[out] pu1_dst
     55 @*  UWORD8 pointer to the destination
     56 @*
     57 @* @param[in] pu1_src
     58 @*  UWORD8 pointer to the source
     59 @*
     60 @* @param[in] num_bytes
     61 @*  number of bytes to copy
     62 @* @returns
     63 @*
     64 @* @remarks
     65 @*  None
     66 @*
     67 @*******************************************************************************
     68 @*
     69 @void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
     70 @                    UWORD8 *pu1_src,
     71 @                   UWORD32 num_bytes)
     72 @**************Variables Vs Registers*************************
     73 @   r0 => *pu1_dst
     74 @   r1 => *pu1_src
     75 @   r2 => num_bytes
     76 
     77 .text
     78 .p2align 2
     79 
     80 
     81     .global ih264_memcpy_mul_8_a9q
     82 
     83 ih264_memcpy_mul_8_a9q:
     84 
     85 loop_neon_memcpy_mul_8:
     86     @ Memcpy 8 bytes
     87     vld1.8        d0, [r1]!
     88     vst1.8        d0, [r0]!
     89 
     90     subs          r2, r2, #8
     91     bne           loop_neon_memcpy_mul_8
     92     bx            lr
     93 
     94 
     95 
     96 @*******************************************************************************
     97 @*
     98 @void ih264_memcpy(UWORD8 *pu1_dst,
     99 @                  UWORD8 *pu1_src,
    100 @                  UWORD32 num_bytes)
    101 @**************Variables Vs Registers*************************
    102 @   r0 => *pu1_dst
    103 @   r1 => *pu1_src
    104 @   r2 => num_bytes
    105 
    106 
    107 
    108     .global ih264_memcpy_a9q
    109 
    110 ih264_memcpy_a9q:
    111     subs          r2, #8
    112     blt           memcpy
    113 loop_neon_memcpy:
    114     @ Memcpy 8 bytes
    115     vld1.8        d0, [r1]!
    116     vst1.8        d0, [r0]!
    117 
    118     subs          r2, #8
    119     bge           loop_neon_memcpy
    120     cmp           r2, #-8
    121     bxeq          lr
    122 
    123 memcpy:
    124     add           r2, #8
    125 
    126 loop_memcpy:
    127     ldrb          r3, [r1], #1
    128     strb          r3, [r0], #1
    129     subs          r2, #1
    130     bne           loop_memcpy
    131     bx            lr
    132 
    133 
    134 
    135 
    136 @void ih264_memset_mul_8(UWORD8 *pu1_dst,
    137 @                       UWORD8 value,
    138 @                       UWORD32 num_bytes)
    139 @**************Variables Vs Registers*************************
    140 @   r0 => *pu1_dst
    141 @   r1 => value
    142 @   r2 => num_bytes
    143 
    144 
    145 
    146 
    147 
    148     .global ih264_memset_mul_8_a9q
    149 
    150 ih264_memset_mul_8_a9q:
    151 
    152 @ Assumptions: numbytes is either 8, 16 or 32
    153     vdup.8        d0, r1
    154 loop_memset_mul_8:
    155     @ Memset 8 bytes
    156     vst1.8        d0, [r0]!
    157 
    158     subs          r2, r2, #8
    159     bne           loop_memset_mul_8
    160 
    161     bx            lr
    162 
    163 
    164 
    165 
    166 @void ih264_memset(UWORD8 *pu1_dst,
    167 @                       UWORD8 value,
    168 @                       UWORD8 num_bytes)
    169 @**************Variables Vs Registers*************************
    170 @   r0 => *pu1_dst
    171 @   r1 => value
    172 @   r2 => num_bytes
    173 
    174 
    175 
    176     .global ih264_memset_a9q
    177 
    178 ih264_memset_a9q:
    179     subs          r2, #8
    180     blt           memset
    181     vdup.8        d0, r1
    182 loop_neon_memset:
    183     @ Memcpy 8 bytes
    184     vst1.8        d0, [r0]!
    185 
    186     subs          r2, #8
    187     bge           loop_neon_memset
    188     cmp           r2, #-8
    189     bxeq          lr
    190 
    191 memset:
    192     add           r2, #8
    193 
    194 loop_memset:
    195     strb          r1, [r0], #1
    196     subs          r2, #1
    197     bne           loop_memset
    198     bx            lr
    199 
    200 
    201 
    202 
    203 @void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
    204 @                                   UWORD16 value,
    205 @                                   UWORD32 num_words)
    206 @**************Variables Vs Registers*************************
    207 @   r0 => *pu2_dst
    208 @   r1 => value
    209 @   r2 => num_words
    210 
    211 
    212 
    213 
    214 
    215     .global ih264_memset_16bit_mul_8_a9q
    216 
    217 ih264_memset_16bit_mul_8_a9q:
    218 
    219 @ Assumptions: num_words is either 8, 16 or 32
    220 
    221     @ Memset 8 words
    222     vdup.16       d0, r1
    223 loop_memset_16bit_mul_8:
    224     vst1.16       d0, [r0]!
    225     vst1.16       d0, [r0]!
    226 
    227     subs          r2, r2, #8
    228     bne           loop_memset_16bit_mul_8
    229 
    230     bx            lr
    231 
    232 
    233 
    234 
    235 @void ih264_memset_16bit(UWORD16 *pu2_dst,
    236 @                       UWORD16 value,
    237 @                       UWORD32 num_words)
    238 @**************Variables Vs Registers*************************
    239 @   r0 => *pu2_dst
    240 @   r1 => value
    241 @   r2 => num_words
    242 
    243 
    244 
    245     .global ih264_memset_16bit_a9q
    246 
    247 ih264_memset_16bit_a9q:
    248     subs          r2, #8
    249     blt           memset_16bit
    250     vdup.16       d0, r1
    251 loop_neon_memset_16bit:
    252     @ Memset 8 words
    253     vst1.16       d0, [r0]!
    254     vst1.16       d0, [r0]!
    255 
    256     subs          r2, #8
    257     bge           loop_neon_memset_16bit
    258     cmp           r2, #-8
    259     bxeq          lr
    260 
    261 memset_16bit:
    262     add           r2, #8
    263 
    264 loop_memset_16bit:
    265     strh          r1, [r0], #2
    266     subs          r2, #1
    267     bne           loop_memset_16bit
    268     bx            lr
    269 
    270 
    271 
    272 
    273