Home | History | Annotate | Download | only in arm
      1 @/*****************************************************************************
      2 @*
      3 @* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 @*
      5 @* Licensed under the Apache License, Version 2.0 (the "License");
      6 @* you may not use this file except in compliance with the License.
      7 @* You may obtain a copy of the License at:
      8 @*
      9 @* http://www.apache.org/licenses/LICENSE-2.0
     10 @*
     11 @* Unless required by applicable law or agreed to in writing, software
     12 @* distributed under the License is distributed on an "AS IS" BASIS,
     13 @* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @* See the License for the specific language governing permissions and
     15 @* limitations under the License.
     16 @*
     17 @*****************************************************************************/
     18 @/**
     19 @ *******************************************************************************
     20 @ * ,:file
     21 @ *  ihevc_mem_fns_neon.s
     22 @ *
     23 @ * ,:brief
     24 @ *  Contains function definitions for memory manipulation
     25 @ *
     26 @ * ,:author
     27 @ *  Naveen SR
     28 @ *
     29 @ * ,:par List of Functions:
     30 @ *  - ihevc_memcpy_mul_8_a9q(), ihevc_memcpy_a9q()
     31 @ *  - ihevc_memset_mul_8_a9q(), ihevc_memset_a9q()
     32 @ *  - ihevc_memset_16bit_mul_8_a9q(), ihevc_memset_16bit_a9q()
     33 @ *
     34 @ * ,:remarks
     35 @ *  None
     36 @ *
     37 @ *******************************************************************************
     38 @*/
     39 
     40 @/**
     41 @*******************************************************************************
     42 @*
     43 @* ,:brief
     44 @*   memcpy of a 1d array
     45 @*
     46 @* ,:par Description:
     47 @*   Copies 8-bit data from source to destination when num_bytes is 8, 16 or 32
     48 @*
     49 @* ,:param[out] pu1_dst
     50 @*  UWORD8 pointer to the destination
     51 @*
     52 @* ,:param[in] pu1_src
     53 @*  UWORD8 pointer to the source
     54 @*
     55 @* ,:param[in] num_bytes
     56 @*  number of bytes to copy
     57 @* ,:returns
     58 @*
     59 @* ,:remarks
     60 @*  None
     61 @*
     62 @*******************************************************************************
     63 @*/
     64 @void ihevc_memcpy_mul_8(UWORD8 *pu1_dst,
     65 @                    UWORD8 *pu1_src,
     66 @                   UWORD8 num_bytes)
     67 @**************Variables Vs Registers*************************
     68 @   r0 => *pu1_dst
     69 @   r1 => *pu1_src
     70 @   r2 => num_bytes
     71 
     72 .text
     73 .p2align 2
     74 
     75 
     76 
     77 
     78     .global ihevc_memcpy_mul_8_a9q
     79 .type ihevc_memcpy_mul_8_a9q, %function
     80 
     81 ihevc_memcpy_mul_8_a9q:
     82 
     83 LOOP_NEON_MEMCPY_MUL_8:
     84     @ Memcpy 8 bytes
     85     VLD1.8      d0,[r1]!
     86     VST1.8      d0,[r0]!
     87 
     88     SUBS        r2,r2,#8
     89     BNE         LOOP_NEON_MEMCPY_MUL_8
     90     MOV         PC,LR
     91 
     92 
     93 
     94 @*******************************************************************************
     95 @*/
     96 @void ihevc_memcpy(UWORD8 *pu1_dst,
     97 @                  UWORD8 *pu1_src,
     98 @                  UWORD8 num_bytes)
     99 @**************Variables Vs Registers*************************
    100 @   r0 => *pu1_dst
    101 @   r1 => *pu1_src
    102 @   r2 => num_bytes
    103 
    104 
    105 
    .global ihevc_memcpy_a9q
    .type   ihevc_memcpy_a9q, %function

@-------------------------------------------------------------------------------
@ void ihevc_memcpy_a9q(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD8 num_bytes)
@
@ Copies num_bytes bytes from pu1_src to pu1_dst. Whole groups of 8 bytes go
@ through NEON; the remaining 0..7 bytes are copied one at a time.
@ num_bytes == 0 copies nothing.
@
@ In:       r0 = pu1_dst, r1 = pu1_src, r2 = num_bytes
@ Clobbers: r0, r1 (post-incremented), r2, r3, d0, flags
@-------------------------------------------------------------------------------
ihevc_memcpy_a9q:
    SUBS        r2,r2,#8                @ r2 = count - 8 (bias so the sign tells us "< 8 left")
    BLT         ARM_MEMCPY              @ fewer than 8 bytes in total: no NEON pass

LOOP_NEON_MEMCPY:
    VLD1.8      {d0},[r1]!              @ d0 = next 8 source bytes, src += 8
    VST1.8      {d0},[r0]!              @ store them, dst += 8

    SUBS        r2,r2,#8
    BGE         LOOP_NEON_MEMCPY        @ at least 8 more bytes remain

ARM_MEMCPY:
    @ Undo the bias: r2 = bytes still to copy, in 0..7.
    @ Returning on zero covers both "count was a multiple of 8" and
    @ "count was 0"; without this check the byte loop would decrement
    @ 0 to 0xFFFFFFFF and run far past both buffers.
    ADDS        r2,r2,#8
    BXEQ        LR

LOOP_ARM_MEMCPY:
    LDRB        r3,[r1],#1              @ r3 = *src++
    STRB        r3,[r0],#1              @ *dst++ = r3
    SUBS        r2,r2,#1
    BNE         LOOP_ARM_MEMCPY
    BX          LR
    131 
    132 
    133 
    134 
    135 @void ihevc_memset_mul_8(UWORD8 *pu1_dst,
    136 @                       UWORD8 value,
    137 @                       UWORD8 num_bytes)
    138 @**************Variables Vs Registers*************************
    139 @   r0 => *pu1_dst
    140 @   r1 => value
    141 @   r2 => num_bytes
    142 
    143 .text
    144 .p2align 2
    145 
    146 
    147 
    .global ihevc_memset_mul_8_a9q
    .type   ihevc_memset_mul_8_a9q, %function

@-------------------------------------------------------------------------------
@ void ihevc_memset_mul_8_a9q(UWORD8 *pu1_dst, UWORD8 value, UWORD8 num_bytes)
@
@ Fills num_bytes bytes at pu1_dst with value, 8 bytes per iteration.
@
@ In:       r0 = pu1_dst, r1 = value, r2 = num_bytes
@ Requires: num_bytes is 8, 16 or 32 (the loop only stops when the count
@           reaches exactly zero).
@ Clobbers: r0 (post-incremented), r2, d0, flags
@-------------------------------------------------------------------------------
ihevc_memset_mul_8_a9q:
    VDUP.8      d0,r1                   @ broadcast the fill byte into all 8 lanes

1:
    VST1.8      {d0},[r0]!              @ write 8 fill bytes, dst += 8
    SUBS        r2,r2,#8                @ 8 fewer bytes to go
    BNE         1b

    BX          LR
    162     BX          LR
    163 
    164 
    165 
    166 
    167 @void ihevc_memset(UWORD8 *pu1_dst,
    168 @                       UWORD8 value,
    169 @                       UWORD8 num_bytes)
    170 @**************Variables Vs Registers*************************
    171 @   r0 => *pu1_dst
    172 @   r1 => value
    173 @   r2 => num_bytes
    174 
    175 
    176 
    .global ihevc_memset_a9q
    .type   ihevc_memset_a9q, %function

@-------------------------------------------------------------------------------
@ void ihevc_memset_a9q(UWORD8 *pu1_dst, UWORD8 value, UWORD8 num_bytes)
@
@ Fills num_bytes bytes at pu1_dst with value. Whole groups of 8 bytes are
@ written with NEON; the remaining 0..7 bytes are written one at a time.
@ num_bytes == 0 writes nothing.
@
@ In:       r0 = pu1_dst, r1 = value, r2 = num_bytes
@ Clobbers: r0 (post-incremented), r2, d0, flags
@-------------------------------------------------------------------------------
ihevc_memset_a9q:
    SUBS        r2,r2,#8                @ r2 = count - 8 (bias so the sign tells us "< 8 left")
    BLT         ARM_MEMSET              @ fewer than 8 bytes in total: no NEON pass
    VDUP.8      d0,r1                   @ broadcast the fill byte into all 8 lanes

LOOP_NEON_MEMSET:
    VST1.8      {d0},[r0]!              @ write 8 fill bytes, dst += 8

    SUBS        r2,r2,#8
    BGE         LOOP_NEON_MEMSET        @ at least 8 more bytes remain

ARM_MEMSET:
    @ Undo the bias: r2 = bytes still to write, in 0..7.
    @ Returning on zero covers both "count was a multiple of 8" and
    @ "count was 0"; without this check the byte loop would decrement
    @ 0 to 0xFFFFFFFF and write far past the buffer.
    ADDS        r2,r2,#8
    BXEQ        LR

LOOP_ARM_MEMSET:
    STRB        r1,[r0],#1              @ *dst++ = value
    SUBS        r2,r2,#1
    BNE         LOOP_ARM_MEMSET
    BX          LR
    200     BX          LR
    201 
    202 
    203 
    204 
    205 @void ihevc_memset_16bit_mul_8(UWORD16 *pu2_dst,
    206 @                                   UWORD16 value,
    207 @                                   UWORD8 num_words)
    208 @**************Variables Vs Registers*************************
    209 @   r0 => *pu2_dst
    210 @   r1 => value
    211 @   r2 => num_words
    212 
    213 .text
    214 .p2align 2
    215 
    216 
    217 
    .global ihevc_memset_16bit_mul_8_a9q
    .type   ihevc_memset_16bit_mul_8_a9q, %function

@-------------------------------------------------------------------------------
@ void ihevc_memset_16bit_mul_8_a9q(UWORD16 *pu2_dst, UWORD16 value, UWORD8 num_words)
@
@ Fills num_words 16-bit words at pu2_dst with value, 8 words (16 bytes)
@ per iteration.
@
@ In:       r0 = pu2_dst, r1 = value, r2 = num_words
@ Requires: num_words is 8, 16 or 32 (the loop only stops when the count
@           reaches exactly zero).
@ Clobbers: r0 (post-incremented), r2, d0, flags
@-------------------------------------------------------------------------------
ihevc_memset_16bit_mul_8_a9q:
    VDUP.16     d0,r1                   @ broadcast the fill word into all 4 lanes

1:
    VST1.16     {d0},[r0]!              @ words 0..3 of this group, dst += 8 bytes
    VST1.16     {d0},[r0]!              @ words 4..7 of this group, dst += 8 bytes
    SUBS        r2,r2,#8                @ 8 fewer words to go
    BNE         1b

    BX          LR
    234     BX          LR
    235 
    236 
    237 
    238 
    239 @void ihevc_memset_16bit(UWORD16 *pu2_dst,
    240 @                       UWORD16 value,
    241 @                       UWORD8 num_words)
    242 @**************Variables Vs Registers*************************
    243 @   r0 => *pu2_dst
    244 @   r1 => value
    245 @   r2 => num_words
    246 
    247 
    248 
    .global ihevc_memset_16bit_a9q
    .type   ihevc_memset_16bit_a9q, %function

@-------------------------------------------------------------------------------
@ void ihevc_memset_16bit_a9q(UWORD16 *pu2_dst, UWORD16 value, UWORD8 num_words)
@
@ Fills num_words 16-bit words at pu2_dst with value. Whole groups of 8 words
@ are written with NEON; the remaining 0..7 words are written one at a time.
@ num_words == 0 writes nothing.
@
@ In:       r0 = pu2_dst, r1 = value, r2 = num_words
@ Clobbers: r0 (post-incremented), r2, d0, flags
@-------------------------------------------------------------------------------
ihevc_memset_16bit_a9q:
    SUBS        r2,r2,#8                @ r2 = count - 8 (bias so the sign tells us "< 8 left")
    BLT         ARM_MEMSET_16BIT        @ fewer than 8 words in total: no NEON pass
    VDUP.16     d0,r1                   @ broadcast the fill word into all 4 lanes

LOOP_NEON_MEMSET_16BIT:
    VST1.16     {d0},[r0]!              @ words 0..3 of this group, dst += 8 bytes
    VST1.16     {d0},[r0]!              @ words 4..7 of this group, dst += 8 bytes

    SUBS        r2,r2,#8
    BGE         LOOP_NEON_MEMSET_16BIT  @ at least 8 more words remain

ARM_MEMSET_16BIT:
    @ Undo the bias: r2 = words still to write, in 0..7.
    @ Returning on zero covers both "count was a multiple of 8" and
    @ "count was 0"; without this check the halfword loop would decrement
    @ 0 to 0xFFFFFFFF and write far past the buffer.
    ADDS        r2,r2,#8
    BXEQ        LR

LOOP_ARM_MEMSET_16BIT:
    STRH        r1,[r0],#2              @ *dst++ = value (one 16-bit word)
    SUBS        r2,r2,#1
    BNE         LOOP_ARM_MEMSET_16BIT
    BX          LR
    273     BX          LR
    274 
    275 
    276 
    277 
    278     .section .note.GNU-stack,"",%progbits
    279 
    280