Home | History | Annotate | Download | only in arm64
      1 ///*****************************************************************************
      2 //*
      3 //* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 //*
      5 //* Licensed under the Apache License, Version 2.0 (the "License");
      6 //* you may not use this file except in compliance with the License.
      7 //* You may obtain a copy of the License at:
      8 //*
      9 //* http://www.apache.org/licenses/LICENSE-2.0
     10 //*
     11 //* Unless required by applicable law or agreed to in writing, software
     12 //* distributed under the License is distributed on an "AS IS" BASIS,
     13 //* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 //* See the License for the specific language governing permissions and
     15 //* limitations under the License.
     16 //*
     17 //*****************************************************************************/
     18 ///**
     19 // *******************************************************************************
     20 // * ,:file
     21 // *  ihevc_mem_fns_neon.s
     22 // *
     23 // * ,:brief
     24 // *  Contains function definitions for memory manipulation
     25 // *
     26 // * ,:author
     27 // *     Naveen SR
     28 // *
     29 // * ,:par List of Functions:
// *  - ihevc_memcpy_mul_8_av8()
// *  - ihevc_memcpy_av8()
// *  - ihevc_memset_mul_8_av8()
// *  - ihevc_memset_av8()
// *  - ihevc_memset_16bit_mul_8_av8()
// *  - ihevc_memset_16bit_av8()
     33 // *
     34 // * ,:remarks
     35 // *  None
     36 // *
     37 // *******************************************************************************
     38 //*/
     39 
     40 ///**
     41 //*******************************************************************************
     42 //*
     43 //* ,:brief
     44 //*   memcpy of a 1d array
     45 //*
     46 //* ,:par Description:
//*   Copies 8-bit data from source to destination, 8 bytes at a time;
//*   num_bytes must be a multiple of 8 (intended sizes are 8, 16 or 32 bytes)
     48 //*
//* ,:param[out] pu1_dst
     50 //*  UWORD8 pointer to the destination
     51 //*
     52 //* ,:param[in] pu1_src
     53 //*  UWORD8 pointer to the source
     54 //*
     55 //* ,:param[in] num_bytes
     56 //*  number of bytes to copy
     57 //* ,:returns
     58 //*
     59 //* ,:remarks
     60 //*  None
     61 //*
     62 //*******************************************************************************
     63 //*/
     64 //void ihevc_memcpy_mul_8(UWORD8 *pu1_dst,
     65 //                      UWORD8 *pu1_src,
     66 //                      UWORD8 num_bytes)
     67 //**************Variables Vs Registers*************************
     68 //    x0 => *pu1_dst
     69 //    x1 => *pu1_src
     70 //    x2 => num_bytes
     71 
     .text
    .p2align 2

    .global ihevc_memcpy_mul_8_av8
    .type   ihevc_memcpy_mul_8_av8, %function

//------------------------------------------------------------------------------
// ihevc_memcpy_mul_8_av8
//   Copies num_bytes bytes from pu1_src to pu1_dst, eight bytes per iteration.
//
// ABI:   AAPCS64, leaf function, no stack usage
// In:    x0 = pu1_dst    destination pointer
//        x1 = pu1_src    source pointer
//        w2 = num_bytes  byte count, expected to be a multiple of 8
// Out:   none
// Clobb: x0, x1, w2, v0, flags
//
// The count is taken from w2 rather than x2: the argument is narrower than
// 64 bits, and AAPCS64 leaves the unused upper register bits unspecified.
// NOTE(review): the prototype comment in this file declares the count as
// UWORD8; only the low 32 bits are consumed here - confirm the width against
// the C declaration.
//
// A zero count returns without touching memory. A count that is not a
// multiple of 8 violates the contract; it is rounded up to the next multiple
// of 8 instead of looping until the counter wraps.
//------------------------------------------------------------------------------
ihevc_memcpy_mul_8_av8:
    cbz         w2, .Lmemcpy_mul_8_done         // empty request: nothing to copy

.Lmemcpy_mul_8_loop:
    ld1         {v0.8b}, [x1], #8               // fetch 8 source bytes, src += 8
    st1         {v0.8b}, [x0], #8               // store them, dst += 8
    subs        w2, w2, #8                      // w2 = bytes still to copy
    b.hi        .Lmemcpy_mul_8_loop             // continue only while w2 > 0 and no borrow

.Lmemcpy_mul_8_done:
    ret
     89 
     90 
     91 
     92 //*******************************************************************************
     93 //*/
     94 //void ihevc_memcpy(UWORD8 *pu1_dst,
     95 //                  UWORD8 *pu1_src,
     96 //                  UWORD8 num_bytes)
     97 //**************Variables Vs Registers*************************
     98 //    x0 => *pu1_dst
     99 //    x1 => *pu1_src
    100 //    x2 => num_bytes
    101 
    102 
    103 
    .global ihevc_memcpy_av8
    .type   ihevc_memcpy_av8, %function

//------------------------------------------------------------------------------
// ihevc_memcpy_av8
//   Copies num_bytes bytes from pu1_src to pu1_dst: whole 8-byte groups go
//   through a NEON register, the remaining 0..7 bytes are copied one by one.
//
// ABI:   AAPCS64, leaf function, no stack usage
// In:    x0 = pu1_dst    destination pointer
//        x1 = pu1_src    source pointer
//        w2 = num_bytes  byte count (any value, including 0)
// Out:   none
// Clobb: x0, x1, w2, w3, v0, flags
//
// The count is taken from w2 rather than x2: the argument is narrower than
// 64 bits, and AAPCS64 leaves the unused upper register bits unspecified.
// NOTE(review): the prototype comment in this file declares the count as
// UWORD8; only the low 32 bits are consumed here - confirm the width against
// the C declaration.
//
// Invariant: while in the vector loop w2 holds (bytes remaining - 8), so the
// carry flag from SUBS says whether another full 8-byte group exists.
//------------------------------------------------------------------------------
ihevc_memcpy_av8:
    subs        w2, w2, #8                      // bias by -8; borrow => fewer than 8 bytes
    b.lo        .Lmemcpy_tail

.Lmemcpy_vec_loop:
    ld1         {v0.8b}, [x1], #8               // fetch 8 source bytes, src += 8
    st1         {v0.8b}, [x0], #8               // store them, dst += 8
    subs        w2, w2, #8
    b.hs        .Lmemcpy_vec_loop               // no borrow: at least 8 more bytes

.Lmemcpy_tail:
    adds        w2, w2, #8                      // undo the bias: w2 = leftover bytes (0..7)
    b.eq        .Lmemcpy_done                   // nothing left (also covers num_bytes == 0)

.Lmemcpy_byte_loop:
    ldrb        w3, [x1], #1
    strb        w3, [x0], #1
    subs        w2, w2, #1
    b.ne        .Lmemcpy_byte_loop

.Lmemcpy_done:
    ret
    130 
    131 
    132 
    133 
    134 //void ihevc_memset_mul_8(UWORD8 *pu1_dst,
    135 //                       UWORD8 value,
    136 //                       UWORD8 num_bytes)
    137 //**************Variables Vs Registers*************************
    138 //    x0 => *pu1_dst
    139 //    x1 => value
    140 //    x2 => num_bytes
    141 
    .text
    .p2align 2

    .global ihevc_memset_mul_8_av8
    .type   ihevc_memset_mul_8_av8, %function

//------------------------------------------------------------------------------
// ihevc_memset_mul_8_av8
//   Fills num_bytes bytes at pu1_dst with the low byte of value, eight bytes
//   per iteration.
//
// ABI:   AAPCS64, leaf function, no stack usage
// In:    x0 = pu1_dst    destination pointer
//        w1 = value      fill value (only the low 8 bits are used)
//        w2 = num_bytes  byte count, expected to be a multiple of 8
// Out:   none
// Clobb: x0, w2, v0, flags
//
// The count is taken from w2 rather than x2: the argument is narrower than
// 64 bits, and AAPCS64 leaves the unused upper register bits unspecified.
// NOTE(review): the prototype comment in this file declares the count as
// UWORD8; only the low 32 bits are consumed here - confirm the width against
// the C declaration.
//
// A zero count returns without touching memory. A count that is not a
// multiple of 8 violates the contract; it is rounded up to the next multiple
// of 8 instead of looping until the counter wraps.
//------------------------------------------------------------------------------
ihevc_memset_mul_8_av8:
    cbz         w2, .Lmemset_mul_8_done         // empty request: nothing to fill
    dup         v0.8b, w1                       // replicate the fill byte into 8 lanes

.Lmemset_mul_8_loop:
    st1         {v0.8b}, [x0], #8               // write 8 bytes, dst += 8
    subs        w2, w2, #8                      // w2 = bytes still to fill
    b.hi        .Lmemset_mul_8_loop             // continue only while w2 > 0 and no borrow

.Lmemset_mul_8_done:
    ret
    162 
    163 
    164 
    165 
    166 //void ihevc_memset(UWORD8 *pu1_dst,
    167 //                       UWORD8 value,
    168 //                       UWORD8 num_bytes)
    169 //**************Variables Vs Registers*************************
    170 //    x0 => *pu1_dst
    171 //    x1 => value
    172 //    x2 => num_bytes
    173 
    174 
    175 
    .global ihevc_memset_av8
    .type   ihevc_memset_av8, %function

//------------------------------------------------------------------------------
// ihevc_memset_av8
//   Fills num_bytes bytes at pu1_dst with the low byte of value: whole 8-byte
//   groups are written from a NEON register, the remaining 0..7 bytes are
//   written one by one.
//
// ABI:   AAPCS64, leaf function, no stack usage
// In:    x0 = pu1_dst    destination pointer
//        w1 = value      fill value (only the low 8 bits are used)
//        w2 = num_bytes  byte count (any value, including 0)
// Out:   none
// Clobb: x0, w2, v0, flags
//
// The count is taken from w2 rather than x2: the argument is narrower than
// 64 bits, and AAPCS64 leaves the unused upper register bits unspecified.
// NOTE(review): the prototype comment in this file declares the count as
// UWORD8; only the low 32 bits are consumed here - confirm the width against
// the C declaration.
//
// Invariant: while in the vector loop w2 holds (bytes remaining - 8), so the
// carry flag from SUBS says whether another full 8-byte group exists.
//------------------------------------------------------------------------------
ihevc_memset_av8:
    subs        w2, w2, #8                      // bias by -8; borrow => fewer than 8 bytes
    b.lo        .Lmemset_tail
    dup         v0.8b, w1                       // replicate the fill byte into 8 lanes

.Lmemset_vec_loop:
    st1         {v0.8b}, [x0], #8               // write 8 bytes, dst += 8
    subs        w2, w2, #8
    b.hs        .Lmemset_vec_loop               // no borrow: at least 8 more bytes

.Lmemset_tail:
    adds        w2, w2, #8                      // undo the bias: w2 = leftover bytes (0..7)
    b.eq        .Lmemset_done                   // nothing left (also covers num_bytes == 0)

.Lmemset_byte_loop:
    strb        w1, [x0], #1
    subs        w2, w2, #1
    b.ne        .Lmemset_byte_loop

.Lmemset_done:
    ret
    202 
    203 
    204 
    205 
    206 //void ihevc_memset_16bit_mul_8(UWORD16 *pu2_dst,
    207 //                                      UWORD16 value,
    208 //                                      UWORD8 num_words)
    209 //**************Variables Vs Registers*************************
    210 //    x0 => *pu2_dst
    211 //    x1 => value
    212 //    x2 => num_words
    213 
    .text
    .p2align 2

    .global ihevc_memset_16bit_mul_8_av8
    .type   ihevc_memset_16bit_mul_8_av8, %function

//------------------------------------------------------------------------------
// ihevc_memset_16bit_mul_8_av8
//   Fills num_words 16-bit elements at pu2_dst with the low halfword of value,
//   eight elements (16 bytes) per iteration.
//
// ABI:   AAPCS64, leaf function, no stack usage
// In:    x0 = pu2_dst    destination pointer
//        w1 = value      fill value (only the low 16 bits are used)
//        w2 = num_words  element count, expected to be a multiple of 8
// Out:   none
// Clobb: x0, w2, v0, flags
//
// The count is taken from w2 rather than x2: the argument is narrower than
// 64 bits, and AAPCS64 leaves the unused upper register bits unspecified.
// NOTE(review): the prototype comment in this file declares the count as
// UWORD8; only the low 32 bits are consumed here - confirm the width against
// the C declaration.
//
// A zero count returns without touching memory. A count that is not a
// multiple of 8 violates the contract; it is rounded up to the next multiple
// of 8 instead of looping until the counter wraps.
//------------------------------------------------------------------------------
ihevc_memset_16bit_mul_8_av8:
    cbz         w2, .Lmemset_16bit_mul_8_done   // empty request: nothing to fill
    dup         v0.8h, w1                       // replicate the fill halfword into 8 lanes

.Lmemset_16bit_mul_8_loop:
    st1         {v0.8h}, [x0], #16              // write 8 halfwords, dst += 16 bytes
    subs        w2, w2, #8                      // w2 = elements still to fill
    b.hi        .Lmemset_16bit_mul_8_loop       // continue only while w2 > 0 and no borrow

.Lmemset_16bit_mul_8_done:
    ret
    235 
    236 
    237 
    238 
    239 //void ihevc_memset_16bit(UWORD16 *pu2_dst,
    240 //                       UWORD16 value,
    241 //                       UWORD8 num_words)
    242 //**************Variables Vs Registers*************************
    243 //    x0 => *pu2_dst
    244 //    x1 => value
    245 //    x2 => num_words
    246 
    247 
    248 
    .global ihevc_memset_16bit_av8
    .type   ihevc_memset_16bit_av8, %function

//------------------------------------------------------------------------------
// ihevc_memset_16bit_av8
//   Fills num_words 16-bit elements at pu2_dst with the low halfword of value:
//   whole groups of 8 elements are written from a NEON register, the remaining
//   0..7 elements are written one by one.
//
// ABI:   AAPCS64, leaf function, no stack usage
// In:    x0 = pu2_dst    destination pointer
//        w1 = value      fill value (only the low 16 bits are used)
//        w2 = num_words  element count (any value, including 0)
// Out:   none
// Clobb: x0, w2, v0, flags
//
// The count is taken from w2 rather than x2: the argument is narrower than
// 64 bits, and AAPCS64 leaves the unused upper register bits unspecified.
// NOTE(review): the prototype comment in this file declares the count as
// UWORD8; only the low 32 bits are consumed here - confirm the width against
// the C declaration.
//
// Invariant: while in the vector loop w2 holds (elements remaining - 8), so
// the carry flag from SUBS says whether another full group of 8 exists.
//------------------------------------------------------------------------------
ihevc_memset_16bit_av8:
    subs        w2, w2, #8                      // bias by -8; borrow => fewer than 8 elements
    b.lo        .Lmemset_16bit_tail
    dup         v0.8h, w1                       // replicate the fill halfword into 8 lanes

.Lmemset_16bit_vec_loop:
    st1         {v0.8h}, [x0], #16              // write 8 halfwords, dst += 16 bytes
    subs        w2, w2, #8
    b.hs        .Lmemset_16bit_vec_loop         // no borrow: at least 8 more elements

.Lmemset_16bit_tail:
    adds        w2, w2, #8                      // undo the bias: w2 = leftover elements (0..7)
    b.eq        .Lmemset_16bit_done             // nothing left (also covers num_words == 0)

.Lmemset_16bit_elem_loop:
    strh        w1, [x0], #2
    subs        w2, w2, #1
    b.ne        .Lmemset_16bit_elem_loop

.Lmemset_16bit_done:
    ret
    275 
    276 
    277 
    278 
    // Emit an empty .note.GNU-stack section (no flags, %progbits). GNU
    // toolchains read this marker as "this object does not need an
    // executable stack".
    279     .section .note.GNU-stack,"",%progbits
    280 
    281