Home | History | Annotate | Download | only in x86
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2015 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /**
     21  *******************************************************************************
     22  * @file
     23  *  ih264_mem_fns_atom_intr.c
     24  *
     25  * @brief
     26  *  Functions used for memory operations
     27  *
     28  * @author
     29  *  Ittiam
     30  *
     31  * @par List of Functions:
     32  *
     33  * @remarks
     34  *  None
     35  *
     36  *******************************************************************************
     37  */
     38 
     39 /*****************************************************************************/
     40 /* File Includes                                                             */
     41 /*****************************************************************************/
     42 #include <stdio.h>
     43 #include <stddef.h>
     44 #include <stdlib.h>
     45 #include <string.h>
     46 #include <assert.h>
     47 
     48 #include "ih264_typedefs.h"
     49 #include "ih264_mem_fns.h"
     50 
     51 #include <immintrin.h>
     52 
     53 /**
     54  *******************************************************************************
     55  *
     56  * @brief
     57  *   memcpy of a 8,16 or 32 bytes
     58  *
     59  * @par Description:
     60  *   Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes
     61  *
     62  * @param[in] pu1_dst
     63  *  UWORD8 pointer to the destination
     64  *
     65  * @param[in] pu1_src
     66  *  UWORD8 pointer to the source
     67  *
     68  * @param[in] num_bytes
     69  *  number of bytes to copy
     70  * @returns
     71  *
     72  * @remarks
     73  *  None
     74  *
     75  *******************************************************************************
     76  */
     77 
     78 
     79 
     80 
     81 void ih264_memcpy_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes)
     82 {
     83     int col;
     84     for(col = num_bytes; col >= 8; col -= 8)
     85     {
     86         __m128i src_temp16x8b;
     87         src_temp16x8b = _mm_loadl_epi64((__m128i *)(pu1_src));
     88         pu1_src += 8;
     89         _mm_storel_epi64((__m128i *)(pu1_dst), src_temp16x8b);
     90         pu1_dst += 8;
     91     }
     92 }
     93 
     94 /**
     95  *******************************************************************************
     96  *
     97  * @brief
     98  *   memset of a 8,16 or 32 bytes
     99  *
    100  * @par Description:
    101  *   Does memset of 8bit data for 8,16 or 32 number of bytes
    102  *
    103  * @param[in] pu1_dst
    104  *  UWORD8 pointer to the destination
    105  *
    106  * @param[in] value
    107  *  UWORD8 value used for memset
    108  *
    109  * @param[in] num_bytes
    110  *  number of bytes to set
    111  * @returns
    112  *
    113  * @remarks
    114  *  None
    115  *
    116  *******************************************************************************
    117  */
    118 
    119 
    120 void ih264_memset_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes)
    121 {
    122     int col;
    123     __m128i src_temp16x8b;
    124     src_temp16x8b = _mm_set1_epi8(value);
    125     for(col = num_bytes; col >= 8; col -= 8)
    126     {
    127         _mm_storel_epi64((__m128i *)(pu1_dst), src_temp16x8b);
    128         pu1_dst += 8;
    129     }
    130 }
    131 
    132 /**
    133  *******************************************************************************
    134  *
    135  * @brief
    136  *   memset of 16bit data of a 8,16 or 32 bytes
    137  *
    138  * @par Description:
    139  *   Does memset of 16bit data for 8,16 or 32 number of bytes
    140  *
    141  * @param[in] pu2_dst
    142  *  UWORD8 pointer to the destination
    143  *
    144  * @param[in] value
    145  *  UWORD16 value used for memset
    146  *
    147  * @param[in] num_words
    148  *  number of words to set
    149  * @returns
    150  *
    151  * @remarks
    152  *  None
    153  *
    154  *******************************************************************************
    155  */
    156 
    157 
    158 void ih264_memset_16bit_mul_8_ssse3(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words)
    159 {
    160     int col;
    161     __m128i src_temp16x8b;
    162     src_temp16x8b = _mm_set1_epi16(value);
    163     for(col = num_words; col >= 8; col -= 8)
    164     {
    165         _mm_storeu_si128((__m128i *)(pu2_dst), src_temp16x8b);
    166         pu2_dst += 8;
    167     }
    168 }
    169 
    170