1 /****************************************************************************** 2 * 3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 /** 19 ******************************************************************************* 20 * @file 21 * ihevc_mem_fns_atom_intr.c 22 * 23 * @brief 24 * Functions used for memory operations 25 * 26 * @author 27 * Ittiam 28 * 29 * @par List of Functions: 30 * 31 * @remarks 32 * None 33 * 34 ******************************************************************************* 35 */ 36 37 /*****************************************************************************/ 38 /* File Includes */ 39 /*****************************************************************************/ 40 #include <stdio.h> 41 #include <stddef.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <assert.h> 45 46 #include "ihevc_typedefs.h" 47 #include "ihevc_func_selector.h" 48 #include "ihevc_mem_fns.h" 49 50 #include <immintrin.h> 51 52 /** 53 ******************************************************************************* 54 * 55 * @brief 56 * memcpy of a 8,16 or 32 bytes 57 * 58 * @par Description: 59 * Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes 60 * 61 * @param[in] pu1_dst 62 * UWORD8 pointer to the destination 63 * 64 * @param[in] pu1_src 65 * UWORD8 pointer to the source 66 * 67 * @param[in] num_bytes 68 * number of bytes to copy 69 * @returns 70 * 71 * @remarks 72 * None 73 * 74 ******************************************************************************* 75 */ 76 77 78 79 80 void ihevc_memcpy_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes) 81 { 82 int col; 83 for(col = num_bytes; col >= 8; col -= 8) 84 { 85 __m128i src_temp16x8b; 86 src_temp16x8b = _mm_loadl_epi64((__m128i *)(pu1_src)); 87 pu1_src += 8; 88 _mm_storel_epi64((__m128i *)(pu1_dst), src_temp16x8b); 89 pu1_dst += 8; 90 } 91 } 92 93 /** 94 ******************************************************************************* 95 * 96 * @brief 97 * memset of a 8,16 or 32 bytes 98 * 99 * @par Description: 100 * Does memset of 8bit data for 8,16 or 32 number of bytes 101 * 102 * @param[in] pu1_dst 103 * UWORD8 pointer to the destination 104 * 105 * @param[in] value 106 * UWORD8 value used for memset 107 * 108 * @param[in] num_bytes 109 * number of bytes to set 110 * @returns 111 * 112 * @remarks 113 * None 114 * 115 ******************************************************************************* 116 */ 117 118 119 void ihevc_memset_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes) 120 { 121 int col; 122 __m128i src_temp16x8b; 123 src_temp16x8b = _mm_set1_epi8(value); 124 for(col = num_bytes; col >= 8; col -= 8) 125 { 126 _mm_storel_epi64((__m128i *)(pu1_dst), src_temp16x8b); 127 pu1_dst += 8; 128 } 129 } 130 131 /** 132 ******************************************************************************* 133 * 134 * @brief 135 * memset of 16bit data of a 8,16 or 32 bytes 136 * 137 * @par Description: 138 * Does memset of 16bit data for 8,16 or 32 number of bytes 139 * 140 * @param[in] pu2_dst 141 * UWORD8 pointer to the destination 142 * 143 * @param[in] value 144 * UWORD16 value used for memset 145 * 146 * @param[in] num_words 147 * number of words to set 148 * @returns 149 * 150 * @remarks 151 * None 152 * 153 ******************************************************************************* 154 */ 155 156 157 void ihevc_memset_16bit_mul_8_ssse3(UWORD16 *pu2_dst, UWORD16 value, UWORD32 num_words) 158 { 159 int col; 160 __m128i src_temp16x8b; 161 src_temp16x8b = _mm_set1_epi16(value); 162 for(col = num_words; col >= 8; col -= 8) 163 { 164 _mm_storeu_si128((__m128i *)(pu2_dst), src_temp16x8b); 165 pu2_dst += 8; 166 } 167 } 168 169