1 /****************************************************************************** 2 * 3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ******************************************************************************/ 18 /** 19 ******************************************************************************* 20 * @file 21 * ihevc_padding_atom_intr.c 22 * 23 * @brief 24 * Contains function definitions for Padding 25 * 26 * @author 27 * Srinivas T 28 * 29 * @par List of Functions: 30 * - ihevc_pad_left_luma_ssse3() 31 * - ihevc_pad_left_chroma_ssse3() 32 * - ihevc_pad_right_luma_ssse3() 33 * - ihevc_pad_right_chroma_ssse3() 34 * 35 * @remarks 36 * None 37 * 38 ******************************************************************************* 39 */ 40 41 #include <string.h> 42 #include <assert.h> 43 #include "ihevc_typedefs.h" 44 #include "ihevc_func_selector.h" 45 #include "ihevc_platform_macros.h" 46 #include "ihevc_mem_fns.h" 47 #include "ihevc_debug.h" 48 49 #include <immintrin.h> 50 51 52 /** 53 ******************************************************************************* 54 * 55 * @brief 56 * Padding (luma block) at the left of a 2d array 57 * 58 * @par Description: 59 * The left column of a 2d array is replicated for pad_size times at the left 60 * 61 * 62 * @param[in] pu1_src 63 * UWORD8 pointer to the source 64 * 65 * @param[in] src_strd 66 * integer source stride 67 * 68 * @param[in] ht 69 * integer height of the array 70 * 71 * @param[in] wd 72 * integer width of the array 73 * 74 * @param[in] pad_size 75 * integer -padding size of the array 76 * 77 * @param[in] ht 78 * integer height of the array 79 * 80 * @param[in] wd 81 * integer width of the array 82 * 83 * @returns 84 * 85 * @remarks 86 * None 87 * 88 ******************************************************************************* 89 */ 90 91 void ihevc_pad_left_luma_ssse3(UWORD8 *pu1_src, 92 WORD32 src_strd, 93 WORD32 ht, 94 WORD32 pad_size) 95 { 96 WORD32 row; 97 WORD32 i; 98 UWORD8 *pu1_dst; 99 __m128i const0_16x8b; 100 101 const0_16x8b = _mm_setzero_si128(); 102 103 ASSERT(pad_size % 8 == 0); 104 105 for(row = 0; row < ht; row++) 106 { 107 __m128i src_temp0_16x8b; 108 109 src_temp0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); 110 pu1_dst = pu1_src - pad_size; 111 src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b); 112 for(i = 0; i < pad_size; i += 8) 113 { 114 _mm_storel_epi64((__m128i *)(pu1_dst + i), src_temp0_16x8b); 115 } 116 pu1_src += src_strd; 117 } 118 119 } 120 121 122 123 /** 124 ******************************************************************************* 125 * 126 * @brief 127 * Padding (chroma block) at the left of a 2d array 128 * 129 * @par Description: 130 * The left column of a 2d array is replicated for pad_size times at the left 131 * 132 * 133 * @param[in] pu1_src 134 * UWORD8 pointer to the source 135 * 136 * @param[in] src_strd 137 * integer source stride 138 * 139 * @param[in] ht 140 * integer height of the array 141 * 142 * @param[in] wd 143 * integer width of the array (each colour component) 144 * 145 * @param[in] pad_size 146 * integer -padding size of the array 147 * 148 * @param[in] ht 149 * integer height of the array 150 * 151 * @param[in] wd 152 * integer width of the array 153 * 154 * @returns 155 * 156 * @remarks 157 * None 158 * 159 ******************************************************************************* 160 */ 161 162 void ihevc_pad_left_chroma_ssse3(UWORD8 *pu1_src, 163 WORD32 src_strd, 164 WORD32 ht, 165 WORD32 pad_size) 166 { 167 WORD32 row; 168 WORD32 col; 169 UWORD8 *pu1_dst; 170 __m128i const0_16x8b, const1_16x8b; 171 const0_16x8b = _mm_setzero_si128(); 172 const1_16x8b = _mm_set1_epi8(1); 173 const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b); 174 175 ASSERT(pad_size % 8 == 0); 176 for(row = 0; row < ht; row++) 177 { 178 __m128i src_temp0_16x8b; 179 180 src_temp0_16x8b = _mm_loadu_si128((__m128i *)pu1_src); 181 pu1_dst = pu1_src - pad_size; 182 src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b); 183 184 for(col = 0; col < pad_size; col += 8) 185 { 186 _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b); 187 } 188 pu1_src += src_strd; 189 } 190 191 } 192 193 194 195 /** 196 ******************************************************************************* 197 * 198 * @brief 199 * Padding (luma block) at the right of a 2d array 200 * 201 * @par Description: 202 * The right column of a 2d array is replicated for pad_size times at the right 203 * 204 * 205 * @param[in] pu1_src 206 * UWORD8 pointer to the source 207 * 208 * @param[in] src_strd 209 * integer source stride 210 * 211 * @param[in] ht 212 * integer height of the array 213 * 214 * @param[in] wd 215 * integer width of the array 216 * 217 * @param[in] pad_size 218 * integer -padding size of the array 219 * 220 * @param[in] ht 221 * integer height of the array 222 * 223 * @param[in] wd 224 * integer width of the array 225 * 226 * @returns 227 * 228 * @remarks 229 * None 230 * 231 ******************************************************************************* 232 */ 233 234 void ihevc_pad_right_luma_ssse3(UWORD8 *pu1_src, 235 WORD32 src_strd, 236 WORD32 ht, 237 WORD32 pad_size) 238 { 239 WORD32 row; 240 WORD32 col; 241 UWORD8 *pu1_dst; 242 __m128i const0_16x8b; 243 244 ASSERT(pad_size % 8 == 0); 245 246 for(row = 0; row < ht; row++) 247 { 248 __m128i src_temp0_16x8b; 249 250 src_temp0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src - 1)); 251 const0_16x8b = _mm_setzero_si128(); 252 pu1_dst = pu1_src; 253 src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b); 254 for(col = 0; col < pad_size; col += 8) 255 { 256 _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b); 257 } 258 pu1_src += src_strd; 259 } 260 261 } 262 263 264 265 /** 266 ******************************************************************************* 267 * 268 * @brief 269 * Padding (chroma block) at the right of a 2d array 270 * 271 * @par Description: 272 * The right column of a 2d array is replicated for pad_size times at the right 273 * 274 * 275 * @param[in] pu1_src 276 * UWORD8 pointer to the source 277 * 278 * @param[in] src_strd 279 * integer source stride 280 * 281 * @param[in] ht 282 * integer height of the array 283 * 284 * @param[in] wd 285 * integer width of the array (each colour component) 286 * 287 * @param[in] pad_size 288 * integer -padding size of the array 289 * 290 * @param[in] ht 291 * integer height of the array 292 * 293 * @param[in] wd 294 * integer width of the array 295 * 296 * @returns 297 * 298 * @remarks 299 * None 300 * 301 ******************************************************************************* 302 */ 303 304 void ihevc_pad_right_chroma_ssse3(UWORD8 *pu1_src, 305 WORD32 src_strd, 306 WORD32 ht, 307 WORD32 pad_size) 308 { 309 WORD32 row; 310 WORD32 col; 311 UWORD8 *pu1_dst; 312 __m128i const0_16x8b, const1_16x8b; 313 const0_16x8b = _mm_setzero_si128(); 314 const1_16x8b = _mm_set1_epi8(1); 315 const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b); 316 317 ASSERT(pad_size % 8 == 0); 318 319 for(row = 0; row < ht; row++) 320 { 321 __m128i src_temp0_16x8b; 322 323 src_temp0_16x8b = _mm_loadu_si128((__m128i *)(pu1_src - 2)); 324 pu1_dst = pu1_src; 325 src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b); 326 for(col = 0; col < pad_size; col += 8) 327 { 328 _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b); 329 } 330 331 pu1_src += src_strd; 332 } 333 } 334 335