Home | History | Annotate | Download | only in x86
      1 /******************************************************************************
      2 *
      3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 *
      5 * Licensed under the Apache License, Version 2.0 (the "License");
      6 * you may not use this file except in compliance with the License.
      7 * You may obtain a copy of the License at:
      8 *
      9 * http://www.apache.org/licenses/LICENSE-2.0
     10 *
     11 * Unless required by applicable law or agreed to in writing, software
     12 * distributed under the License is distributed on an "AS IS" BASIS,
     13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 * See the License for the specific language governing permissions and
     15 * limitations under the License.
     16 *
     17 ******************************************************************************/
     18 /**
     19 *******************************************************************************
     20 * @file
     21 *  ihevc_padding_atom_intr.c
     22 *
     23 * @brief
     24 *  Contains function definitions for Padding
     25 *
     26 * @author
     27 *  Srinivas T
     28 *
     29 * @par List of Functions:
     30 *   - ihevc_pad_left_luma_ssse3()
     31 *   - ihevc_pad_left_chroma_ssse3()
     32 *   - ihevc_pad_right_luma_ssse3()
     33 *   - ihevc_pad_right_chroma_ssse3()
     34 *
     35 * @remarks
     36 *  None
     37 *
     38 *******************************************************************************
     39 */
     40 
     41 #include <string.h>
     42 #include <assert.h>
     43 #include "ihevc_typedefs.h"
     44 #include "ihevc_func_selector.h"
     45 #include "ihevc_platform_macros.h"
     46 #include "ihevc_mem_fns.h"
     47 #include "ihevc_debug.h"
     48 
     49 #include <immintrin.h>
     50 
     51 
     52 /**
     53 *******************************************************************************
     54 *
     55 * @brief
     56 *   Padding (luma block) at the left of a 2d array
     57 *
     58 * @par Description:
     59 *   The left column of a 2d array is replicated for pad_size times at the left
     60 *
     61 *
     62 * @param[in] pu1_src
     63 *  UWORD8 pointer to the source
     64 *
     65 * @param[in] src_strd
     66 *  integer source stride
     67 *
     68 * @param[in] ht
     69 *  integer height of the array
     70 *
     71 * @param[in] wd
     72 *  integer width of the array
     73 *
     74 * @param[in] pad_size
     75 *  integer -padding size of the array
     76 *
     77 * @param[in] ht
     78 *  integer height of the array
     79 *
     80 * @param[in] wd
     81 *  integer width of the array
     82 *
     83 * @returns
     84 *
     85 * @remarks
     86 *  None
     87 *
     88 *******************************************************************************
     89 */
     90 
     91 void ihevc_pad_left_luma_ssse3(UWORD8 *pu1_src,
     92                                WORD32 src_strd,
     93                                WORD32 ht,
     94                                WORD32 pad_size)
     95 {
     96     WORD32 row;
     97     WORD32 i;
     98     UWORD8 *pu1_dst;
     99     __m128i const0_16x8b;
    100 
    101     const0_16x8b = _mm_setzero_si128();
    102 
    103     ASSERT(pad_size % 8 == 0);
    104 
    105     for(row = 0; row < ht; row++)
    106     {
    107         __m128i src_temp0_16x8b;
    108 
    109         src_temp0_16x8b =  _mm_loadu_si128((__m128i *)pu1_src);
    110         pu1_dst = pu1_src - pad_size;
    111         src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
    112         for(i = 0; i < pad_size; i += 8)
    113         {
    114             _mm_storel_epi64((__m128i *)(pu1_dst + i), src_temp0_16x8b);
    115         }
    116         pu1_src += src_strd;
    117     }
    118 
    119 }
    120 
    121 
    122 
    123 /**
    124 *******************************************************************************
    125 *
    126 * @brief
    127 *   Padding (chroma block) at the left of a 2d array
    128 *
    129 * @par Description:
    130 *   The left column of a 2d array is replicated for pad_size times at the left
    131 *
    132 *
    133 * @param[in] pu1_src
    134 *  UWORD8 pointer to the source
    135 *
    136 * @param[in] src_strd
    137 *  integer source stride
    138 *
    139 * @param[in] ht
    140 *  integer height of the array
    141 *
    142 * @param[in] wd
    143 *  integer width of the array (each colour component)
    144 *
    145 * @param[in] pad_size
    146 *  integer -padding size of the array
    147 *
    148 * @param[in] ht
    149 *  integer height of the array
    150 *
    151 * @param[in] wd
    152 *  integer width of the array
    153 *
    154 * @returns
    155 *
    156 * @remarks
    157 *  None
    158 *
    159 *******************************************************************************
    160 */
    161 
    162 void ihevc_pad_left_chroma_ssse3(UWORD8 *pu1_src,
    163                                  WORD32 src_strd,
    164                                  WORD32 ht,
    165                                  WORD32 pad_size)
    166 {
    167     WORD32 row;
    168     WORD32 col;
    169     UWORD8 *pu1_dst;
    170     __m128i const0_16x8b, const1_16x8b;
    171     const0_16x8b = _mm_setzero_si128();
    172     const1_16x8b = _mm_set1_epi8(1);
    173     const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b);
    174 
    175     ASSERT(pad_size % 8 == 0);
    176     for(row = 0; row < ht; row++)
    177     {
    178         __m128i src_temp0_16x8b;
    179 
    180         src_temp0_16x8b =  _mm_loadu_si128((__m128i *)pu1_src);
    181         pu1_dst = pu1_src - pad_size;
    182         src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
    183 
    184         for(col = 0; col < pad_size; col += 8)
    185         {
    186             _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
    187         }
    188         pu1_src += src_strd;
    189     }
    190 
    191 }
    192 
    193 
    194 
    195 /**
    196 *******************************************************************************
    197 *
    198 * @brief
    199 * Padding (luma block) at the right of a 2d array
    200 *
    201 * @par Description:
    202 * The right column of a 2d array is replicated for pad_size times at the right
    203 *
    204 *
    205 * @param[in] pu1_src
    206 *  UWORD8 pointer to the source
    207 *
    208 * @param[in] src_strd
    209 *  integer source stride
    210 *
    211 * @param[in] ht
    212 *  integer height of the array
    213 *
    214 * @param[in] wd
    215 *  integer width of the array
    216 *
    217 * @param[in] pad_size
    218 *  integer -padding size of the array
    219 *
    220 * @param[in] ht
    221 *  integer height of the array
    222 *
    223 * @param[in] wd
    224 *  integer width of the array
    225 *
    226 * @returns
    227 *
    228 * @remarks
    229 *  None
    230 *
    231 *******************************************************************************
    232 */
    233 
    234 void ihevc_pad_right_luma_ssse3(UWORD8 *pu1_src,
    235                                 WORD32 src_strd,
    236                                 WORD32 ht,
    237                                 WORD32 pad_size)
    238 {
    239     WORD32 row;
    240     WORD32 col;
    241     UWORD8 *pu1_dst;
    242     __m128i const0_16x8b;
    243 
    244     ASSERT(pad_size % 8 == 0);
    245 
    246     for(row = 0; row < ht; row++)
    247     {
    248         __m128i src_temp0_16x8b;
    249 
    250         src_temp0_16x8b =  _mm_loadu_si128((__m128i *)(pu1_src - 1));
    251         const0_16x8b = _mm_setzero_si128();
    252         pu1_dst = pu1_src;
    253         src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
    254         for(col = 0; col < pad_size; col += 8)
    255         {
    256             _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
    257         }
    258         pu1_src += src_strd;
    259     }
    260 
    261 }
    262 
    263 
    264 
    265 /**
    266 *******************************************************************************
    267 *
    268 * @brief
    269 * Padding (chroma block) at the right of a 2d array
    270 *
    271 * @par Description:
    272 * The right column of a 2d array is replicated for pad_size times at the right
    273 *
    274 *
    275 * @param[in] pu1_src
    276 *  UWORD8 pointer to the source
    277 *
    278 * @param[in] src_strd
    279 *  integer source stride
    280 *
    281 * @param[in] ht
    282 *  integer height of the array
    283 *
    284 * @param[in] wd
    285 *  integer width of the array (each colour component)
    286 *
    287 * @param[in] pad_size
    288 *  integer -padding size of the array
    289 *
    290 * @param[in] ht
    291 *  integer height of the array
    292 *
    293 * @param[in] wd
    294 *  integer width of the array
    295 *
    296 * @returns
    297 *
    298 * @remarks
    299 *  None
    300 *
    301 *******************************************************************************
    302 */
    303 
    304 void ihevc_pad_right_chroma_ssse3(UWORD8 *pu1_src,
    305                                   WORD32 src_strd,
    306                                   WORD32 ht,
    307                                   WORD32 pad_size)
    308 {
    309     WORD32 row;
    310     WORD32 col;
    311     UWORD8 *pu1_dst;
    312     __m128i const0_16x8b, const1_16x8b;
    313     const0_16x8b = _mm_setzero_si128();
    314     const1_16x8b = _mm_set1_epi8(1);
    315     const0_16x8b = _mm_unpacklo_epi8(const0_16x8b, const1_16x8b);
    316 
    317     ASSERT(pad_size % 8 == 0);
    318 
    319     for(row = 0; row < ht; row++)
    320     {
    321         __m128i src_temp0_16x8b;
    322 
    323         src_temp0_16x8b =  _mm_loadu_si128((__m128i *)(pu1_src - 2));
    324         pu1_dst = pu1_src;
    325         src_temp0_16x8b = _mm_shuffle_epi8(src_temp0_16x8b, const0_16x8b);
    326         for(col = 0; col < pad_size; col += 8)
    327         {
    328             _mm_storel_epi64((__m128i *)(pu1_dst + col), src_temp0_16x8b);
    329         }
    330 
    331         pu1_src += src_strd;
    332     }
    333 }
    334 
    335