Home | History | Annotate | Download | only in common
      1 /****************************************************************************
      2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice (including the next
     12 * paragraph) shall be included in all copies or substantial portions of the
     13 * Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21 * IN THE SOFTWARE.
     22 ****************************************************************************/
     23 
        // Include guard for this header.
     24 #ifndef __SWR_SIMD16INTRIN_H__
     25 #define __SWR_SIMD16INTRIN_H__
     26 
        // Everything up to the matching #endif is compiled only when
        // ENABLE_AVX512_SIMD16 evaluates to a non-zero value.
     27 #if ENABLE_AVX512_SIMD16
     28 
        // SIMD16 is an alias of SIMD512 (declared elsewhere). Only
        // KNOB_SIMD16_WIDTH == 16 is accepted; any other value stops the build
        // through the #error below.
     29 #if KNOB_SIMD16_WIDTH == 16
     30 typedef SIMD512                             SIMD16;
     31 #else
     32 #error Unsupported vector width
     33 #endif//KNOB_SIMD16_WIDTH == 16
     34 
        // Alias table: each _simd16_* name expands to a member of SIMD16 reached
        // through the scope operator. Object-like aliases leave the call arguments
        // untouched; function-like aliases rearrange them as noted.
        //
        // Construction, load and store aliases.
     35 #define _simd16_setzero_ps                  SIMD16::setzero_ps
     36 #define _simd16_setzero_si                  SIMD16::setzero_si
     37 #define _simd16_set1_ps                     SIMD16::set1_ps
     38 #define _simd16_set1_epi8                   SIMD16::set1_epi8
     39 #define _simd16_set1_epi32                  SIMD16::set1_epi32
     40 #define _simd16_set_ps                      SIMD16::set_ps
     41 #define _simd16_set_epi32                   SIMD16::set_epi32
     42 #define _simd16_load_ps                     SIMD16::load_ps
     43 #define _simd16_loadu_ps                    SIMD16::loadu_ps
        // _simd16_load1_ps resolves to broadcast_ss rather than to a load1_ps
        // member. The guard condition is the constant 1, so the alias is always
        // defined. NOTE(review): presumably a leftover toggle; confirm before
        // removing the #if/#endif pair.
     44 #if 1
     45 #define _simd16_load1_ps                    SIMD16::broadcast_ss
     46 #endif
     47 #define _simd16_load_si                     SIMD16::load_si
     48 #define _simd16_loadu_si                    SIMD16::loadu_si
        // Casts the argument to float const* before forwarding it.
        // NOTE(review): m is not parenthesized inside the cast, so for an
        // argument such as p + 1 the cast applies to p alone.
     49 #define _simd16_broadcast_ss(m)             SIMD16::broadcast_ss((float const*)m)
     50 #define _simd16_store_ps                    SIMD16::store_ps
     51 #define _simd16_store_si                    SIMD16::store_si
        // imm8 / mask are spliced in as template arguments in the function-like
        // wrappers below, so they must be compile-time constants.
     52 #define _simd16_extract_ps(a, imm8)         SIMD16::extract_ps<imm8>(a)
     53 #define _simd16_extract_si(a, imm8)         SIMD16::extract_si<imm8>(a)
     54 #define _simd16_insert_ps(a, b, imm8)       SIMD16::insert_ps<imm8>(a, b)
     55 #define _simd16_insert_si(a, b, imm8)       SIMD16::insert_si<imm8>(a, b)
     56 #define _simd16_maskstore_ps                SIMD16::maskstore_ps
     57 #define _simd16_blend_ps(a, b, mask)        SIMD16::blend_ps<mask>(a, b)
     58 #define _simd16_blendv_ps                   SIMD16::blendv_ps
     59 #define _simd16_blendv_epi32                SIMD16::blendv_epi32
        // Aliases for the *_ps arithmetic, movemask and conversion members.
     60 #define _simd16_mul_ps                      SIMD16::mul_ps
     61 #define _simd16_div_ps                      SIMD16::div_ps
     62 #define _simd16_add_ps                      SIMD16::add_ps
     63 #define _simd16_sub_ps                      SIMD16::sub_ps
     64 #define _simd16_rsqrt_ps                    SIMD16::rsqrt_ps
     65 #define _simd16_min_ps                      SIMD16::min_ps
     66 #define _simd16_max_ps                      SIMD16::max_ps
     67 #define _simd16_movemask_ps                 SIMD16::movemask_ps
     68 #define _simd16_movemask_pd                 SIMD16::movemask_pd
     69 #define _simd16_cvtps_epi32                 SIMD16::cvtps_epi32
     70 #define _simd16_cvttps_epi32                SIMD16::cvttps_epi32
     71 #define _simd16_cvtepi32_ps                 SIMD16::cvtepi32_ps
        // comp is converted with SIMD16::CompareType(comp) and used as the
        // template argument of cmp_ps; the named compares are plain aliases.
     72 #define _simd16_cmp_ps(a, b, comp)          SIMD16::cmp_ps<SIMD16::CompareType(comp)>(a, b)
     73 #define _simd16_cmplt_ps                    SIMD16::cmplt_ps
     74 #define _simd16_cmpgt_ps                    SIMD16::cmpgt_ps
     75 #define _simd16_cmpneq_ps                   SIMD16::cmpneq_ps
     76 #define _simd16_cmpeq_ps                    SIMD16::cmpeq_ps
     77 #define _simd16_cmpge_ps                    SIMD16::cmpge_ps
     78 #define _simd16_cmple_ps                    SIMD16::cmple_ps
        // Aliases for the cast* members between the si, ps and pd forms.
     79 #define _simd16_castsi_ps                   SIMD16::castsi_ps
     80 #define _simd16_castps_si                   SIMD16::castps_si
     81 #define _simd16_castsi_pd                   SIMD16::castsi_pd
     82 #define _simd16_castpd_si                   SIMD16::castpd_si
     83 #define _simd16_castpd_ps                   SIMD16::castpd_ps
     84 #define _simd16_castps_pd                   SIMD16::castps_pd
     85 #define _simd16_and_ps                      SIMD16::and_ps
     86 #define _simd16_andnot_ps                   SIMD16::andnot_ps
     87 #define _simd16_or_ps                       SIMD16::or_ps
     88 #define _simd16_xor_ps                      SIMD16::xor_ps
        // mode is converted with SIMD16::RoundMode(mode) and used as the
        // template argument of round_ps.
     89 #define _simd16_round_ps(a, mode)           SIMD16::round_ps<SIMD16::RoundMode(mode)>(a)
        // Aliases for the integer (epi32 / epu32 / epi64 / si) members.
     90 #define _simd16_mul_epi32                   SIMD16::mul_epi32
     91 #define _simd16_mullo_epi32                 SIMD16::mullo_epi32
     92 #define _simd16_sub_epi32                   SIMD16::sub_epi32
     93 #define _simd16_sub_epi64                   SIMD16::sub_epi64
     94 #define _simd16_min_epi32                   SIMD16::min_epi32
     95 #define _simd16_max_epi32                   SIMD16::max_epi32
     96 #define _simd16_min_epu32                   SIMD16::min_epu32
     97 #define _simd16_max_epu32                   SIMD16::max_epu32
     98 #define _simd16_add_epi32                   SIMD16::add_epi32
     99 #define _simd16_and_si                      SIMD16::and_si
    100 #define _simd16_andnot_si                   SIMD16::andnot_si
    101 #define _simd16_or_si                       SIMD16::or_si
    102 #define _simd16_xor_si                      SIMD16::xor_si
    103 #define _simd16_cmpeq_epi32                 SIMD16::cmpeq_epi32
    104 #define _simd16_cmpgt_epi32                 SIMD16::cmpgt_epi32
    105 #define _simd16_cmplt_epi32                 SIMD16::cmplt_epi32
    106 #define _simd16_testz_ps                    SIMD16::testz_ps
        // Aliases for the unpacklo / unpackhi members, one pair per element form.
    107 #define _simd16_unpacklo_ps                 SIMD16::unpacklo_ps
    108 #define _simd16_unpackhi_ps                 SIMD16::unpackhi_ps
    109 #define _simd16_unpacklo_pd                 SIMD16::unpacklo_pd
    110 #define _simd16_unpackhi_pd                 SIMD16::unpackhi_pd
    111 #define _simd16_unpacklo_epi8               SIMD16::unpacklo_epi8
    112 #define _simd16_unpackhi_epi8               SIMD16::unpackhi_epi8
    113 #define _simd16_unpacklo_epi16              SIMD16::unpacklo_epi16
    114 #define _simd16_unpackhi_epi16              SIMD16::unpackhi_epi16
    115 #define _simd16_unpacklo_epi32              SIMD16::unpacklo_epi32
    116 #define _simd16_unpackhi_epi32              SIMD16::unpackhi_epi32
    117 #define _simd16_unpacklo_epi64              SIMD16::unpacklo_epi64
    118 #define _simd16_unpackhi_epi64              SIMD16::unpackhi_epi64
        // The second macro argument i is passed as the template argument of the
        // immediate-shift members, so it must be a compile-time constant.
    119 #define _simd16_slli_epi32(a, i)            SIMD16::slli_epi32<i>(a)
    120 #define _simd16_srli_epi32(a, i)            SIMD16::srli_epi32<i>(a)
    121 #define _simd16_srai_epi32(a, i)            SIMD16::srai_epi32<i>(a)
    122 #define _simd16_fmadd_ps                    SIMD16::fmadd_ps
    123 #define _simd16_fmsub_ps                    SIMD16::fmsub_ps
    124 #define _simd16_adds_epu8                   SIMD16::adds_epu8
    125 #define _simd16_subs_epu8                   SIMD16::subs_epu8
    126 #define _simd16_add_epi8                    SIMD16::add_epi8
    127 #define _simd16_shuffle_epi8                SIMD16::shuffle_epi8
    128 
        // Gather wrappers: scale is converted with SIMD16::ScaleFactor(scale) and
        // supplied as the template argument; the remaining macro arguments are
        // forwarded in the order given.
    129 #define _simd16_i32gather_ps(m, index, scale)               SIMD16::i32gather_ps<SIMD16::ScaleFactor(scale)>(m, index)
    130 #define _simd16_mask_i32gather_ps(a, m, index, mask, scale) SIMD16::mask_i32gather_ps<SIMD16::ScaleFactor(scale)>(a, m, index, mask)
    131 
        // Alias for the abs_epi32 member.
    132 #define _simd16_abs_epi32                   SIMD16::abs_epi32
    133 
        // Aliases for the cmpeq / cmpgt members in their epi64, epi16 and epi8
        // forms (the epi32 forms are aliased earlier in this header).
    134 #define _simd16_cmpeq_epi64                 SIMD16::cmpeq_epi64
    135 #define _simd16_cmpgt_epi64                 SIMD16::cmpgt_epi64
    136 #define _simd16_cmpeq_epi16                 SIMD16::cmpeq_epi16
    137 #define _simd16_cmpgt_epi16                 SIMD16::cmpgt_epi16
    138 #define _simd16_cmpeq_epi8                  SIMD16::cmpeq_epi8
    139 #define _simd16_cmpgt_epi8                  SIMD16::cmpgt_epi8
    140 
        // Permute and variable-shift aliases; each name forwards to the SIMD16
        // member carrying the same suffix.
    141 #define _simd16_permute_ps                  SIMD16::permute_ps
    142 #define _simd16_permute_epi32               SIMD16::permute_epi32
    143 #define _simd16_sllv_epi32                  SIMD16::sllv_epi32
        // Must resolve to srlv (shift right), not sllv: this alias used to point
        // at the left-shift member, so right shifts were performed as left shifts.
    144 #define _simd16_srlv_epi32                  SIMD16::srlv_epi32
    145 #define _simd16_permute2f128_ps(a, b, i)    SIMD16::permute2f128_ps<i>(a, b)
        // As on the line above, every permute2f128 / shuffle wrapper below passes
        // its last macro argument as the template argument and forwards a and b,
        // so that argument must be a compile-time constant.
    146 #define _simd16_permute2f128_pd(a, b, i)    SIMD16::permute2f128_pd<i>(a, b)
    147 #define _simd16_permute2f128_si(a, b, i)    SIMD16::permute2f128_si<i>(a, b)
    148 #define _simd16_shuffle_ps(a, b, i)         SIMD16::shuffle_ps<i>(a, b)
    149 #define _simd16_shuffle_pd(a, b, i)         SIMD16::shuffle_pd<i>(a, b)
    150 #define _simd16_shuffle_epi32(a, b, imm8)   SIMD16::shuffle_epi32<imm8>(a, b)
    151 #define _simd16_shuffle_epi64(a, b, imm8)   SIMD16::shuffle_epi64<imm8>(a, b)
        // Object-like aliases for the cvtepu* and pack* members.
    152 #define _simd16_cvtepu8_epi16               SIMD16::cvtepu8_epi16
    153 #define _simd16_cvtepu8_epi32               SIMD16::cvtepu8_epi32
    154 #define _simd16_cvtepu16_epi32              SIMD16::cvtepu16_epi32
    155 #define _simd16_cvtepu16_epi64              SIMD16::cvtepu16_epi64
    156 #define _simd16_cvtepu32_epi64              SIMD16::cvtepu32_epi64
    157 #define _simd16_packus_epi16                SIMD16::packus_epi16
    158 #define _simd16_packs_epi16                 SIMD16::packs_epi16
    159 #define _simd16_packus_epi32                SIMD16::packus_epi32
    160 #define _simd16_packs_epi32                 SIMD16::packs_epi32
        // cmp_ps_mask instantiated with a fixed CompareType: LT_OQ for cmplt,
        // EQ_OQ for cmpeq.
    161 #define _simd16_cmplt_ps_mask               SIMD16::cmp_ps_mask<SIMD16::CompareType::LT_OQ>
    162 #define _simd16_cmpeq_ps_mask               SIMD16::cmp_ps_mask<SIMD16::CompareType::EQ_OQ>
        // Conversions written as function-style casts: to simd16mask and to int.
    163 #define _simd16_int2mask(mask)              simd16mask(mask)
    164 #define _simd16_mask2int(mask)              int(mask)
    165 #define _simd16_vmask_ps                    SIMD16::vmask_ps
    166 
    167 #endif//ENABLE_AVX512_SIMD16
    168 
    169 #endif//__SWR_SIMD16INTRIN_H__
    170