Home | History | Annotate | Download | only in clang-include
      1 /*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 
     24 #ifndef __IMMINTRIN_H
     25 #error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
     26 #endif
     27 
     28 #ifndef __FMAINTRIN_H
     29 #define __FMAINTRIN_H
     30 
     31 /* Define the default attributes for the functions in this file. */
     32 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma")))
     33 
     34 static __inline__ __m128 __DEFAULT_FN_ATTRS
     35 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
     36 {
     37   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     38 }
     39 
     40 static __inline__ __m128d __DEFAULT_FN_ATTRS
     41 _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
     42 {
     43   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
     44 }
     45 
     46 static __inline__ __m128 __DEFAULT_FN_ATTRS
     47 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
     48 {
     49   return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     50 }
     51 
     52 static __inline__ __m128d __DEFAULT_FN_ATTRS
     53 _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
     54 {
     55   return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
     56 }
     57 
     58 static __inline__ __m128 __DEFAULT_FN_ATTRS
     59 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
     60 {
     61   return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     62 }
     63 
     64 static __inline__ __m128d __DEFAULT_FN_ATTRS
     65 _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
     66 {
     67   return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
     68 }
     69 
     70 static __inline__ __m128 __DEFAULT_FN_ATTRS
     71 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
     72 {
     73   return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     74 }
     75 
     76 static __inline__ __m128d __DEFAULT_FN_ATTRS
     77 _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
     78 {
     79   return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
     80 }
     81 
     82 static __inline__ __m128 __DEFAULT_FN_ATTRS
     83 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
     84 {
     85   return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     86 }
     87 
     88 static __inline__ __m128d __DEFAULT_FN_ATTRS
     89 _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
     90 {
     91   return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
     92 }
     93 
     94 static __inline__ __m128 __DEFAULT_FN_ATTRS
     95 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
     96 {
     97   return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     98 }
     99 
    100 static __inline__ __m128d __DEFAULT_FN_ATTRS
    101 _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
    102 {
    103   return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
    104 }
    105 
    106 static __inline__ __m128 __DEFAULT_FN_ATTRS
    107 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
    108 {
    109   return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
    110 }
    111 
    112 static __inline__ __m128d __DEFAULT_FN_ATTRS
    113 _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
    114 {
    115   return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
    116 }
    117 
    118 static __inline__ __m128 __DEFAULT_FN_ATTRS
    119 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
    120 {
    121   return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
    122 }
    123 
    124 static __inline__ __m128d __DEFAULT_FN_ATTRS
    125 _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
    126 {
    127   return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
    128 }
    129 
    130 static __inline__ __m128 __DEFAULT_FN_ATTRS
    131 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
    132 {
    133   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
    134 }
    135 
    136 static __inline__ __m128d __DEFAULT_FN_ATTRS
    137 _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
    138 {
    139   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
    140 }
    141 
    142 static __inline__ __m128 __DEFAULT_FN_ATTRS
    143 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
    144 {
    145   return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
    146 }
    147 
    148 static __inline__ __m128d __DEFAULT_FN_ATTRS
    149 _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
    150 {
    151   return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
    152 }
    153 
    154 static __inline__ __m256 __DEFAULT_FN_ATTRS
    155 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
    156 {
    157   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
    158 }
    159 
    160 static __inline__ __m256d __DEFAULT_FN_ATTRS
    161 _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
    162 {
    163   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
    164 }
    165 
    166 static __inline__ __m256 __DEFAULT_FN_ATTRS
    167 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
    168 {
    169   return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
    170 }
    171 
    172 static __inline__ __m256d __DEFAULT_FN_ATTRS
    173 _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
    174 {
    175   return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
    176 }
    177 
    178 static __inline__ __m256 __DEFAULT_FN_ATTRS
    179 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
    180 {
    181   return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
    182 }
    183 
    184 static __inline__ __m256d __DEFAULT_FN_ATTRS
    185 _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
    186 {
    187   return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
    188 }
    189 
    190 static __inline__ __m256 __DEFAULT_FN_ATTRS
    191 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
    192 {
    193   return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
    194 }
    195 
    196 static __inline__ __m256d __DEFAULT_FN_ATTRS
    197 _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
    198 {
    199   return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
    200 }
    201 
    202 static __inline__ __m256 __DEFAULT_FN_ATTRS
    203 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
    204 {
    205   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
    206 }
    207 
    208 static __inline__ __m256d __DEFAULT_FN_ATTRS
    209 _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
    210 {
    211   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
    212 }
    213 
    214 static __inline__ __m256 __DEFAULT_FN_ATTRS
    215 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
    216 {
    217   return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
    218 }
    219 
    220 static __inline__ __m256d __DEFAULT_FN_ATTRS
    221 _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
    222 {
    223   return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
    224 }
    225 
    226 #undef __DEFAULT_FN_ATTRS
    227 
    228 #endif /* __FMAINTRIN_H */
    229