/*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __X86INTRIN_H
#error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
#endif

#ifndef __FMA4INTRIN_H
#define __FMA4INTRIN_H

#include <pmmintrin.h>

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma4")))

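/* Naming note (editorial summary): these wrappers follow the AMD FMA4 naming
 * scheme. Per element, the _macc forms compute (A * B) + C, _msub computes
 * (A * B) - C, _nmacc computes -(A * B) + C, and _nmsub computes
 * -(A * B) - C, each as a fused operation with a single rounding step. The
 * _ss/_sd forms apply the operation to the low element only, and the
 * _maddsub/_msubadd forms alternate subtraction and addition across lanes. */
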
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}

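/* Illustrative example for the _macc family above (hypothetical values):
 * with __A = {1, 2, 3, 4}, __B = {10, 10, 10, 10} and __C = {5, 5, 5, 5},
 * _mm_macc_ps returns {15, 25, 35, 45}, i.e. (__A * __B) + __C evaluated
 * per element. */
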
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}

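/* Editorial note: the *maddsub and *msubadd forms below alternate the sign
 * of __C across lanes, following the usual ADDSUBPS convention:
 * _mm_maddsub_ps subtracts __C in even lanes and adds it in odd lanes, while
 * _mm_msubadd_ps does the opposite. Illustrative example (hypothetical
 * values): with the vectors from the _macc example above, _mm_maddsub_ps
 * returns {5, 25, 25, 45}. */
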
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
{
  return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
{
  return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
}

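/* The 256-bit variants below apply the same operations across eight
 * single-precision or four double-precision elements. */
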
static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
{
  return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
}

static __inline__ __m256d __DEFAULT_FN_ATTRS
_mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
{
  return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
}

#undef __DEFAULT_FN_ATTRS

#endif /* __FMA4INTRIN_H */