Home | History | Annotate | Download | only in include
/*===---- fmaintrin.h - FMA intrinsics -------------------------------------===
      2  *
      3  * Permission is hereby granted, free of charge, to any person obtaining a copy
      4  * of this software and associated documentation files (the "Software"), to deal
      5  * in the Software without restriction, including without limitation the rights
      6  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
      7  * copies of the Software, and to permit persons to whom the Software is
      8  * furnished to do so, subject to the following conditions:
      9  *
     10  * The above copyright notice and this permission notice shall be included in
     11  * all copies or substantial portions of the Software.
     12  *
     13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     18  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     19  * THE SOFTWARE.
     20  *
     21  *===-----------------------------------------------------------------------===
     22  */
     23 
     24 #ifndef __IMMINTRIN_H
     25 #error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
     26 #endif
     27 
     28 #ifndef __FMAINTRIN_H
     29 #define __FMAINTRIN_H
     30 
     31 #ifndef __FMA__
     32 # error "FMA instruction set is not enabled"
     33 #else
     34 
     35 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
     36 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
     37 {
     38   return (__m128)__builtin_ia32_vfmaddps(__A, __B, __C);
     39 }
     40 
     41 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
     42 _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
     43 {
     44   return (__m128d)__builtin_ia32_vfmaddpd(__A, __B, __C);
     45 }
     46 
     47 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
     48 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
     49 {
     50   return (__m128)__builtin_ia32_vfmaddss(__A, __B, __C);
     51 }
     52 
     53 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
     54 _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
     55 {
     56   return (__m128d)__builtin_ia32_vfmaddsd(__A, __B, __C);
     57 }
     58 
     59 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
     60 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
     61 {
     62   return (__m128)__builtin_ia32_vfmsubps(__A, __B, __C);
     63 }
     64 
     65 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
     66 _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
     67 {
     68   return (__m128d)__builtin_ia32_vfmsubpd(__A, __B, __C);
     69 }
     70 
     71 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
     72 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
     73 {
     74   return (__m128)__builtin_ia32_vfmsubss(__A, __B, __C);
     75 }
     76 
     77 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
     78 _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
     79 {
     80   return (__m128d)__builtin_ia32_vfmsubsd(__A, __B, __C);
     81 }
     82 
     83 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
     84 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
     85 {
     86   return (__m128)__builtin_ia32_vfnmaddps(__A, __B, __C);
     87 }
     88 
     89 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
     90 _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
     91 {
     92   return (__m128d)__builtin_ia32_vfnmaddpd(__A, __B, __C);
     93 }
     94 
     95 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
     96 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
     97 {
     98   return (__m128)__builtin_ia32_vfnmaddss(__A, __B, __C);
     99 }
    100 
    101 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
    102 _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
    103 {
    104   return (__m128d)__builtin_ia32_vfnmaddsd(__A, __B, __C);
    105 }
    106 
    107 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
    108 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
    109 {
    110   return (__m128)__builtin_ia32_vfnmsubps(__A, __B, __C);
    111 }
    112 
    113 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
    114 _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
    115 {
    116   return (__m128d)__builtin_ia32_vfnmsubpd(__A, __B, __C);
    117 }
    118 
    119 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
    120 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
    121 {
    122   return (__m128)__builtin_ia32_vfnmsubss(__A, __B, __C);
    123 }
    124 
    125 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
    126 _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
    127 {
    128   return (__m128d)__builtin_ia32_vfnmsubsd(__A, __B, __C);
    129 }
    130 
    131 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
    132 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
    133 {
    134   return (__m128)__builtin_ia32_vfmaddsubps(__A, __B, __C);
    135 }
    136 
    137 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
    138 _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
    139 {
    140   return (__m128d)__builtin_ia32_vfmaddsubpd(__A, __B, __C);
    141 }
    142 
    143 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
    144 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
    145 {
    146   return (__m128)__builtin_ia32_vfmsubaddps(__A, __B, __C);
    147 }
    148 
    149 static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
    150 _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
    151 {
    152   return (__m128d)__builtin_ia32_vfmsubaddpd(__A, __B, __C);
    153 }
    154 
    155 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
    156 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
    157 {
    158   return (__m256)__builtin_ia32_vfmaddps256(__A, __B, __C);
    159 }
    160 
    161 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
    162 _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
    163 {
    164   return (__m256d)__builtin_ia32_vfmaddpd256(__A, __B, __C);
    165 }
    166 
    167 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
    168 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
    169 {
    170   return (__m256)__builtin_ia32_vfmsubps256(__A, __B, __C);
    171 }
    172 
    173 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
    174 _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
    175 {
    176   return (__m256d)__builtin_ia32_vfmsubpd256(__A, __B, __C);
    177 }
    178 
    179 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
    180 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
    181 {
    182   return (__m256)__builtin_ia32_vfnmaddps256(__A, __B, __C);
    183 }
    184 
    185 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
    186 _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
    187 {
    188   return (__m256d)__builtin_ia32_vfnmaddpd256(__A, __B, __C);
    189 }
    190 
    191 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
    192 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
    193 {
    194   return (__m256)__builtin_ia32_vfnmsubps256(__A, __B, __C);
    195 }
    196 
    197 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
    198 _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
    199 {
    200   return (__m256d)__builtin_ia32_vfnmsubpd256(__A, __B, __C);
    201 }
    202 
    203 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
    204 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
    205 {
    206   return (__m256)__builtin_ia32_vfmaddsubps256(__A, __B, __C);
    207 }
    208 
    209 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
    210 _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
    211 {
    212   return (__m256d)__builtin_ia32_vfmaddsubpd256(__A, __B, __C);
    213 }
    214 
    215 static __inline__ __m256 __attribute__((__always_inline__, __nodebug__))
    216 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
    217 {
    218   return (__m256)__builtin_ia32_vfmsubaddps256(__A, __B, __C);
    219 }
    220 
    221 static __inline__ __m256d __attribute__((__always_inline__, __nodebug__))
    222 _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
    223 {
    224   return (__m256d)__builtin_ia32_vfmsubaddpd256(__A, __B, __C);
    225 }
    226 
    227 #endif /* __FMA__ */
    228 
    229 #endif /* __FMAINTRIN_H */
    230