1 /*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 #ifndef __IMMINTRIN_H 25 #error "Never use <fmaintrin.h> directly; include <immintrin.h> instead." 26 #endif 27 28 #ifndef __FMAINTRIN_H 29 #define __FMAINTRIN_H 30 31 /* Define the default attributes for the functions in this file. */ 32 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma"))) 33 34 static __inline__ __m128 __DEFAULT_FN_ATTRS 35 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) 36 { 37 return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 38 } 39 40 static __inline__ __m128d __DEFAULT_FN_ATTRS 41 _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) 42 { 43 return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 44 } 45 46 static __inline__ __m128 __DEFAULT_FN_ATTRS 47 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) 48 { 49 return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 50 } 51 52 static __inline__ __m128d __DEFAULT_FN_ATTRS 53 _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) 54 { 55 return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); 56 } 57 58 static __inline__ __m128 __DEFAULT_FN_ATTRS 59 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) 60 { 61 return (__m128)__builtin_ia32_vfmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 62 } 63 64 static __inline__ __m128d __DEFAULT_FN_ATTRS 65 _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) 66 { 67 return (__m128d)__builtin_ia32_vfmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 68 } 69 70 static __inline__ __m128 __DEFAULT_FN_ATTRS 71 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) 72 { 73 return (__m128)__builtin_ia32_vfmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 74 } 75 76 static __inline__ __m128d __DEFAULT_FN_ATTRS 77 _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) 78 { 79 return (__m128d)__builtin_ia32_vfmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C); 80 } 81 82 static __inline__ __m128 __DEFAULT_FN_ATTRS 83 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) 84 { 85 return (__m128)__builtin_ia32_vfnmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 86 } 87 88 static __inline__ __m128d __DEFAULT_FN_ATTRS 89 _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) 90 { 91 return (__m128d)__builtin_ia32_vfnmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 92 } 93 94 static __inline__ __m128 __DEFAULT_FN_ATTRS 95 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) 96 { 97 return (__m128)__builtin_ia32_vfnmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 98 } 99 100 static __inline__ __m128d __DEFAULT_FN_ATTRS 101 _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) 102 { 103 return (__m128d)__builtin_ia32_vfnmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); 104 } 105 106 static __inline__ __m128 __DEFAULT_FN_ATTRS 107 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) 108 { 109 return (__m128)__builtin_ia32_vfnmsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 110 } 111 112 static __inline__ __m128d __DEFAULT_FN_ATTRS 113 _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) 114 { 115 return (__m128d)__builtin_ia32_vfnmsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 116 } 117 118 static __inline__ __m128 __DEFAULT_FN_ATTRS 119 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) 120 { 121 return (__m128)__builtin_ia32_vfnmsubss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 122 } 123 124 static __inline__ __m128d __DEFAULT_FN_ATTRS 125 _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) 126 { 127 return (__m128d)__builtin_ia32_vfnmsubsd((__v2df)__A, (__v2df)__B, (__v2df)__C); 128 } 129 130 static __inline__ __m128 __DEFAULT_FN_ATTRS 131 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) 132 { 133 return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 134 } 135 136 static __inline__ __m128d __DEFAULT_FN_ATTRS 137 _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) 138 { 139 return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 140 } 141 142 static __inline__ __m128 __DEFAULT_FN_ATTRS 143 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) 144 { 145 return (__m128)__builtin_ia32_vfmsubaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); 146 } 147 148 static __inline__ __m128d __DEFAULT_FN_ATTRS 149 _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) 150 { 151 return (__m128d)__builtin_ia32_vfmsubaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); 152 } 153 154 static __inline__ __m256 __DEFAULT_FN_ATTRS 155 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) 156 { 157 return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 158 } 159 160 static __inline__ __m256d __DEFAULT_FN_ATTRS 161 _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) 162 { 163 return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 164 } 165 166 static __inline__ __m256 __DEFAULT_FN_ATTRS 167 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) 168 { 169 return (__m256)__builtin_ia32_vfmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 170 } 171 172 static __inline__ __m256d __DEFAULT_FN_ATTRS 173 _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) 174 { 175 return (__m256d)__builtin_ia32_vfmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 176 } 177 178 static __inline__ __m256 __DEFAULT_FN_ATTRS 179 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) 180 { 181 return (__m256)__builtin_ia32_vfnmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 182 } 183 184 static __inline__ __m256d __DEFAULT_FN_ATTRS 185 _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) 186 { 187 return (__m256d)__builtin_ia32_vfnmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 188 } 189 190 static __inline__ __m256 __DEFAULT_FN_ATTRS 191 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) 192 { 193 return (__m256)__builtin_ia32_vfnmsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 194 } 195 196 static __inline__ __m256d __DEFAULT_FN_ATTRS 197 _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) 198 { 199 return (__m256d)__builtin_ia32_vfnmsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 200 } 201 202 static __inline__ __m256 __DEFAULT_FN_ATTRS 203 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) 204 { 205 return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 206 } 207 208 static __inline__ __m256d __DEFAULT_FN_ATTRS 209 _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) 210 { 211 return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 212 } 213 214 static __inline__ __m256 __DEFAULT_FN_ATTRS 215 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) 216 { 217 return (__m256)__builtin_ia32_vfmsubaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); 218 } 219 220 static __inline__ __m256d __DEFAULT_FN_ATTRS 221 _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) 222 { 223 return (__m256d)__builtin_ia32_vfmsubaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); 224 } 225 226 #undef __DEFAULT_FN_ATTRS 227 228 #endif /* __FMAINTRIN_H */ 229