1 /* Copyright (C) 2011 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24 #ifndef _IMMINTRIN_H_INCLUDED 25 # error "Never use <fmaintrin.h> directly; include <immintrin.h> instead." 26 #endif 27 28 #ifndef _FMAINTRIN_H_INCLUDED 29 #define _FMAINTRIN_H_INCLUDED 30 31 #ifndef __FMA__ 32 # error "FMA instruction set not enabled" 33 #else 34 35 extern __inline __m128d 36 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 37 _mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C) 38 { 39 return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, 40 (__v2df)__C); 41 } 42 43 extern __inline __m256d 44 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 45 _mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C) 46 { 47 return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, 48 (__v4df)__C); 49 } 50 51 extern __inline __m128 52 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 53 _mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C) 54 { 55 return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, 56 (__v4sf)__C); 57 } 58 59 extern __inline __m256 60 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 61 _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C) 62 { 63 return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, 64 (__v8sf)__C); 65 } 66 67 extern __inline __m128d 68 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 69 _mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C) 70 { 71 return (__m128d) __builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B, 72 (__v2df)__C); 73 } 74 75 extern __inline __m128 76 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 77 _mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C) 78 { 79 return (__m128) __builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B, 80 (__v4sf)__C); 81 } 82 83 extern __inline __m128d 84 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 85 _mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C) 86 { 87 return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B, 88 -(__v2df)__C); 89 } 90 91 extern __inline __m256d 92 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 93 _mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C) 94 { 95 return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B, 96 -(__v4df)__C); 97 } 98 99 extern __inline __m128 100 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 101 _mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C) 102 { 103 return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B, 104 -(__v4sf)__C); 105 } 106 107 extern __inline __m256 108 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 109 _mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C) 110 { 111 return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B, 112 -(__v8sf)__C); 113 } 114 115 extern __inline __m128d 116 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 117 _mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C) 118 { 119 return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B, 120 -(__v2df)__C); 121 } 122 123 extern __inline __m128 124 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 125 _mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C) 126 { 127 return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B, 128 -(__v4sf)__C); 129 } 130 131 extern __inline __m128d 132 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 133 _mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C) 134 { 135 return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, 136 (__v2df)__C); 137 } 138 139 extern __inline __m256d 140 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 141 _mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C) 142 { 143 return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, 144 (__v4df)__C); 145 } 146 147 extern __inline __m128 148 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 149 _mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C) 150 { 151 return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, 152 (__v4sf)__C); 153 } 154 155 extern __inline __m256 156 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 157 _mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C) 158 { 159 return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, 160 (__v8sf)__C); 161 } 162 163 extern __inline __m128d 164 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 165 _mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C) 166 { 167 return (__m128d)__builtin_ia32_vfmaddsd3 (-(__v2df)__A, (__v2df)__B, 168 (__v2df)__C); 169 } 170 171 extern __inline __m128 172 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 173 _mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C) 174 { 175 return (__m128)__builtin_ia32_vfmaddss3 (-(__v4sf)__A, (__v4sf)__B, 176 (__v4sf)__C); 177 } 178 179 extern __inline __m128d 180 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 181 _mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C) 182 { 183 return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B, 184 -(__v2df)__C); 185 } 186 187 extern __inline __m256d 188 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 189 _mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C) 190 { 191 return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B, 192 -(__v4df)__C); 193 } 194 195 extern __inline __m128 196 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 197 _mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C) 198 { 199 return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B, 200 -(__v4sf)__C); 201 } 202 203 extern __inline __m256 204 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 205 _mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C) 206 { 207 return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B, 208 -(__v8sf)__C); 209 } 210 211 extern __inline __m128d 212 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 213 _mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C) 214 { 215 return (__m128d)__builtin_ia32_vfmaddsd3 (-(__v2df)__A, (__v2df)__B, 216 -(__v2df)__C); 217 } 218 219 extern __inline __m128 220 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 221 _mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C) 222 { 223 return (__m128)__builtin_ia32_vfmaddss3 (-(__v4sf)__A, (__v4sf)__B, 224 -(__v4sf)__C); 225 } 226 227 extern __inline __m128d 228 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 229 _mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C) 230 { 231 return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, 232 (__v2df)__C); 233 } 234 235 extern __inline __m256d 236 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 237 _mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C) 238 { 239 return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A, 240 (__v4df)__B, 241 (__v4df)__C); 242 } 243 244 extern __inline __m128 245 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 246 _mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C) 247 { 248 return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, 249 (__v4sf)__C); 250 } 251 252 extern __inline __m256 253 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 254 _mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C) 255 { 256 return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A, 257 (__v8sf)__B, 258 (__v8sf)__C); 259 } 260 261 extern __inline __m128d 262 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 263 _mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C) 264 { 265 return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B, 266 -(__v2df)__C); 267 } 268 269 extern __inline __m256d 270 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 271 _mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C) 272 { 273 return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A, 274 (__v4df)__B, 275 -(__v4df)__C); 276 } 277 278 extern __inline __m128 279 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 280 _mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C) 281 { 282 return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B, 283 -(__v4sf)__C); 284 } 285 286 extern __inline __m256 287 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 288 _mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C) 289 { 290 return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A, 291 (__v8sf)__B, 292 -(__v8sf)__C); 293 } 294 295 #endif 296 297 #endif 298