Home | History | Annotate | Download | only in include
      1 /* Copyright (C) 2011 Free Software Foundation, Inc.
      2 
      3    This file is part of GCC.
      4 
      5    GCC is free software; you can redistribute it and/or modify
      6    it under the terms of the GNU General Public License as published by
      7    the Free Software Foundation; either version 3, or (at your option)
      8    any later version.
      9 
     10    GCC is distributed in the hope that it will be useful,
     11    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13    GNU General Public License for more details.
     14 
     15    Under Section 7 of GPL version 3, you are granted additional
     16    permissions described in the GCC Runtime Library Exception, version
     17    3.1, as published by the Free Software Foundation.
     18 
     19    You should have received a copy of the GNU General Public License and
     20    a copy of the GCC Runtime Library Exception along with this program;
     21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     22    <http://www.gnu.org/licenses/>.  */
     23 
     24 #ifndef _IMMINTRIN_H_INCLUDED
     25 # error "Never use <fmaintrin.h> directly; include <immintrin.h> instead."
     26 #endif
     27 
     28 #ifndef _FMAINTRIN_H_INCLUDED
     29 #define _FMAINTRIN_H_INCLUDED
     30 
     31 #ifndef __FMA__
     32 # error "FMA instruction set not enabled"
     33 #else
     34 
     35 extern __inline __m128d
     36 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     37 _mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C)
     38 {
     39   return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B,
     40                                            (__v2df)__C);
     41 }
     42 
     43 extern __inline __m256d
     44 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     45 _mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C)
     46 {
     47   return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B,
     48                                               (__v4df)__C);
     49 }
     50 
     51 extern __inline __m128
     52 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     53 _mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C)
     54 {
     55   return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B,
     56                                           (__v4sf)__C);
     57 }
     58 
     59 extern __inline __m256
     60 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     61 _mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
     62 {
     63   return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B,
     64                                              (__v8sf)__C);
     65 }
     66 
     67 extern __inline __m128d
     68 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     69 _mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C)
     70 {
     71   return (__m128d) __builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B,
     72                                              (__v2df)__C);
     73 }
     74 
     75 extern __inline __m128
     76 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     77 _mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C)
     78 {
     79   return (__m128) __builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B,
     80                                             (__v4sf)__C);
     81 }
     82 
     83 extern __inline __m128d
     84 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     85 _mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
     86 {
     87   return (__m128d)__builtin_ia32_vfmaddpd ((__v2df)__A, (__v2df)__B,
     88                                            -(__v2df)__C);
     89 }
     90 
     91 extern __inline __m256d
     92 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     93 _mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
     94 {
     95   return (__m256d)__builtin_ia32_vfmaddpd256 ((__v4df)__A, (__v4df)__B,
     96                                               -(__v4df)__C);
     97 }
     98 
     99 extern __inline __m128
    100 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    101 _mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
    102 {
    103   return (__m128)__builtin_ia32_vfmaddps ((__v4sf)__A, (__v4sf)__B,
    104                                           -(__v4sf)__C);
    105 }
    106 
    107 extern __inline __m256
    108 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    109 _mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
    110 {
    111   return (__m256)__builtin_ia32_vfmaddps256 ((__v8sf)__A, (__v8sf)__B,
    112                                              -(__v8sf)__C);
    113 }
    114 
    115 extern __inline __m128d
    116 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    117 _mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
    118 {
    119   return (__m128d)__builtin_ia32_vfmaddsd3 ((__v2df)__A, (__v2df)__B,
    120                                             -(__v2df)__C);
    121 }
    122 
    123 extern __inline __m128
    124 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    125 _mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
    126 {
    127   return (__m128)__builtin_ia32_vfmaddss3 ((__v4sf)__A, (__v4sf)__B,
    128                                            -(__v4sf)__C);
    129 }
    130 
    131 extern __inline __m128d
    132 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    133 _mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
    134 {
    135   return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B,
    136                                            (__v2df)__C);
    137 }
    138 
    139 extern __inline __m256d
    140 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    141 _mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
    142 {
    143   return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B,
    144                                               (__v4df)__C);
    145 }
    146 
    147 extern __inline __m128
    148 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    149 _mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
    150 {
    151   return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B,
    152                                           (__v4sf)__C);
    153 }
    154 
    155 extern __inline __m256
    156 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    157 _mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
    158 {
    159   return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B,
    160                                              (__v8sf)__C);
    161 }
    162 
    163 extern __inline __m128d
    164 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    165 _mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
    166 {
    167   return (__m128d)__builtin_ia32_vfmaddsd3 (-(__v2df)__A, (__v2df)__B,
    168                                             (__v2df)__C);
    169 }
    170 
    171 extern __inline __m128
    172 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    173 _mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
    174 {
    175   return (__m128)__builtin_ia32_vfmaddss3 (-(__v4sf)__A, (__v4sf)__B,
    176                                            (__v4sf)__C);
    177 }
    178 
    179 extern __inline __m128d
    180 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    181 _mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
    182 {
    183   return (__m128d)__builtin_ia32_vfmaddpd (-(__v2df)__A, (__v2df)__B,
    184                                            -(__v2df)__C);
    185 }
    186 
    187 extern __inline __m256d
    188 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    189 _mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
    190 {
    191   return (__m256d)__builtin_ia32_vfmaddpd256 (-(__v4df)__A, (__v4df)__B,
    192                                               -(__v4df)__C);
    193 }
    194 
    195 extern __inline __m128
    196 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    197 _mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
    198 {
    199   return (__m128)__builtin_ia32_vfmaddps (-(__v4sf)__A, (__v4sf)__B,
    200                                           -(__v4sf)__C);
    201 }
    202 
    203 extern __inline __m256
    204 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    205 _mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
    206 {
    207   return (__m256)__builtin_ia32_vfmaddps256 (-(__v8sf)__A, (__v8sf)__B,
    208                                              -(__v8sf)__C);
    209 }
    210 
    211 extern __inline __m128d
    212 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    213 _mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
    214 {
    215   return (__m128d)__builtin_ia32_vfmaddsd3 (-(__v2df)__A, (__v2df)__B,
    216                                             -(__v2df)__C);
    217 }
    218 
    219 extern __inline __m128
    220 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    221 _mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
    222 {
    223   return (__m128)__builtin_ia32_vfmaddss3 (-(__v4sf)__A, (__v4sf)__B,
    224                                            -(__v4sf)__C);
    225 }
    226 
    227 extern __inline __m128d
    228 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    229 _mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C)
    230 {
    231   return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
    232                                               (__v2df)__C);
    233 }
    234 
    235 extern __inline __m256d
    236 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    237 _mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C)
    238 {
    239   return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
    240                                                  (__v4df)__B,
    241                                                  (__v4df)__C);
    242 }
    243 
    244 extern __inline __m128
    245 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    246 _mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C)
    247 {
    248   return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
    249                                              (__v4sf)__C);
    250 }
    251 
    252 extern __inline __m256
    253 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    254 _mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C)
    255 {
    256   return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
    257                                                 (__v8sf)__B,
    258                                                 (__v8sf)__C);
    259 }
    260 
    261 extern __inline __m128d
    262 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    263 _mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C)
    264 {
    265   return (__m128d)__builtin_ia32_vfmaddsubpd ((__v2df)__A, (__v2df)__B,
    266                                               -(__v2df)__C);
    267 }
    268 
    269 extern __inline __m256d
    270 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    271 _mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C)
    272 {
    273   return (__m256d)__builtin_ia32_vfmaddsubpd256 ((__v4df)__A,
    274                                                  (__v4df)__B,
    275                                                  -(__v4df)__C);
    276 }
    277 
    278 extern __inline __m128
    279 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    280 _mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C)
    281 {
    282   return (__m128)__builtin_ia32_vfmaddsubps ((__v4sf)__A, (__v4sf)__B,
    283                                              -(__v4sf)__C);
    284 }
    285 
    286 extern __inline __m256
    287 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    288 _mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C)
    289 {
    290   return (__m256)__builtin_ia32_vfmaddsubps256 ((__v8sf)__A,
    291                                                 (__v8sf)__B,
    292                                                 -(__v8sf)__C);
    293 }
    294 
    295 #endif
    296 
    297 #endif
    298