Home | History | Annotate | Download | only in CodeGen
      1 // RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Werror | FileCheck %s
      2 // RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
      3 
      4 // Don't include mm_malloc.h, it's system specific.
      5 #define __MM_MALLOC_H
      6 
      7 #include <x86intrin.h>
      8 
      9 // NOTE: This should match the tests in llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll
     10 
     11 __m128i test_mm_maccs_epi16(__m128i a, __m128i b, __m128i c) { // Expects _mm_maccs_epi16 to lower to a call of @llvm.x86.xop.vpmacssww on three <8 x i16> operands, returning <8 x i16>.
     12   // CHECK-LABEL: test_mm_maccs_epi16
     13   // CHECK: call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
     14   return _mm_maccs_epi16(a, b, c);
     15 }
     16 
     17 __m128i test_mm_macc_epi16(__m128i a, __m128i b, __m128i c) { // Expects _mm_macc_epi16 to lower to a call of @llvm.x86.xop.vpmacsww on three <8 x i16> operands, returning <8 x i16>.
     18   // CHECK-LABEL: test_mm_macc_epi16
     19   // CHECK: call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
     20   return _mm_macc_epi16(a, b, c);
     21 }
     22 
     23 __m128i test_mm_maccsd_epi16(__m128i a, __m128i b, __m128i c) { // Expects _mm_maccsd_epi16 to lower to a call of @llvm.x86.xop.vpmacsswd with operands (<8 x i16>, <8 x i16>, <4 x i32>), returning <4 x i32>.
     24   // CHECK-LABEL: test_mm_maccsd_epi16
     25   // CHECK: call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
     26   return _mm_maccsd_epi16(a, b, c);
     27 }
     28 
     29 __m128i test_mm_maccd_epi16(__m128i a, __m128i b, __m128i c) { // Expects _mm_maccd_epi16 to lower to a call of @llvm.x86.xop.vpmacswd with operands (<8 x i16>, <8 x i16>, <4 x i32>), returning <4 x i32>.
     30   // CHECK-LABEL: test_mm_maccd_epi16
     31   // CHECK: call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
     32   return _mm_maccd_epi16(a, b, c);
     33 }
     34 
     35 __m128i test_mm_maccs_epi32(__m128i a, __m128i b, __m128i c) { // Expects _mm_maccs_epi32 to lower to a call of @llvm.x86.xop.vpmacssdd on three <4 x i32> operands, returning <4 x i32>.
     36   // CHECK-LABEL: test_mm_maccs_epi32
     37   // CHECK: call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
     38   return _mm_maccs_epi32(a, b, c);
     39 }
     40 
     41 __m128i test_mm_macc_epi32(__m128i a, __m128i b, __m128i c) { // Expects _mm_macc_epi32 to lower to a call of @llvm.x86.xop.vpmacsdd on three <4 x i32> operands, returning <4 x i32>.
     42   // CHECK-LABEL: test_mm_macc_epi32
     43   // CHECK: call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
     44   return _mm_macc_epi32(a, b, c);
     45 }
     46 
     47 __m128i test_mm_maccslo_epi32(__m128i a, __m128i b, __m128i c) { // Expects _mm_maccslo_epi32 to lower to a call of @llvm.x86.xop.vpmacssdql with operands (<4 x i32>, <4 x i32>, <2 x i64>), returning <2 x i64>.
     48   // CHECK-LABEL: test_mm_maccslo_epi32
     49   // CHECK: call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
     50   return _mm_maccslo_epi32(a, b, c);
     51 }
     52 
     53 __m128i test_mm_macclo_epi32(__m128i a, __m128i b, __m128i c) { // Expects _mm_macclo_epi32 to lower to a call of @llvm.x86.xop.vpmacsdql with operands (<4 x i32>, <4 x i32>, <2 x i64>), returning <2 x i64>.
     54   // CHECK-LABEL: test_mm_macclo_epi32
     55   // CHECK: call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
     56   return _mm_macclo_epi32(a, b, c);
     57 }
     58 
     59 __m128i test_mm_maccshi_epi32(__m128i a, __m128i b, __m128i c) { // Expects _mm_maccshi_epi32 to lower to a call of @llvm.x86.xop.vpmacssdqh with operands (<4 x i32>, <4 x i32>, <2 x i64>), returning <2 x i64>.
     60   // CHECK-LABEL: test_mm_maccshi_epi32
     61   // CHECK: call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
     62   return _mm_maccshi_epi32(a, b, c);
     63 }
     64 
     65 __m128i test_mm_macchi_epi32(__m128i a, __m128i b, __m128i c) { // Expects _mm_macchi_epi32 to lower to a call of @llvm.x86.xop.vpmacsdqh with operands (<4 x i32>, <4 x i32>, <2 x i64>), returning <2 x i64>.
     66   // CHECK-LABEL: test_mm_macchi_epi32
     67   // CHECK: call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
     68   return _mm_macchi_epi32(a, b, c);
     69 }
     70 
     71 __m128i test_mm_maddsd_epi16(__m128i a, __m128i b, __m128i c) { // Expects _mm_maddsd_epi16 to lower to a call of @llvm.x86.xop.vpmadcsswd with operands (<8 x i16>, <8 x i16>, <4 x i32>), returning <4 x i32>.
     72   // CHECK-LABEL: test_mm_maddsd_epi16
     73   // CHECK: call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
     74   return _mm_maddsd_epi16(a, b, c);
     75 }
     76 
     77 __m128i test_mm_maddd_epi16(__m128i a, __m128i b, __m128i c) { // Expects _mm_maddd_epi16 to lower to a call of @llvm.x86.xop.vpmadcswd with operands (<8 x i16>, <8 x i16>, <4 x i32>), returning <4 x i32>.
     78   // CHECK-LABEL: test_mm_maddd_epi16
     79   // CHECK: call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}})
     80   return _mm_maddd_epi16(a, b, c);
     81 }
     82 
     83 __m128i test_mm_haddw_epi8(__m128i a) { // Expects _mm_haddw_epi8 to lower to a call of @llvm.x86.xop.vphaddbw taking <16 x i8> and returning <8 x i16>.
     84   // CHECK-LABEL: test_mm_haddw_epi8
     85   // CHECK: call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %{{.*}})
     86   return _mm_haddw_epi8(a);
     87 }
     88 
     89 __m128i test_mm_haddd_epi8(__m128i a) { // Expects _mm_haddd_epi8 to lower to a call of @llvm.x86.xop.vphaddbd taking <16 x i8> and returning <4 x i32>.
     90   // CHECK-LABEL: test_mm_haddd_epi8
     91   // CHECK: call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %{{.*}})
     92   return _mm_haddd_epi8(a);
     93 }
     94 
     95 __m128i test_mm_haddq_epi8(__m128i a) { // Expects _mm_haddq_epi8 to lower to a call of @llvm.x86.xop.vphaddbq taking <16 x i8> and returning <2 x i64>.
     96   // CHECK-LABEL: test_mm_haddq_epi8
     97   // CHECK: call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %{{.*}})
     98   return _mm_haddq_epi8(a);
     99 }
    100 
    101 __m128i test_mm_haddd_epi16(__m128i a) { // Expects _mm_haddd_epi16 to lower to a call of @llvm.x86.xop.vphaddwd taking <8 x i16> and returning <4 x i32>.
    102   // CHECK-LABEL: test_mm_haddd_epi16
    103   // CHECK: call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %{{.*}})
    104   return _mm_haddd_epi16(a);
    105 }
    106 
    107 __m128i test_mm_haddq_epi16(__m128i a) { // Expects _mm_haddq_epi16 to lower to a call of @llvm.x86.xop.vphaddwq taking <8 x i16> and returning <2 x i64>.
    108   // CHECK-LABEL: test_mm_haddq_epi16
    109   // CHECK: call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %{{.*}})
    110   return _mm_haddq_epi16(a);
    111 }
    112 
    113 __m128i test_mm_haddq_epi32(__m128i a) { // Expects _mm_haddq_epi32 to lower to a call of @llvm.x86.xop.vphadddq taking <4 x i32> and returning <2 x i64>.
    114   // CHECK-LABEL: test_mm_haddq_epi32
    115   // CHECK: call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %{{.*}})
    116   return _mm_haddq_epi32(a);
    117 }
    118 
    119 __m128i test_mm_haddw_epu8(__m128i a) { // Expects _mm_haddw_epu8 to lower to a call of @llvm.x86.xop.vphaddubw taking <16 x i8> and returning <8 x i16>.
    120   // CHECK-LABEL: test_mm_haddw_epu8
    121   // CHECK: call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %{{.*}})
    122   return _mm_haddw_epu8(a);
    123 }
    124 
    125 __m128i test_mm_haddd_epu8(__m128i a) { // Expects _mm_haddd_epu8 to lower to a call of @llvm.x86.xop.vphaddubd taking <16 x i8> and returning <4 x i32>.
    126   // CHECK-LABEL: test_mm_haddd_epu8
    127   // CHECK: call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %{{.*}})
    128   return _mm_haddd_epu8(a);
    129 }
    130 
    131 __m128i test_mm_haddq_epu8(__m128i a) { // Expects _mm_haddq_epu8 to lower to a call of @llvm.x86.xop.vphaddubq taking <16 x i8> and returning <2 x i64>.
    132   // CHECK-LABEL: test_mm_haddq_epu8
    133   // CHECK: call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %{{.*}})
    134   return _mm_haddq_epu8(a);
    135 }
    136 
    137 __m128i test_mm_haddd_epu16(__m128i a) { // Expects _mm_haddd_epu16 to lower to a call of @llvm.x86.xop.vphadduwd taking <8 x i16> and returning <4 x i32>.
    138   // CHECK-LABEL: test_mm_haddd_epu16
    139   // CHECK: call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %{{.*}})
    140   return _mm_haddd_epu16(a);
    141 }
    142 
    143 __m128i test_mm_haddq_epu16(__m128i a) { // Expects _mm_haddq_epu16 to lower to a call of @llvm.x86.xop.vphadduwq taking <8 x i16> and returning <2 x i64>.
    144   // CHECK-LABEL: test_mm_haddq_epu16
    145   // CHECK: call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %{{.*}})
    146   return _mm_haddq_epu16(a);
    147 }
    148 
    149 __m128i test_mm_haddq_epu32(__m128i a) { // Expects _mm_haddq_epu32 to lower to a call of @llvm.x86.xop.vphaddudq taking <4 x i32> and returning <2 x i64>.
    150   // CHECK-LABEL: test_mm_haddq_epu32
    151   // CHECK: call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %{{.*}})
    152   return _mm_haddq_epu32(a);
    153 }
    154 
    155 __m128i test_mm_hsubw_epi8(__m128i a) { // Expects _mm_hsubw_epi8 to lower to a call of @llvm.x86.xop.vphsubbw taking <16 x i8> and returning <8 x i16>.
    156   // CHECK-LABEL: test_mm_hsubw_epi8
    157   // CHECK: call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %{{.*}})
    158   return _mm_hsubw_epi8(a);
    159 }
    160 
    161 __m128i test_mm_hsubd_epi16(__m128i a) { // Expects _mm_hsubd_epi16 to lower to a call of @llvm.x86.xop.vphsubwd taking <8 x i16> and returning <4 x i32>.
    162   // CHECK-LABEL: test_mm_hsubd_epi16
    163   // CHECK: call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %{{.*}})
    164   return _mm_hsubd_epi16(a);
    165 }
    166 
    167 __m128i test_mm_hsubq_epi32(__m128i a) { // Expects _mm_hsubq_epi32 to lower to a call of @llvm.x86.xop.vphsubdq taking <4 x i32> and returning <2 x i64>.
    168   // CHECK-LABEL: test_mm_hsubq_epi32
    169   // CHECK: call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %{{.*}})
    170   return _mm_hsubq_epi32(a);
    171 }
    172 
    173 __m128i test_mm_cmov_si128(__m128i a, __m128i b, __m128i c) { // Expects _mm_cmov_si128 to lower to a call of @llvm.x86.xop.vpcmov on three <2 x i64> operands, returning <2 x i64>.
    174   // CHECK-LABEL: test_mm_cmov_si128
    175   // CHECK: call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
    176   return _mm_cmov_si128(a, b, c);
    177 }
    178 
    179 __m256i test_mm256_cmov_si256(__m256i a, __m256i b, __m256i c) { // 256-bit variant: expects _mm256_cmov_si256 to lower to a call of @llvm.x86.xop.vpcmov.256 on three <4 x i64> operands, returning <4 x i64>.
    180   // CHECK-LABEL: test_mm256_cmov_si256
    181   // CHECK: call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}})
    182   return _mm256_cmov_si256(a, b, c);
    183 }
    184 
    185 __m128i test_mm_perm_epi8(__m128i a, __m128i b, __m128i c) { // Expects _mm_perm_epi8 to lower to a call of @llvm.x86.xop.vpperm on three <16 x i8> operands, returning <16 x i8>.
    186   // CHECK-LABEL: test_mm_perm_epi8
    187   // CHECK: call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
    188   return _mm_perm_epi8(a, b, c);
    189 }
    190 
    191 __m128i test_mm_rot_epi8(__m128i a, __m128i b) { // Expects _mm_rot_epi8 to lower to a call of @llvm.x86.xop.vprotb on two <16 x i8> operands, returning <16 x i8>.
    192   // CHECK-LABEL: test_mm_rot_epi8
    193   // CHECK: call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
    194   return _mm_rot_epi8(a, b);
    195 }
    196 
    197 __m128i test_mm_rot_epi16(__m128i a, __m128i b) { // Expects _mm_rot_epi16 to lower to a call of @llvm.x86.xop.vprotw on two <8 x i16> operands, returning <8 x i16>.
    198   // CHECK-LABEL: test_mm_rot_epi16
    199   // CHECK: call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
    200   return _mm_rot_epi16(a, b);
    201 }
    202 
    203 __m128i test_mm_rot_epi32(__m128i a, __m128i b) { // Expects _mm_rot_epi32 to lower to a call of @llvm.x86.xop.vprotd on two <4 x i32> operands, returning <4 x i32>.
    204   // CHECK-LABEL: test_mm_rot_epi32
    205   // CHECK: call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
    206   return _mm_rot_epi32(a, b);
    207 }
    208 
    209 __m128i test_mm_rot_epi64(__m128i a, __m128i b) { // Expects _mm_rot_epi64 to lower to a call of @llvm.x86.xop.vprotq on two <2 x i64> operands, returning <2 x i64>.
    210   // CHECK-LABEL: test_mm_rot_epi64
    211   // CHECK: call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
    212   return _mm_rot_epi64(a, b);
    213 }
    214 
    215 __m128i test_mm_roti_epi8(__m128i a) { // Expects _mm_roti_epi8 with a constant count to lower to a call of @llvm.x86.xop.vprotbi(<16 x i8>, i8), returning <16 x i8>.
    216   // CHECK-LABEL: test_mm_roti_epi8
    217   // CHECK: call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %{{.*}}, i8 1)
    218   return _mm_roti_epi8(a, 1); // the literal 1 is expected to appear in the IR as i8 1
    219 }
    220 
    221 __m128i test_mm_roti_epi16(__m128i a) { // Expects _mm_roti_epi16 with a constant count to lower to a call of @llvm.x86.xop.vprotwi(<8 x i16>, i8), returning <8 x i16>.
    222   // CHECK-LABEL: test_mm_roti_epi16
    223   // CHECK: call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %{{.*}}, i8 50)
    224   return _mm_roti_epi16(a, 50); // 50 is larger than the 16-bit element width; the expected IR still carries it unchanged as i8 50
    225 }
    226 
    227 __m128i test_mm_roti_epi32(__m128i a) { // Expects _mm_roti_epi32 with a constant count to lower to a call of @llvm.x86.xop.vprotdi(<4 x i32>, i8), returning <4 x i32>.
    228   // CHECK-LABEL: test_mm_roti_epi32
    229   // CHECK: call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %{{.*}}, i8 -30)
    230   return _mm_roti_epi32(a, -30); // negative count: the expected IR carries it unchanged as i8 -30
    231 }
    232 
    233 __m128i test_mm_roti_epi64(__m128i a) { // Expects _mm_roti_epi64 with a constant count to lower to a call of @llvm.x86.xop.vprotqi(<2 x i64>, i8), returning <2 x i64>.
    234   // CHECK-LABEL: test_mm_roti_epi64
    235   // CHECK: call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %{{.*}}, i8 100)
    236   return _mm_roti_epi64(a, 100); // 100 is larger than the 64-bit element width; the expected IR still carries it unchanged as i8 100
    237 }
    238 
    239 __m128i test_mm_shl_epi8(__m128i a, __m128i b) { // Expects _mm_shl_epi8 to lower to a call of @llvm.x86.xop.vpshlb on two <16 x i8> operands, returning <16 x i8>.
    240   // CHECK-LABEL: test_mm_shl_epi8
    241   // CHECK: call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
    242   return _mm_shl_epi8(a, b);
    243 }
    244 
    245 __m128i test_mm_shl_epi16(__m128i a, __m128i b) { // Expects _mm_shl_epi16 to lower to a call of @llvm.x86.xop.vpshlw on two <8 x i16> operands, returning <8 x i16>.
    246   // CHECK-LABEL: test_mm_shl_epi16
    247   // CHECK: call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
    248   return _mm_shl_epi16(a, b);
    249 }
    250 
    251 __m128i test_mm_shl_epi32(__m128i a, __m128i b) { // Expects _mm_shl_epi32 to lower to a call of @llvm.x86.xop.vpshld on two <4 x i32> operands, returning <4 x i32>.
    252   // CHECK-LABEL: test_mm_shl_epi32
    253   // CHECK: call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
    254   return _mm_shl_epi32(a, b);
    255 }
    256 
    257 __m128i test_mm_shl_epi64(__m128i a, __m128i b) { // Expects _mm_shl_epi64 to lower to a call of @llvm.x86.xop.vpshlq on two <2 x i64> operands, returning <2 x i64>.
    258   // CHECK-LABEL: test_mm_shl_epi64
    259   // CHECK: call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
    260   return _mm_shl_epi64(a, b);
    261 }
    262 
    263 __m128i test_mm_sha_epi8(__m128i a, __m128i b) { // Expects _mm_sha_epi8 to lower to a call of @llvm.x86.xop.vpshab on two <16 x i8> operands, returning <16 x i8>.
    264   // CHECK-LABEL: test_mm_sha_epi8
    265   // CHECK: call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
    266   return _mm_sha_epi8(a, b);
    267 }
    268 
    269 __m128i test_mm_sha_epi16(__m128i a, __m128i b) { // Expects _mm_sha_epi16 to lower to a call of @llvm.x86.xop.vpshaw on two <8 x i16> operands, returning <8 x i16>.
    270   // CHECK-LABEL: test_mm_sha_epi16
    271   // CHECK: call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
    272   return _mm_sha_epi16(a, b);
    273 }
    274 
    275 __m128i test_mm_sha_epi32(__m128i a, __m128i b) { // Expects _mm_sha_epi32 to lower to a call of @llvm.x86.xop.vpshad on two <4 x i32> operands, returning <4 x i32>.
    276   // CHECK-LABEL: test_mm_sha_epi32
    277   // CHECK: call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
    278   return _mm_sha_epi32(a, b);
    279 }
    280 
    281 __m128i test_mm_sha_epi64(__m128i a, __m128i b) { // Expects _mm_sha_epi64 to lower to a call of @llvm.x86.xop.vpshaq on two <2 x i64> operands, returning <2 x i64>.
    282   // CHECK-LABEL: test_mm_sha_epi64
    283   // CHECK: call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
    284   return _mm_sha_epi64(a, b);
    285 }
    286 
    287 __m128i test_mm_com_epu8(__m128i a, __m128i b) { // Expects _mm_com_epu8 to lower to a call of @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8), returning <16 x i8>.
    288   // CHECK-LABEL: test_mm_com_epu8
    289   // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 0)
    290   return _mm_com_epu8(a, b, 0); // constant third argument 0 is expected in the IR as i8 0
    291 }
    292 
    293 __m128i test_mm_com_epu16(__m128i a, __m128i b) { // Expects _mm_com_epu16 to lower to a call of @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8), returning <8 x i16>.
    294   // CHECK-LABEL: test_mm_com_epu16
    295   // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 0)
    296   return _mm_com_epu16(a, b, 0); // constant third argument 0 is expected in the IR as i8 0
    297 }
    298 
    299 __m128i test_mm_com_epu32(__m128i a, __m128i b) { // Expects _mm_com_epu32 to lower to a call of @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8), returning <4 x i32>.
    300   // CHECK-LABEL: test_mm_com_epu32
    301   // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 0)
    302   return _mm_com_epu32(a, b, 0); // constant third argument 0 is expected in the IR as i8 0
    303 }
    304 
    305 __m128i test_mm_com_epu64(__m128i a, __m128i b) { // Expects _mm_com_epu64 to lower to a call of @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8), returning <2 x i64>.
    306   // CHECK-LABEL: test_mm_com_epu64
    307   // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 0)
    308   return _mm_com_epu64(a, b, 0); // constant third argument 0 is expected in the IR as i8 0
    309 }
    310 
    311 __m128i test_mm_com_epi8(__m128i a, __m128i b) { // Expects _mm_com_epi8 to lower to a call of @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8), returning <16 x i8>.
    312   // CHECK-LABEL: test_mm_com_epi8
    313   // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 0)
    314   return _mm_com_epi8(a, b, 0); // constant third argument 0 is expected in the IR as i8 0
    315 }
    316 
    317 __m128i test_mm_com_epi16(__m128i a, __m128i b) { // Expects _mm_com_epi16 to lower to a call of @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8), returning <8 x i16>.
    318   // CHECK-LABEL: test_mm_com_epi16
    319   // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 0)
    320   return _mm_com_epi16(a, b, 0); // constant third argument 0 is expected in the IR as i8 0
    321 }
    322 
    323 __m128i test_mm_com_epi32(__m128i a, __m128i b) { // Expects _mm_com_epi32 to lower to a call of @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8), returning <4 x i32>.
    324   // CHECK-LABEL: test_mm_com_epi32
    325   // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 0)
    326   return _mm_com_epi32(a, b, 0); // constant third argument 0 is expected in the IR as i8 0
    327 }
    328 
    329 __m128i test_mm_com_epi64(__m128i a, __m128i b) { // Expects _mm_com_epi64 to lower to a call of @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8), returning <2 x i64>.
    330   // CHECK-LABEL: test_mm_com_epi64
    331   // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 0)
    332   return _mm_com_epi64(a, b, 0); // constant third argument 0 is expected in the IR as i8 0
    333 }
    334 
    335 __m128d test_mm_permute2_pd(__m128d a, __m128d b, __m128i c) { // Expects _mm_permute2_pd to lower to a call of @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8), returning <2 x double>.
    336   // CHECK-LABEL: test_mm_permute2_pd
    337   // CHECK: call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i64> %{{.*}}, i8 0)
    338   return _mm_permute2_pd(a, b, c, 0); // constant fourth argument 0 is expected in the IR as i8 0
    339 }
    340 
    341 __m256d test_mm256_permute2_pd(__m256d a, __m256d b, __m256i c) { // 256-bit variant: expects _mm256_permute2_pd to lower to a call of @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8), returning <4 x double>.
    342   // CHECK-LABEL: test_mm256_permute2_pd
    343   // CHECK: call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i64> %{{.*}}, i8 0)
    344   return _mm256_permute2_pd(a, b, c, 0); // constant fourth argument 0 is expected in the IR as i8 0
    345 }
    346 
    347 __m128 test_mm_permute2_ps(__m128 a, __m128 b, __m128i c) { // Expects _mm_permute2_ps to lower to a call of @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8), returning <4 x float>.
    348   // CHECK-LABEL: test_mm_permute2_ps
    349   // CHECK: call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> %{{.*}}, i8 0)
    350   return _mm_permute2_ps(a, b, c, 0); // constant fourth argument 0 is expected in the IR as i8 0
    351 }
    352 
    353 __m256 test_mm256_permute2_ps(__m256 a, __m256 b, __m256i c) { // 256-bit variant: expects _mm256_permute2_ps to lower to a call of @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8), returning <8 x float>.
    354   // CHECK-LABEL: test_mm256_permute2_ps
    355   // CHECK: call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> %{{.*}}, i8 0)
    356   return _mm256_permute2_ps(a, b, c, 0); // constant fourth argument 0 is expected in the IR as i8 0
    357 }
    358 
    359 __m128 test_mm_frcz_ss(__m128 a) { // Expects _mm_frcz_ss to lower to a call of @llvm.x86.xop.vfrcz.ss taking and returning <4 x float>.
    360   // CHECK-LABEL: test_mm_frcz_ss
    361   // CHECK: call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %{{.*}})
    362   return _mm_frcz_ss(a);
    363 }
    364 
    365 __m128d test_mm_frcz_sd(__m128d a) { // Expects _mm_frcz_sd to lower to a call of @llvm.x86.xop.vfrcz.sd taking and returning <2 x double>.
    366   // CHECK-LABEL: test_mm_frcz_sd
    367   // CHECK: call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %{{.*}})
    368   return _mm_frcz_sd(a);
    369 }
    370 
    371 __m128 test_mm_frcz_ps(__m128 a) { // Expects _mm_frcz_ps to lower to a call of @llvm.x86.xop.vfrcz.ps taking and returning <4 x float>.
    372   // CHECK-LABEL: test_mm_frcz_ps
    373   // CHECK: call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %{{.*}})
    374   return _mm_frcz_ps(a);
    375 }
    376 
    377 __m128d test_mm_frcz_pd(__m128d a) { // Expects _mm_frcz_pd to lower to a call of @llvm.x86.xop.vfrcz.pd taking and returning <2 x double>.
    378   // CHECK-LABEL: test_mm_frcz_pd
    379   // CHECK: call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %{{.*}})
    380   return _mm_frcz_pd(a);
    381 }
    382 
    383 __m256 test_mm256_frcz_ps(__m256 a) { // 256-bit variant: expects _mm256_frcz_ps to lower to a call of @llvm.x86.xop.vfrcz.ps.256 taking and returning <8 x float>.
    384   // CHECK-LABEL: test_mm256_frcz_ps
    385   // CHECK: call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %{{.*}})
    386   return _mm256_frcz_ps(a);
    387 }
    388 
    389 __m256d test_mm256_frcz_pd(__m256d a) { // 256-bit variant: expects _mm256_frcz_pd to lower to a call of @llvm.x86.xop.vfrcz.pd.256 taking and returning <4 x double>.
    390   // CHECK-LABEL: test_mm256_frcz_pd
    391   // CHECK: call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %{{.*}})
    392   return _mm256_frcz_pd(a);
    393 }
    394