Home | History | Annotate | Download | only in CodeGen
      1 // RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -emit-llvm -o - -Werror | FileCheck %s
      2 // RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse4.1 -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s
      3 
      4 // Don't include mm_malloc.h, it's system specific.
      5 #define __MM_MALLOC_H
      6 
      7 #include <x86intrin.h>
      8 
      9 __m128i test_mm_blend_epi16(__m128i V1, __m128i V2) {
          // Immediate 42 (0b00101010) has bits 1, 3 and 5 set; the expected
          // shuffle mask takes exactly those lanes from the second shuffle
          // operand (indices 9, 11 and 13) and the rest from the first.
     10   // CHECK-LABEL: test_mm_blend_epi16
     11   // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 7>
     12   return _mm_blend_epi16(V1, V2, 42);
     13 }
     14 
     15 __m128d test_mm_blend_pd(__m128d V1, __m128d V2) {
          // Immediate 2 has only bit 1 set; the expected shuffle mask takes
          // lane 1 from the second shuffle operand (index 3) and lane 0 from
          // the first.
     16   // CHECK-LABEL: test_mm_blend_pd
     17   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 3>
     18   return _mm_blend_pd(V1, V2, 2);
     19 }
     20 
     21 __m128 test_mm_blend_ps(__m128 V1, __m128 V2) {
          // Immediate 6 (0b0110) has bits 1 and 2 set; the expected shuffle
          // mask takes lanes 1 and 2 from the second shuffle operand
          // (indices 5 and 6) and lanes 0 and 3 from the first.
     22   // CHECK-LABEL: test_mm_blend_ps
     23   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
     24   return _mm_blend_ps(V1, V2, 6);
     25 }
     26 
     27 __m128i test_mm_blendv_epi8(__m128i V1, __m128i V2, __m128i V3) {
          // Blend with a run-time selector (V3): expected to be emitted as a
          // call to the pblendvb target intrinsic.
     28   // CHECK-LABEL: test_mm_blendv_epi8
     29   // CHECK: call <16 x i8> @llvm.x86.sse41.pblendvb
     30   return _mm_blendv_epi8(V1, V2, V3);
     31 }
     32 
     33 __m128d test_mm_blendv_pd(__m128d V1, __m128d V2, __m128d V3) {
          // Blend with a run-time selector (V3): expected to be emitted as a
          // call to the blendvpd target intrinsic.
     34   // CHECK-LABEL: test_mm_blendv_pd
     35   // CHECK: call <2 x double> @llvm.x86.sse41.blendvpd
     36   return _mm_blendv_pd(V1, V2, V3);
     37 }
     38 
     39 __m128 test_mm_blendv_ps(__m128 V1, __m128 V2, __m128 V3) {
          // Blend with a run-time selector (V3): expected to be emitted as a
          // call to the blendvps target intrinsic.
     40   // CHECK-LABEL: test_mm_blendv_ps
     41   // CHECK: call <4 x float> @llvm.x86.sse41.blendvps
     42   return _mm_blendv_ps(V1, V2, V3);
     43 }
     44 
     45 __m128d test_mm_ceil_pd(__m128d x) {
          // _mm_ceil_pd is a packed-double round with mode _MM_FROUND_CEIL
          // (2). The rounding immediate is matched as well as the intrinsic
          // name, so the test cannot pass if a different rounding mode is
          // emitted.
     46   // CHECK-LABEL: test_mm_ceil_pd
     47   // CHECK: call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 2)
     48   return _mm_ceil_pd(x);
     49 }
     50 
     51 __m128 test_mm_ceil_ps(__m128 x) {
          // _mm_ceil_ps is a packed-float round with mode _MM_FROUND_CEIL
          // (2). The rounding immediate is matched as well as the intrinsic
          // name, so the test cannot pass if a different rounding mode is
          // emitted.
     52   // CHECK-LABEL: test_mm_ceil_ps
     53   // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 2)
     54   return _mm_ceil_ps(x);
     55 }
     56 
     57 __m128d test_mm_ceil_sd(__m128d x, __m128d y) {
          // _mm_ceil_sd is a scalar-double round with mode _MM_FROUND_CEIL
          // (2). The rounding immediate is matched as well as the intrinsic
          // name, so the test cannot pass if a different rounding mode is
          // emitted.
     58   // CHECK-LABEL: test_mm_ceil_sd
     59   // CHECK: call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 2)
     60   return _mm_ceil_sd(x, y);
     61 }
     62 
     63 __m128 test_mm_ceil_ss(__m128 x, __m128 y) {
          // _mm_ceil_ss is a scalar-float round with mode _MM_FROUND_CEIL
          // (2). The rounding immediate is matched as well as the intrinsic
          // name, so the test cannot pass if a different rounding mode is
          // emitted.
     64   // CHECK-LABEL: test_mm_ceil_ss
     65   // CHECK: call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 2)
     66   return _mm_ceil_ss(x, y);
     67 }
     68 
     69 __m128i test_mm_cmpeq_epi64(__m128i A, __m128i B) {
          // 64-bit lane equality is expected to appear as a generic IR
          // integer compare on <2 x i64>.
     70   // CHECK-LABEL: test_mm_cmpeq_epi64
     71   // CHECK: icmp eq <2 x i64>
     72   return _mm_cmpeq_epi64(A, B);
     73 }
     74 
     75 __m128i test_mm_cvtepi8_epi16(__m128i a) {
          // Signed i8 -> i16 widening: expected to appear as a generic sext
          // from <8 x i8> to <8 x i16>.
     76   // CHECK-LABEL: test_mm_cvtepi8_epi16
     77   // CHECK: sext <8 x i8> {{.*}} to <8 x i16>
     78   return _mm_cvtepi8_epi16(a);
     79 }
     80 
     81 __m128i test_mm_cvtepi8_epi32(__m128i a) {
          // Signed i8 -> i32 widening: expected to appear as a generic sext
          // from <4 x i8> to <4 x i32>.
     82   // CHECK-LABEL: test_mm_cvtepi8_epi32
     83   // CHECK: sext <4 x i8> {{.*}} to <4 x i32>
     84   return _mm_cvtepi8_epi32(a);
     85 }
     86 
     87 __m128i test_mm_cvtepi8_epi64(__m128i a) {
          // Signed i8 -> i64 widening: expected to appear as a generic sext
          // from <2 x i8> to <2 x i64>.
     88   // CHECK-LABEL: test_mm_cvtepi8_epi64
     89   // CHECK: sext <2 x i8> {{.*}} to <2 x i64>
     90   return _mm_cvtepi8_epi64(a);
     91 }
     92 
     93 __m128i test_mm_cvtepi16_epi32(__m128i a) {
          // Signed i16 -> i32 widening: expected to appear as a generic sext
          // from <4 x i16> to <4 x i32>.
     94   // CHECK-LABEL: test_mm_cvtepi16_epi32
     95   // CHECK: sext <4 x i16> {{.*}} to <4 x i32>
     96   return _mm_cvtepi16_epi32(a);
     97 }
     98 
     99 __m128i test_mm_cvtepi16_epi64(__m128i a) {
          // Signed i16 -> i64 widening: expected to appear as a generic sext
          // from <2 x i16> to <2 x i64>.
    100   // CHECK-LABEL: test_mm_cvtepi16_epi64
    101   // CHECK: sext <2 x i16> {{.*}} to <2 x i64>
    102   return _mm_cvtepi16_epi64(a);
    103 }
    104 
    105 __m128i test_mm_cvtepi32_epi64(__m128i a) {
          // Signed i32 -> i64 widening: expected to appear as a generic sext
          // from <2 x i32> to <2 x i64>.
    106   // CHECK-LABEL: test_mm_cvtepi32_epi64
    107   // CHECK: sext <2 x i32> {{.*}} to <2 x i64>
    108   return _mm_cvtepi32_epi64(a);
    109 }
    110 
    111 __m128i test_mm_cvtepu8_epi16(__m128i a) {
          // Unsigned i8 -> i16 widening: expected to be emitted as a call to
          // the pmovzxbw target intrinsic taking the full <16 x i8> vector.
    112   // CHECK-LABEL: test_mm_cvtepu8_epi16
    113   // CHECK: call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> {{.*}})
    114   return _mm_cvtepu8_epi16(a);
    115 }
    116 
    117 __m128i test_mm_cvtepu8_epi32(__m128i a) {
          // Unsigned i8 -> i32 widening: expected to be emitted as a call to
          // the pmovzxbd target intrinsic taking the full <16 x i8> vector.
    118   // CHECK-LABEL: test_mm_cvtepu8_epi32
    119   // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> {{.*}})
    120   return _mm_cvtepu8_epi32(a);
    121 }
    122 
    123 __m128i test_mm_cvtepu8_epi64(__m128i a) {
          // Unsigned i8 -> i64 widening: expected to be emitted as a call to
          // the pmovzxbq target intrinsic taking the full <16 x i8> vector.
    124   // CHECK-LABEL: test_mm_cvtepu8_epi64
    125   // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> {{.*}})
    126   return _mm_cvtepu8_epi64(a);
    127 }
    128 
    129 __m128i test_mm_cvtepu16_epi32(__m128i a) {
          // Unsigned i16 -> i32 widening: expected to be emitted as a call to
          // the pmovzxwd target intrinsic taking the full <8 x i16> vector.
    130   // CHECK-LABEL: test_mm_cvtepu16_epi32
    131   // CHECK: call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> {{.*}})
    132   return _mm_cvtepu16_epi32(a);
    133 }
    134 
    135 __m128i test_mm_cvtepu16_epi64(__m128i a) {
          // Unsigned i16 -> i64 widening: expected to be emitted as a call to
          // the pmovzxwq target intrinsic taking the full <8 x i16> vector.
    136   // CHECK-LABEL: test_mm_cvtepu16_epi64
    137   // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> {{.*}})
    138   return _mm_cvtepu16_epi64(a);
    139 }
    140 
    141 __m128i test_mm_cvtepu32_epi64(__m128i a) {
          // Unsigned i32 -> i64 widening: expected to be emitted as a call to
          // the pmovzxdq target intrinsic taking the full <4 x i32> vector.
    142   // CHECK-LABEL: test_mm_cvtepu32_epi64
    143   // CHECK: call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> {{.*}})
    144   return _mm_cvtepu32_epi64(a);
    145 }
    146 
    147 __m128d test_mm_dp_pd(__m128d x, __m128d y) {
          // Dot product with control immediate 2: expected to be emitted as a
          // call to the dppd intrinsic. Only the intrinsic name is matched;
          // the immediate is not verified.
    148   // CHECK-LABEL: test_mm_dp_pd
    149   // CHECK: call <2 x double> @llvm.x86.sse41.dppd
    150   return _mm_dp_pd(x, y, 2);
    151 }
    152 
    153 __m128 test_mm_dp_ps(__m128 x, __m128 y) {
          // Dot product with control immediate 2: expected to be emitted as a
          // call to the dpps intrinsic. Only the intrinsic name is matched;
          // the immediate is not verified.
    154   // CHECK-LABEL: test_mm_dp_ps
    155   // CHECK: call <4 x float> @llvm.x86.sse41.dpps
    156   return _mm_dp_ps(x, y, 2);
    157 }
    158 
    159 int test_mm_extract_epi8(__m128i x) {
          // The index 16 is one past the last of the 16 byte lanes, yet the
          // expected extractelement index is 0.
          // NOTE(review): presumably this exercises the intrinsic masking the
          // index to its low 4 bits - confirm against smmintrin.h.
    160   // CHECK-LABEL: test_mm_extract_epi8
    161   // CHECK: extractelement <16 x i8> %{{.*}}, i32 0
    162   return _mm_extract_epi8(x, 16);
    163 }
    164 
    165 int test_mm_extract_epi32(__m128i x) {
          // Extracting lane 1 is expected to appear as a generic
          // extractelement on <4 x i32> with index 1.
    166   // CHECK-LABEL: test_mm_extract_epi32
    167   // CHECK: extractelement <4 x i32> %{{.*}}, i32 1
    168   return _mm_extract_epi32(x, 1);
    169 }
    170 
    171 long long test_mm_extract_epi64(__m128i x) {
          // Extracting lane 1 is expected to appear as a generic
          // extractelement on <2 x i64> with index 1.
    172   // CHECK-LABEL: test_mm_extract_epi64
    173   // CHECK: extractelement <2 x i64> %{{.*}}, i32 1
    174   return _mm_extract_epi64(x, 1);
    175 }
    176 
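    177 // TODO: add coverage for _mm_extract_ps. Sketch below; the parameter must
        // be __m128 (not __m128i), because it is passed to _mm_add_ps.
    178 //int test_mm_extract_ps(__m128 x) {
    179 //  return _mm_extract_ps(_mm_add_ps(x,x), 1);
    180 //}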
    181 
    182 __m128d test_mm_floor_pd(__m128d x) {
          // _mm_floor_pd is a packed-double round with mode _MM_FROUND_FLOOR
          // (1). The rounding immediate is matched as well as the intrinsic
          // name, so the test cannot pass if a different rounding mode is
          // emitted.
    183   // CHECK-LABEL: test_mm_floor_pd
    184   // CHECK: call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 1)
    185   return _mm_floor_pd(x);
    186 }
    187 
    188 __m128 test_mm_floor_ps(__m128 x) {
          // _mm_floor_ps is a packed-float round with mode _MM_FROUND_FLOOR
          // (1). The rounding immediate is matched as well as the intrinsic
          // name, so the test cannot pass if a different rounding mode is
          // emitted.
    189   // CHECK-LABEL: test_mm_floor_ps
    190   // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 1)
    191   return _mm_floor_ps(x);
    192 }
    193 
    194 __m128d test_mm_floor_sd(__m128d x, __m128d y) {
          // _mm_floor_sd is a scalar-double round with mode _MM_FROUND_FLOOR
          // (1). The rounding immediate is matched as well as the intrinsic
          // name, so the test cannot pass if a different rounding mode is
          // emitted.
    195   // CHECK-LABEL: test_mm_floor_sd
    196   // CHECK: call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 1)
    197   return _mm_floor_sd(x, y);
    198 }
    199 
    200 __m128 test_mm_floor_ss(__m128 x, __m128 y) {
          // _mm_floor_ss is a scalar-float round with mode _MM_FROUND_FLOOR
          // (1). The rounding immediate is matched as well as the intrinsic
          // name, so the test cannot pass if a different rounding mode is
          // emitted.
    201   // CHECK-LABEL: test_mm_floor_ss
    202   // CHECK: call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 1)
    203   return _mm_floor_ss(x, y);
    204 }
    205 
    206 __m128i test_mm_insert_epi8(__m128i x, char b) {
          // The index 16 is one past the last of the 16 byte lanes, yet the
          // expected insertelement index is 0.
          // NOTE(review): presumably this exercises the intrinsic masking the
          // index to its low 4 bits - confirm against smmintrin.h.
    207   // CHECK-LABEL: test_mm_insert_epi8
    208   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 0
    209   return _mm_insert_epi8(x, b, 16);
    210 }
    211 
    212 __m128i test_mm_insert_epi32(__m128i x, int b) {
          // The index 4 is one past the last of the 4 dword lanes, yet the
          // expected insertelement index is 0.
          // NOTE(review): presumably this exercises the intrinsic masking the
          // index to its low 2 bits - confirm against smmintrin.h.
    213   // CHECK-LABEL: test_mm_insert_epi32
    214   // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0
    215   return _mm_insert_epi32(x, b, 4);
    216 }
    217 
    218 __m128i test_mm_insert_epi64(__m128i x, long long b) {
          // The index 2 is one past the last of the 2 qword lanes, yet the
          // expected insertelement index is 0.
          // NOTE(review): presumably this exercises the intrinsic masking the
          // index to its lowest bit - confirm against smmintrin.h.
    219   // CHECK-LABEL: test_mm_insert_epi64
    220   // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 0
    221   return _mm_insert_epi64(x, b, 2);
    222 }
    223 
    224 __m128 test_mm_insert_ps(__m128 x, __m128 y) {
          // Float insert with control immediate 5: expected to be emitted as
          // a call to the insertps intrinsic. Only the intrinsic name is
          // matched; the immediate is not verified.
    225   // CHECK-LABEL: test_mm_insert_ps
    226   // CHECK: call <4 x float> @llvm.x86.sse41.insertps
    227   return _mm_insert_ps(x, y, 5);
    228 }
    229 
    230 __m128i test_mm_max_epi8(__m128i x, __m128i y) {
          // Signed 8-bit maximum: expected to be emitted as a call to the
          // pmaxsb target intrinsic returning <16 x i8>.
    231   // CHECK-LABEL: test_mm_max_epi8
    232   // CHECK: call <16 x i8> @llvm.x86.sse41.pmaxsb
    233   return _mm_max_epi8(x, y);
    234 }
    235 
    236 __m128i test_mm_max_epu16(__m128i x, __m128i y) {
          // Unsigned 16-bit maximum: expected to be emitted as a call to the
          // pmaxuw target intrinsic returning <8 x i16>.
    237   // CHECK-LABEL: test_mm_max_epu16
    238   // CHECK: call <8 x i16> @llvm.x86.sse41.pmaxuw
    239   return _mm_max_epu16(x, y);
    240 }
    241 
    242 __m128i test_mm_max_epi32(__m128i x, __m128i y) {
          // Signed 32-bit maximum: expected to be emitted as a call to the
          // pmaxsd target intrinsic returning <4 x i32>.
    243   // CHECK-LABEL: test_mm_max_epi32
    244   // CHECK: call <4 x i32> @llvm.x86.sse41.pmaxsd
    245   return _mm_max_epi32(x, y);
    246 }
    247 
    248 __m128i test_mm_max_epu32(__m128i x, __m128i y) {
          // Unsigned 32-bit maximum: expected to be emitted as a call to the
          // pmaxud target intrinsic returning <4 x i32>.
    249   // CHECK-LABEL: test_mm_max_epu32
    250   // CHECK: call <4 x i32> @llvm.x86.sse41.pmaxud
    251   return _mm_max_epu32(x, y);
    252 }
    253 
    254 __m128i test_mm_min_epi8(__m128i x, __m128i y) {
          // Signed 8-bit minimum: expected to be emitted as a call to the
          // pminsb target intrinsic returning <16 x i8>.
    255   // CHECK-LABEL: test_mm_min_epi8
    256   // CHECK: call <16 x i8> @llvm.x86.sse41.pminsb
    257   return _mm_min_epi8(x, y);
    258 }
    259 
    260 __m128i test_mm_min_epu16(__m128i x, __m128i y) {
          // Unsigned 16-bit minimum: expected to be emitted as a call to the
          // pminuw target intrinsic returning <8 x i16>.
    261   // CHECK-LABEL: test_mm_min_epu16
    262   // CHECK: call <8 x i16> @llvm.x86.sse41.pminuw
    263   return _mm_min_epu16(x, y);
    264 }
    265 
    266 __m128i test_mm_min_epi32(__m128i x, __m128i y) {
          // Signed 32-bit minimum: expected to be emitted as a call to the
          // pminsd target intrinsic returning <4 x i32>.
    267   // CHECK-LABEL: test_mm_min_epi32
    268   // CHECK: call <4 x i32> @llvm.x86.sse41.pminsd
    269   return _mm_min_epi32(x, y);
    270 }
    271 
    272 __m128i test_mm_min_epu32(__m128i x, __m128i y) {
          // Unsigned 32-bit minimum: expected to be emitted as a call to the
          // pminud target intrinsic returning <4 x i32>.
    273   // CHECK-LABEL: test_mm_min_epu32
    274   // CHECK: call <4 x i32> @llvm.x86.sse41.pminud
    275   return _mm_min_epu32(x, y);
    276 }
    277 
    278 __m128i test_mm_minpos_epu16(__m128i x) {
          // Expected to be emitted as a call to the phminposuw target
          // intrinsic returning <8 x i16>.
    279   // CHECK-LABEL: test_mm_minpos_epu16
    280   // CHECK: call <8 x i16> @llvm.x86.sse41.phminposuw
    281   return _mm_minpos_epu16(x);
    282 }
    283 
    284 __m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) {
          // Called with control immediate 1: expected to be emitted as a call
          // to the mpsadbw intrinsic. Only the intrinsic name is matched; the
          // immediate is not verified.
    285   // CHECK-LABEL: test_mm_mpsadbw_epu8
    286   // CHECK: call <8 x i16> @llvm.x86.sse41.mpsadbw
    287   return _mm_mpsadbw_epu8(x, y, 1);
    288 }
    289 
    290 __m128i test_mm_mul_epi32(__m128i x, __m128i y) {
          // Expected to be emitted as a call to the pmuldq target intrinsic
          // returning <2 x i64>.
    291   // CHECK-LABEL: test_mm_mul_epi32
    292   // CHECK: call <2 x i64> @llvm.x86.sse41.pmuldq
    293   return _mm_mul_epi32(x, y);
    294 }
    295 
    296 __m128i test_mm_mullo_epi32(__m128i x, __m128i y) {
          // Low-half 32-bit multiply: expected to appear as a generic IR mul
          // on <4 x i32>.
    297   // CHECK-LABEL: test_mm_mullo_epi32
    298   // CHECK: mul <4 x i32>
    299   return _mm_mullo_epi32(x, y);
    300 }
    301 
    302 __m128i test_mm_packus_epi32(__m128i x, __m128i y) {
          // Expected to be emitted as a call to the packusdw target intrinsic
          // returning <8 x i16>.
    303   // CHECK-LABEL: test_mm_packus_epi32
    304   // CHECK: call <8 x i16> @llvm.x86.sse41.packusdw
    305   return _mm_packus_epi32(x, y);
    306 }
    307 
    308 __m128d test_mm_round_pd(__m128d x) {
          // Packed-double round with explicit mode immediate 2: expected to
          // be emitted as a call to the round.pd intrinsic. Only the intrinsic
          // name is matched; the mode immediate is not verified.
    309   // CHECK-LABEL: test_mm_round_pd
    310   // CHECK: call <2 x double> @llvm.x86.sse41.round.pd
    311   return _mm_round_pd(x, 2);
    312 }
    313 
    314 __m128 test_mm_round_ps(__m128 x) {
          // Packed-float round with explicit mode immediate 2: expected to be
          // emitted as a call to the round.ps intrinsic. Only the intrinsic
          // name is matched; the mode immediate is not verified.
    315   // CHECK-LABEL: test_mm_round_ps
    316   // CHECK: call <4 x float> @llvm.x86.sse41.round.ps
    317   return _mm_round_ps(x, 2);
    318 }
    319 
    320 __m128d test_mm_round_sd(__m128d x, __m128d y) {
          // Scalar-double round with explicit mode immediate 2: expected to
          // be emitted as a call to the round.sd intrinsic. Only the intrinsic
          // name is matched; the mode immediate is not verified.
    321   // CHECK-LABEL: test_mm_round_sd
    322   // CHECK: call <2 x double> @llvm.x86.sse41.round.sd
    323   return _mm_round_sd(x, y, 2);
    324 }
    325 
    326 __m128 test_mm_round_ss(__m128 x, __m128 y) {
          // Scalar-float round with explicit mode immediate 2: expected to be
          // emitted as a call to the round.ss intrinsic. Only the intrinsic
          // name is matched; the mode immediate is not verified.
    327   // CHECK-LABEL: test_mm_round_ss
    328   // CHECK: call <4 x float> @llvm.x86.sse41.round.ss
    329   return _mm_round_ss(x, y, 2);
    330 }
    331 
    332 __m128i test_mm_stream_load_si128(__m128i const *a) {
          // Streaming load through pointer a: expected to be emitted as a
          // call to the movntdqa target intrinsic returning <2 x i64>.
    333   // CHECK-LABEL: test_mm_stream_load_si128
    334   // CHECK: call <2 x i64> @llvm.x86.sse41.movntdqa
    335   return _mm_stream_load_si128(a);
    336 }
    337 
    338 int test_mm_test_all_ones(__m128i x) {
          // Single-operand predicate: expected to be emitted as a call to the
          // ptestc target intrinsic returning i32.
    339   // CHECK-LABEL: test_mm_test_all_ones
    340   // CHECK: call i32 @llvm.x86.sse41.ptestc
    341   return _mm_test_all_ones(x);
    342 }
    343 
    344 int test_mm_test_all_zeros(__m128i x, __m128i y) {
          // Expected to be emitted as a call to the ptestz target intrinsic
          // returning i32.
    345   // CHECK-LABEL: test_mm_test_all_zeros
    346   // CHECK: call i32 @llvm.x86.sse41.ptestz
    347   return _mm_test_all_zeros(x, y);
    348 }
    349 
    350 int test_mm_test_mix_ones_zeros(__m128i x, __m128i y) {
          // Expected to be emitted as a call to the ptestnzc target intrinsic
          // returning i32.
    351   // CHECK-LABEL: test_mm_test_mix_ones_zeros
    352   // CHECK: call i32 @llvm.x86.sse41.ptestnzc
    353   return _mm_test_mix_ones_zeros(x, y);
    354 }
    355 
    356 int test_mm_testc_si128(__m128i x, __m128i y) {
          // Expected to be emitted as a call to the ptestc target intrinsic
          // returning i32.
    357   // CHECK-LABEL: test_mm_testc_si128
    358   // CHECK: call i32 @llvm.x86.sse41.ptestc
    359   return _mm_testc_si128(x, y);
    360 }
    361 
    362 int test_mm_testnzc_si128(__m128i x, __m128i y) {
          // Expected to be emitted as a call to the ptestnzc target intrinsic
          // returning i32.
    363   // CHECK-LABEL: test_mm_testnzc_si128
    364   // CHECK: call i32 @llvm.x86.sse41.ptestnzc
    365   return _mm_testnzc_si128(x, y);
    366 }
    367 
    368 int test_mm_testz_si128(__m128i x, __m128i y) {
          // Expected to be emitted as a call to the ptestz target intrinsic
          // returning i32.
    369   // CHECK-LABEL: test_mm_testz_si128
    370   // CHECK: call i32 @llvm.x86.sse41.ptestz
    371   return _mm_testz_si128(x, y);
    372 }
    373