// RUN: %clang_cc1 -ffreestanding -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -emit-llvm %s -o - | FileCheck %s
      2 
      3 #include <xmmintrin.h>
      4 #include <emmintrin.h>
      5 #include <smmintrin.h>
      6 
      7 __m128 test_rsqrt_ss(__m128 x) {
      8   // CHECK: define {{.*}} @test_rsqrt_ss
      9   // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss
     10   // CHECK: extractelement <4 x float> {{.*}}, i32 0
     11   // CHECK: extractelement <4 x float> {{.*}}, i32 1
     12   // CHECK: extractelement <4 x float> {{.*}}, i32 2
     13   // CHECK: extractelement <4 x float> {{.*}}, i32 3
     14   return _mm_rsqrt_ss(x);
     15 }
     16 
     17 __m128 test_rcp_ss(__m128 x) {
     18   // CHECK: define {{.*}} @test_rcp_ss
     19   // CHECK: call <4 x float> @llvm.x86.sse.rcp.ss
     20   // CHECK: extractelement <4 x float> {{.*}}, i32 0
     21   // CHECK: extractelement <4 x float> {{.*}}, i32 1
     22   // CHECK: extractelement <4 x float> {{.*}}, i32 2
     23   // CHECK: extractelement <4 x float> {{.*}}, i32 3
     24   return _mm_rcp_ss(x);
     25 }
     26 
     27 __m128 test_sqrt_ss(__m128 x) {
     28   // CHECK: define {{.*}} @test_sqrt_ss
     29   // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ss
     30   // CHECK: extractelement <4 x float> {{.*}}, i32 0
     31   // CHECK: extractelement <4 x float> {{.*}}, i32 1
     32   // CHECK: extractelement <4 x float> {{.*}}, i32 2
     33   // CHECK: extractelement <4 x float> {{.*}}, i32 3
     34   return _mm_sqrt_ss(x);
     35 }
     36 
     37 __m128 test_loadl_pi(__m128 x, void* y) {
     38   // CHECK: define {{.*}} @test_loadl_pi
     39   // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
     40   // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
     41   // CHECK: shufflevector {{.*}} <4 x i32> <i32 4, i32 5, i32 2, i32 3>
     42   return _mm_loadl_pi(x,y);
     43 }
     44 
     45 __m128 test_loadh_pi(__m128 x, void* y) {
     46   // CHECK: define {{.*}} @test_loadh_pi
     47   // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
     48   // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
     49   // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1, i32 4, i32 5>
     50   return _mm_loadh_pi(x,y);
     51 }
     52 
     53 __m128 test_load_ss(void* y) {
     54   // CHECK: define {{.*}} @test_load_ss
     55   // CHECK: load float, float* {{.*}}, align 1{{$}}
     56   return _mm_load_ss(y);
     57 }
     58 
     59 __m128 test_load1_ps(void* y) {
     60   // CHECK: define {{.*}} @test_load1_ps
     61   // CHECK: load float, float* {{.*}}, align 1{{$}}
     62   return _mm_load1_ps(y);
     63 }
     64 
     65 void test_store_ss(__m128 x, void* y) {
     66   // CHECK-LABEL: define void @test_store_ss
     67   // CHECK: store {{.*}} float* {{.*}}, align 1{{$}}
     68   _mm_store_ss(y, x);
     69 }
     70 
     71 __m128d test_load1_pd(__m128 x, void* y) {
     72   // CHECK: define {{.*}} @test_load1_pd
     73   // CHECK: load double, double* {{.*}}, align 1{{$}}
     74   return _mm_load1_pd(y);
     75 }
     76 
     77 __m128d test_loadr_pd(__m128 x, void* y) {
     78   // CHECK: define {{.*}} @test_loadr_pd
     79   // CHECK: load <2 x double>, <2 x double>* {{.*}}, align 16{{$}}
     80   return _mm_loadr_pd(y);
     81 }
     82 
     83 __m128d test_load_sd(void* y) {
     84   // CHECK: define {{.*}} @test_load_sd
     85   // CHECK: load double, double* {{.*}}, align 1{{$}}
     86   return _mm_load_sd(y);
     87 }
     88 
     89 __m128d test_loadh_pd(__m128d x, void* y) {
     90   // CHECK: define {{.*}} @test_loadh_pd
     91   // CHECK: load double, double* {{.*}}, align 1{{$}}
     92   return _mm_loadh_pd(x, y);
     93 }
     94 
     95 __m128d test_loadl_pd(__m128d x, void* y) {
     96   // CHECK: define {{.*}} @test_loadl_pd
     97   // CHECK: load double, double* {{.*}}, align 1{{$}}
     98   return _mm_loadl_pd(x, y);
     99 }
    100 
    101 void test_store_sd(__m128d x, void* y) {
    102   // CHECK-LABEL: define void @test_store_sd
    103   // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
    104   _mm_store_sd(y, x);
    105 }
    106 
    107 void test_store1_pd(__m128d x, void* y) {
    108   // CHECK-LABEL: define void @test_store1_pd
    109   // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
    110   // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
    111   _mm_store1_pd(y, x);
    112 }
    113 
    114 void test_storer_pd(__m128d x, void* y) {
    115   // CHECK-LABEL: define void @test_storer_pd
    116   // CHECK: store {{.*}} <2 x double>* {{.*}}, align 16{{$}}
    117   _mm_storer_pd(y, x);
    118 }
    119 
    120 void test_storeh_pd(__m128d x, void* y) {
    121   // CHECK-LABEL: define void @test_storeh_pd
    122   // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
    123   _mm_storeh_pd(y, x);
    124 }
    125 
    126 void test_storel_pd(__m128d x, void* y) {
    127   // CHECK-LABEL: define void @test_storel_pd
    128   // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
    129   _mm_storel_pd(y, x);
    130 }
    131 
    132 __m128i test_loadl_epi64(void* y) {
    133   // CHECK: define {{.*}} @test_loadl_epi64
    134   // CHECK: load i64, i64* {{.*}}, align 1{{$}}
    135   return _mm_loadl_epi64(y);
    136 }
    137 
    138 void test_storel_epi64(__m128i x, void* y) {
    139   // CHECK-LABEL: define void @test_storel_epi64
    140   // CHECK: store {{.*}} i64* {{.*}}, align 1{{$}}
    141   _mm_storel_epi64(y, x);
    142 }
    143 
    144 void test_stream_si32(int x, void *y) {
    145   // CHECK-LABEL: define void @test_stream_si32
    146   // CHECK: store {{.*}} i32* {{.*}}, align 1, !nontemporal
    147   _mm_stream_si32(y, x);
    148 }
    149 
    150 void test_stream_si64(long long x, void *y) {
    151   // CHECK-LABEL: define void @test_stream_si64
    152   // CHECK: store {{.*}} i64* {{.*}}, align 1, !nontemporal
    153   _mm_stream_si64(y, x);
    154 }
    155 
    156 void test_stream_si128(__m128i x, void *y) {
    157   // CHECK-LABEL: define void @test_stream_si128
    158   // CHECK: store {{.*}} <2 x i64>* {{.*}}, align 16, !nontemporal
    159   _mm_stream_si128(y, x);
    160 }
    161 
    162 void test_extract_epi16(__m128i __a) {
    163   // CHECK-LABEL: define void @test_extract_epi16
    164   // CHECK: [[x:%.*]] = and i32 %{{.*}}, 7
    165   // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]]
    166   _mm_extract_epi16(__a, 8);
    167 }
    168 
    169 __m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
    170   // CHECK-LABEL: @test_mm_cmpeq_ss
    171   // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
    172   return _mm_cmpeq_ss(__a, __b);
    173 }
    174 
    175 __m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) {
    176   // CHECK-LABEL: @test_mm_cmplt_ss
    177   // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
    178   return _mm_cmplt_ss(__a, __b);
    179 }
    180 
    181 __m128 test_mm_cmple_ss(__m128 __a, __m128 __b) {
    182   // CHECK-LABEL: @test_mm_cmple_ss
    183   // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
    184   return _mm_cmple_ss(__a, __b);
    185 }
    186 
    187 __m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
    188   // CHECK-LABEL: @test_mm_cmpunord_ss
    189   // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
    190   return _mm_cmpunord_ss(__a, __b);
    191 }
    192 
    193 __m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
    194   // CHECK-LABEL: @test_mm_cmpneq_ss
    195   // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
    196   return _mm_cmpneq_ss(__a, __b);
    197 }
    198 
    199 __m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
    200   // CHECK-LABEL: @test_mm_cmpnlt_ss
    201   // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
    202   return _mm_cmpnlt_ss(__a, __b);
    203 }
    204 
    205 __m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
    206   // CHECK-LABEL: @test_mm_cmpnle_ss
    207   // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
    208   return _mm_cmpnle_ss(__a, __b);
    209 }
    210 
    211 __m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) {
    212   // CHECK-LABEL: @test_mm_cmpord_ss
    213   // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
    214   return _mm_cmpord_ss(__a, __b);
    215 }
    216 
    217 __m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
    218   // CHECK-LABEL: @test_mm_cmpgt_ss
    219   // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
    220   return _mm_cmpgt_ss(__a, __b);
    221 }
    222 
    223 __m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
    224   // CHECK-LABEL: @test_mm_cmpge_ss
    225   // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
    226   return _mm_cmpge_ss(__a, __b);
    227 }
    228 
    229 __m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
    230   // CHECK-LABEL: @test_mm_cmpngt_ss
    231   // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
    232   return _mm_cmpngt_ss(__a, __b);
    233 }
    234 
    235 __m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
    236   // CHECK-LABEL: @test_mm_cmpnge_ss
    237   // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
    238   return _mm_cmpnge_ss(__a, __b);
    239 }
    240 
    241 __m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
    242   // CHECK-LABEL: @test_mm_cmpeq_ps
    243   // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
    244   return _mm_cmpeq_ps(__a, __b);
    245 }
    246 
    247 __m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
    248   // CHECK-LABEL: @test_mm_cmplt_ps
    249   // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
    250   return _mm_cmplt_ps(__a, __b);
    251 }
    252 
    253 __m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
    254   // CHECK-LABEL: @test_mm_cmple_ps
    255   // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
    256   return _mm_cmple_ps(__a, __b);
    257 }
    258 
    259 __m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
    260   // CHECK-LABEL: @test_mm_cmpunord_ps
    261   // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
    262   return _mm_cmpunord_ps(__a, __b);
    263 }
    264 
    265 __m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
    266   // CHECK-LABEL: @test_mm_cmpneq_ps
    267   // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
    268   return _mm_cmpneq_ps(__a, __b);
    269 }
    270 
    271 __m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
    272   // CHECK-LABEL: @test_mm_cmpnlt_ps
    273   // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
    274   return _mm_cmpnlt_ps(__a, __b);
    275 }
    276 
    277 __m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
    278   // CHECK-LABEL: @test_mm_cmpnle_ps
    279   // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
    280   return _mm_cmpnle_ps(__a, __b);
    281 }
    282 
    283 __m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
    284   // CHECK-LABEL: @test_mm_cmpord_ps
    285   // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
    286   return _mm_cmpord_ps(__a, __b);
    287 }
    288 
    289 __m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
    290   // CHECK-LABEL: @test_mm_cmpgt_ps
    291   // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
    292   return _mm_cmpgt_ps(__a, __b);
    293 }
    294 
    295 __m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
    296   // CHECK-LABEL: @test_mm_cmpge_ps
    297   // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
    298   return _mm_cmpge_ps(__a, __b);
    299 }
    300 
    301 __m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
    302   // CHECK-LABEL: @test_mm_cmpngt_ps
    303   // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
    304   return _mm_cmpngt_ps(__a, __b);
    305 }
    306 
    307 __m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
    308   // CHECK-LABEL: @test_mm_cmpnge_ps
    309   // CHECK: @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
    310   return _mm_cmpnge_ps(__a, __b);
    311 }
    312 
    313 __m128d test_mm_cmpeq_sd(__m128d __a, __m128d __b) {
    314   // CHECK-LABEL: @test_mm_cmpeq_sd
    315   // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
    316   return _mm_cmpeq_sd(__a, __b);
    317 }
    318 
    319 __m128d test_mm_cmplt_sd(__m128d __a, __m128d __b) {
    320   // CHECK-LABEL: @test_mm_cmplt_sd
    321   // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
    322   return _mm_cmplt_sd(__a, __b);
    323 }
    324 
    325 __m128d test_mm_cmple_sd(__m128d __a, __m128d __b) {
    326   // CHECK-LABEL: @test_mm_cmple_sd
    327   // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
    328   return _mm_cmple_sd(__a, __b);
    329 }
    330 
    331 __m128d test_mm_cmpunord_sd(__m128d __a, __m128d __b) {
    332   // CHECK-LABEL: @test_mm_cmpunord_sd
    333   // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
    334   return _mm_cmpunord_sd(__a, __b);
    335 }
    336 
    337 __m128d test_mm_cmpneq_sd(__m128d __a, __m128d __b) {
    338   // CHECK-LABEL: @test_mm_cmpneq_sd
    339   // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
    340   return _mm_cmpneq_sd(__a, __b);
    341 }
    342 
    343 __m128d test_mm_cmpnlt_sd(__m128d __a, __m128d __b) {
    344   // CHECK-LABEL: @test_mm_cmpnlt_sd
    345   // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
    346   return _mm_cmpnlt_sd(__a, __b);
    347 }
    348 
    349 __m128d test_mm_cmpnle_sd(__m128d __a, __m128d __b) {
    350   // CHECK-LABEL: @test_mm_cmpnle_sd
    351   // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
    352   return _mm_cmpnle_sd(__a, __b);
    353 }
    354 
    355 __m128d test_mm_cmpord_sd(__m128d __a, __m128d __b) {
    356   // CHECK-LABEL: @test_mm_cmpord_sd
    357   // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
    358   return _mm_cmpord_sd(__a, __b);
    359 }
    360 
    361 __m128d test_mm_cmpgt_sd(__m128d __a, __m128d __b) {
    362   // CHECK-LABEL: @test_mm_cmpgt_sd
    363   // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
    364   return _mm_cmpgt_sd(__a, __b);
    365 }
    366 
    367 __m128d test_mm_cmpge_sd(__m128d __a, __m128d __b) {
    368   // CHECK-LABEL: @test_mm_cmpge_sd
    369   // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
    370   return _mm_cmpge_sd(__a, __b);
    371 }
    372 
    373 __m128d test_mm_cmpngt_sd(__m128d __a, __m128d __b) {
    374   // CHECK-LABEL: @test_mm_cmpngt_sd
    375   // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
    376   return _mm_cmpngt_sd(__a, __b);
    377 }
    378 
    379 __m128d test_mm_cmpnge_sd(__m128d __a, __m128d __b) {
    380   // CHECK-LABEL: @test_mm_cmpnge_sd
    381   // CHECK: @llvm.x86.sse2.cmp.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
    382   return _mm_cmpnge_sd(__a, __b);
    383 }
    384 
    385 __m128d test_mm_cmpeq_pd(__m128d __a, __m128d __b) {
    386   // CHECK-LABEL: @test_mm_cmpeq_pd
    387   // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 0)
    388   return _mm_cmpeq_pd(__a, __b);
    389 }
    390 
    391 __m128d test_mm_cmplt_pd(__m128d __a, __m128d __b) {
    392   // CHECK-LABEL: @test_mm_cmplt_pd
    393   // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
    394   return _mm_cmplt_pd(__a, __b);
    395 }
    396 
    397 __m128d test_mm_cmple_pd(__m128d __a, __m128d __b) {
    398   // CHECK-LABEL: @test_mm_cmple_pd
    399   // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
    400   return _mm_cmple_pd(__a, __b);
    401 }
    402 
    403 __m128d test_mm_cmpunord_pd(__m128d __a, __m128d __b) {
    404   // CHECK-LABEL: @test_mm_cmpunord_pd
    405   // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 3)
    406   return _mm_cmpunord_pd(__a, __b);
    407 }
    408 
    409 __m128d test_mm_cmpneq_pd(__m128d __a, __m128d __b) {
    410   // CHECK-LABEL: @test_mm_cmpneq_pd
    411   // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 4)
    412   return _mm_cmpneq_pd(__a, __b);
    413 }
    414 
    415 __m128d test_mm_cmpnlt_pd(__m128d __a, __m128d __b) {
    416   // CHECK-LABEL: @test_mm_cmpnlt_pd
    417   // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
    418   return _mm_cmpnlt_pd(__a, __b);
    419 }
    420 
    421 __m128d test_mm_cmpnle_pd(__m128d __a, __m128d __b) {
    422   // CHECK-LABEL: @test_mm_cmpnle_pd
    423   // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
    424   return _mm_cmpnle_pd(__a, __b);
    425 }
    426 
    427 __m128d test_mm_cmpord_pd(__m128d __a, __m128d __b) {
    428   // CHECK-LABEL: @test_mm_cmpord_pd
    429   // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 7)
    430   return _mm_cmpord_pd(__a, __b);
    431 }
    432 
    433 __m128d test_mm_cmpgt_pd(__m128d __a, __m128d __b) {
    434   // CHECK-LABEL: @test_mm_cmpgt_pd
    435   // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 1)
    436   return _mm_cmpgt_pd(__a, __b);
    437 }
    438 
    439 __m128d test_mm_cmpge_pd(__m128d __a, __m128d __b) {
    440   // CHECK-LABEL: @test_mm_cmpge_pd
    441   // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 2)
    442   return _mm_cmpge_pd(__a, __b);
    443 }
    444 
    445 __m128d test_mm_cmpngt_pd(__m128d __a, __m128d __b) {
    446   // CHECK-LABEL: @test_mm_cmpngt_pd
    447   // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 5)
    448   return _mm_cmpngt_pd(__a, __b);
    449 }
    450 
    451 __m128d test_mm_cmpnge_pd(__m128d __a, __m128d __b) {
    452   // CHECK-LABEL: @test_mm_cmpnge_pd
    453   // CHECK: @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 6)
    454   return _mm_cmpnge_pd(__a, __b);
    455 }
    456 
    457 __m128 test_mm_slli_si128(__m128 a) {
    458   // CHECK-LABEL: @test_mm_slli_si128
    459   // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
    460   return _mm_slli_si128(a, 5);
    461 }
    462 
    463 __m128 test_mm_bslli_si128(__m128 a) {
    464   // CHECK-LABEL: @test_mm_bslli_si128
    465   // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
    466   return _mm_bslli_si128(a, 5);
    467 }
    468 
    469 __m128 test_mm_srli_si128(__m128 a) {
    470   // CHECK-LABEL: @test_mm_srli_si128
    471   // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
    472   return _mm_srli_si128(a, 5);
    473 }
    474 
    475 __m128 test_mm_bsrli_si128(__m128 a) {
    476   // CHECK-LABEL: @test_mm_bsrli_si128
    477   // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> {{.*}}, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
    478   return _mm_bsrli_si128(a, 5);
    479 }
    480 
    481 __m128 test_mm_undefined_ps() {
    482   // CHECK-LABEL: @test_mm_undefined_ps
    483   // CHECK: ret <4 x float> undef
    484   return _mm_undefined_ps();
    485 }
    486 
    487 __m128d test_mm_undefined_pd() {
    488   // CHECK-LABEL: @test_mm_undefined_pd
    489   // CHECK: ret <2 x double> undef
    490   return _mm_undefined_pd();
    491 }
    492 
    493 __m128i test_mm_undefined_si128() {
    494   // CHECK-LABEL: @test_mm_undefined_si128
    495   // CHECK: ret <2 x i64> undef
    496   return _mm_undefined_si128();
    497 }
    498 
    499 __m64 test_mm_add_si64(__m64 __a, __m64 __b) {
    500   // CHECK-LABEL: @test_mm_add_si64
    501   // CHECK @llvm.x86.mmx.padd.q(x86_mmx %{{.*}}, x86_mmx %{{.*}})
    502   return _mm_add_si64(__a, __b);
    503 }
    504 
    505 __m64 test_mm_sub_si64(__m64 __a, __m64 __b) {
    506   // CHECK-LABEL: @test_mm_sub_si64
    507   // CHECK @llvm.x86.mmx.psub.q(x86_mmx %{{.*}}, x86_mmx %{{.*}})
    508   return _mm_sub_si64(__a, __b);
    509 }
    510 
    511 __m64 test_mm_mul_su32(__m64 __a, __m64 __b) {
    512   // CHECK-LABEL: @test_mm_mul_su32
    513   // CHECK @llvm.x86.mmx.pmulu.dq(x86_mmx %{{.*}}, x86_mmx %{{.*}})
    514   return _mm_mul_su32(__a, __b);
    515 }
    516 
    517 void test_mm_pause() {
    518   // CHECK-LABEL: @test_mm_pause
    519   // CHECK @llvm.x86.sse2.pause()
    520   return _mm_pause();
    521 }
    522