// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Werror | FileCheck %s

// Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H

#include <x86intrin.h>

// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll

__m128 test_mm_add_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ps
  // CHECK: fadd <4 x float>
  return _mm_add_ps(A, B);
}

__m128 test_mm_add_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fadd float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_add_ss(A, B);
}

__m128 test_mm_and_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_and_ps
  // CHECK: and <4 x i32>
  return _mm_and_ps(A, B);
}

__m128 test_mm_andnot_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_andnot_ps
  // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
  // CHECK: and <4 x i32>
  return _mm_andnot_ps(A, B);
}

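// Packed compares lower to fcmp + sext + bitcast. Scalar compares lower to the
// @llvm.x86.sse.cmp.ss intrinsic, whose imm8 selects the predicate:
// 0=EQ, 1=LT, 2=LE, 3=UNORD, 4=NEQ, 5=NLT, 6=NLE, 7=ORD.
// The ge/gt (and nge/ngt) scalar forms have no direct predicate, so the header
// swaps the operands and merges the low element back with a shufflevector.
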
__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpeq_ps
  // CHECK:         [[CMP:%.*]] = fcmp oeq <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpeq_ps(__a, __b);
}

__m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpeq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
  return _mm_cmpeq_ss(__a, __b);
}

__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpge_ps
  // CHECK:         [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpge_ps(__a, __b);
}

__m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpge_ss(__a, __b);
}

__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpgt_ps
  // CHECK:         [[CMP:%.*]] = fcmp olt <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpgt_ps(__a, __b);
}

__m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpgt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpgt_ss(__a, __b);
}

__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmple_ps
  // CHECK:         [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmple_ps(__a, __b);
}

__m128 test_mm_cmple_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmple_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  return _mm_cmple_ss(__a, __b);
}

__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmplt_ps
  // CHECK:         [[CMP:%.*]] = fcmp olt <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmplt_ps(__a, __b);
}

__m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmplt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  return _mm_cmplt_ss(__a, __b);
}

__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpneq_ps
  // CHECK:         [[CMP:%.*]] = fcmp une <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpneq_ps(__a, __b);
}

__m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpneq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
  return _mm_cmpneq_ss(__a, __b);
}

__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnge_ps
  // CHECK:         [[CMP:%.*]] = fcmp ugt <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpnge_ps(__a, __b);
}

__m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpnge_ss(__a, __b);
}

__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpngt_ps
  // CHECK:         [[CMP:%.*]] = fcmp uge <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpngt_ps(__a, __b);
}

__m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpngt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpngt_ss(__a, __b);
}

__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnle_ps
  // CHECK:         [[CMP:%.*]] = fcmp ugt <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpnle_ps(__a, __b);
}

__m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnle_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  return _mm_cmpnle_ss(__a, __b);
}

__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnlt_ps
  // CHECK:         [[CMP:%.*]] = fcmp uge <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpnlt_ps(__a, __b);
}

__m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnlt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  return _mm_cmpnlt_ss(__a, __b);
}

__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpord_ps
  // CHECK:         [[CMP:%.*]] = fcmp ord <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpord_ps(__a, __b);
}

__m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
  return _mm_cmpord_ss(__a, __b);
}

__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpunord_ps
  // CHECK:         [[CMP:%.*]] = fcmp uno <4 x float>
  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT:    ret <4 x float> [[BC]]
  return _mm_cmpunord_ps(__a, __b);
}

__m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpunord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
  return _mm_cmpunord_ss(__a, __b);
}

int test_mm_comieq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comieq_ss
  // CHECK: call i32 @llvm.x86.sse.comieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comieq_ss(A, B);
}

int test_mm_comige_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comige_ss
  // CHECK: call i32 @llvm.x86.sse.comige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comige_ss(A, B);
}

int test_mm_comigt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comigt_ss
  // CHECK: call i32 @llvm.x86.sse.comigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comigt_ss(A, B);
}

int test_mm_comile_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comile_ss
  // CHECK: call i32 @llvm.x86.sse.comile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comile_ss(A, B);
}

int test_mm_comilt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comilt_ss
  // CHECK: call i32 @llvm.x86.sse.comilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comilt_ss(A, B);
}

int test_mm_comineq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comineq_ss
  // CHECK: call i32 @llvm.x86.sse.comineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comineq_ss(A, B);
}

int test_mm_cvt_ss2si(__m128 A) {
  // CHECK-LABEL: test_mm_cvt_ss2si
  // CHECK: call i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
  return _mm_cvt_ss2si(A);
}

__m128 test_mm_cvtsi32_ss(__m128 A, int B) {
  // CHECK-LABEL: test_mm_cvtsi32_ss
  // CHECK: sitofp i32 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvtsi32_ss(A, B);
}

__m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
  // CHECK-LABEL: test_mm_cvtsi64_ss
  // CHECK: sitofp i64 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvtsi64_ss(A, B);
}

float test_mm_cvtss_f32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_f32
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  return _mm_cvtss_f32(A);
}

int test_mm_cvtss_si32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_si32
  // CHECK: call i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
  return _mm_cvtss_si32(A);
}

long long test_mm_cvtss_si64(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_si64
  // CHECK: call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %{{.*}})
  return _mm_cvtss_si64(A);
}

int test_mm_cvtt_ss2si(__m128 A) {
  // CHECK-LABEL: test_mm_cvtt_ss2si
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fptosi float %{{.*}} to i32
  return _mm_cvtt_ss2si(A);
}

int test_mm_cvttss_si32(__m128 A) {
  // CHECK-LABEL: test_mm_cvttss_si32
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fptosi float %{{.*}} to i32
  return _mm_cvttss_si32(A);
}

long long test_mm_cvttss_si64(__m128 A) {
  // CHECK-LABEL: test_mm_cvttss_si64
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fptosi float %{{.*}} to i64
  return _mm_cvttss_si64(A);
}

__m128 test_mm_div_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_div_ps
  // CHECK: fdiv <4 x float>
  return _mm_div_ps(A, B);
}

__m128 test_mm_div_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_div_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fdiv float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_div_ss(A, B);
}

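// The _MM_GET_*/_MM_SET_* helpers read or write MXCSR via stmxcsr/ldmxcsr and
// mask the relevant field: exception state is bits 0-5 (63), the exception
// mask is bits 7-12 (8064), rounding control is bits 13-14 (24576), and
// flush-to-zero is bit 15 (32768).
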
unsigned int test_MM_GET_EXCEPTION_MASK() {
  // CHECK-LABEL: test_MM_GET_EXCEPTION_MASK
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 8064
  return _MM_GET_EXCEPTION_MASK();
}

unsigned int test_MM_GET_EXCEPTION_STATE() {
  // CHECK-LABEL: test_MM_GET_EXCEPTION_STATE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 63
  return _MM_GET_EXCEPTION_STATE();
}

unsigned int test_MM_GET_FLUSH_ZERO_MODE() {
  // CHECK-LABEL: test_MM_GET_FLUSH_ZERO_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 32768
  return _MM_GET_FLUSH_ZERO_MODE();
}

unsigned int test_MM_GET_ROUNDING_MODE() {
  // CHECK-LABEL: test_MM_GET_ROUNDING_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 24576
  return _MM_GET_ROUNDING_MODE();
}

unsigned int test_mm_getcsr() {
  // CHECK-LABEL: test_mm_getcsr
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: load i32
  return _mm_getcsr();
}

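// Aligned loads (_mm_load_ps, _mm_loadr_ps) keep align 16; _mm_loadu_ps,
// _mm_load_ss and the loadh/loadl_pi variants tolerate unaligned pointers,
// so they lower to loads with align 1.
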
__m128 test_mm_load_ps(float* y) {
  // CHECK-LABEL: test_mm_load_ps
  // CHECK: load <4 x float>, <4 x float>* {{.*}}, align 16
  return _mm_load_ps(y);
}

__m128 test_mm_load_ps1(float* y) {
  // CHECK-LABEL: test_mm_load_ps1
  // CHECK: load float, float* %{{.*}}, align 4
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3
  return _mm_load_ps1(y);
}

__m128 test_mm_load_ss(float* y) {
  // CHECK-LABEL: test_mm_load_ss
  // CHECK: load float, float* {{.*}}, align 1{{$}}
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3
  return _mm_load_ss(y);
}

__m128 test_mm_load1_ps(float* y) {
  // CHECK-LABEL: test_mm_load1_ps
  // CHECK: load float, float* %{{.*}}, align 4
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3
  return _mm_load1_ps(y);
}

__m128 test_mm_loadh_pi(__m128 x, __m64* y) {
  // CHECK-LABEL: test_mm_loadh_pi
  // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_loadh_pi(x,y);
}

__m128 test_mm_loadl_pi(__m128 x, __m64* y) {
  // CHECK-LABEL: test_mm_loadl_pi
  // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  return _mm_loadl_pi(x,y);
}

__m128 test_mm_loadr_ps(float* A) {
  // CHECK-LABEL: test_mm_loadr_ps
  // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 16
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  return _mm_loadr_ps(A);
}

__m128 test_mm_loadu_ps(float* A) {
  // CHECK-LABEL: test_mm_loadu_ps
  // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1{{$}}
  return _mm_loadu_ps(A);
}

__m128 test_mm_max_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_max_ps
  // CHECK: @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_max_ps(A, B);
}

__m128 test_mm_max_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_max_ss
  // CHECK: @llvm.x86.sse.max.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_max_ss(A, B);
}

__m128 test_mm_min_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_min_ps
  // CHECK: @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_min_ps(A, B);
}

__m128 test_mm_min_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_min_ss
  // CHECK: @llvm.x86.sse.min.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_min_ss(A, B);
}

__m128 test_mm_move_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_move_ss
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_move_ss(A, B);
}

__m128 test_mm_movehl_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_movehl_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  return _mm_movehl_ps(A, B);
}

__m128 test_mm_movelh_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_movelh_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_movelh_ps(A, B);
}

int test_mm_movemask_ps(__m128 A) {
  // CHECK-LABEL: test_mm_movemask_ps
  // CHECK: call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %{{.*}})
  return _mm_movemask_ps(A);
}

__m128 test_mm_mul_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_mul_ps
  // CHECK: fmul <4 x float>
  return _mm_mul_ps(A, B);
}

__m128 test_mm_mul_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_mul_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fmul float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_mul_ss(A, B);
}

__m128 test_mm_or_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_or_ps
  // CHECK: or <4 x i32>
  return _mm_or_ps(A, B);
}

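// _mm_prefetch lowers to @llvm.prefetch(ptr, rw, locality, cache type); the
// hint 0 (_MM_HINT_NTA) used here gives rw=0 (read), locality=0 and
// cache type=1 (data cache).
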
void test_mm_prefetch(char const* p) {
  // CHECK-LABEL: test_mm_prefetch
  // CHECK: call void @llvm.prefetch(i8* {{.*}}, i32 0, i32 0, i32 1)
  _mm_prefetch(p, 0);
}

__m128 test_mm_rcp_ps(__m128 x) {
  // CHECK-LABEL: test_mm_rcp_ps
  // CHECK: call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> {{.*}})
  return _mm_rcp_ps(x);
}

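// The scalar rcp/rsqrt/sqrt helpers call the corresponding .ss intrinsic and
// then rebuild the vector element by element: lane 0 comes from the intrinsic
// result and lanes 1-3 keep the original operand's upper elements, hence the
// extract/insert chains below.
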
__m128 test_mm_rcp_ss(__m128 x) {
  // CHECK-LABEL: test_mm_rcp_ss
  // CHECK: call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> {{.*}})
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_rcp_ss(x);
}

__m128 test_mm_rsqrt_ps(__m128 x) {
  // CHECK-LABEL: test_mm_rsqrt_ps
  // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> {{.*}})
  return _mm_rsqrt_ps(x);
}

__m128 test_mm_rsqrt_ss(__m128 x) {
  // CHECK-LABEL: test_mm_rsqrt_ss
  // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> {{.*}})
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_rsqrt_ss(x);
}

void test_MM_SET_EXCEPTION_MASK(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_EXCEPTION_MASK
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -8065
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _MM_SET_EXCEPTION_MASK(A);
}

void test_MM_SET_EXCEPTION_STATE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_EXCEPTION_STATE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -64
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _MM_SET_EXCEPTION_STATE(A);
}

void test_MM_SET_FLUSH_ZERO_MODE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_FLUSH_ZERO_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -32769
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _MM_SET_FLUSH_ZERO_MODE(A);
}

__m128 test_mm_set_ps(float A, float B, float C, float D) {
  // CHECK-LABEL: test_mm_set_ps
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set_ps(A, B, C, D);
}

__m128 test_mm_set_ps1(float A) {
  // CHECK-LABEL: test_mm_set_ps1
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set_ps1(A);
}

void test_MM_SET_ROUNDING_MODE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_ROUNDING_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -24577
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _MM_SET_ROUNDING_MODE(A);
}

__m128 test_mm_set_ss(float A) {
  // CHECK-LABEL: test_mm_set_ss
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 3
  return _mm_set_ss(A);
}

__m128 test_mm_set1_ps(float A) {
  // CHECK-LABEL: test_mm_set1_ps
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set1_ps(A);
}

void test_mm_setcsr(unsigned int A) {
  // CHECK-LABEL: test_mm_setcsr
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _mm_setcsr(A);
}

__m128 test_mm_setr_ps(float A, float B, float C, float D) {
  // CHECK-LABEL: test_mm_setr_ps
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_setr_ps(A, B, C, D);
}

__m128 test_mm_setzero_ps() {
  // CHECK-LABEL: test_mm_setzero_ps
  // CHECK: store <4 x float> zeroinitializer
  return _mm_setzero_ps();
}

void test_mm_sfence() {
  // CHECK-LABEL: test_mm_sfence
  // CHECK: call void @llvm.x86.sse.sfence()
  _mm_sfence();
}

__m128 test_mm_shuffle_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_shuffle_ps
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
  return _mm_shuffle_ps(A, B, 0);
}

__m128 test_mm_sqrt_ps(__m128 x) {
  // CHECK-LABEL: test_mm_sqrt_ps
  // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> {{.*}})
  return _mm_sqrt_ps(x);
}

__m128 test_sqrt_ss(__m128 x) {
  // CHECK: define {{.*}} @test_sqrt_ss
  // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ss
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_sqrt_ss(x);
}

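// Aligned stores keep align 16, _mm_storeu_ps and _mm_store_ss drop to
// align 1, and _mm_stream_ps emits a store tagged with !nontemporal metadata.
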
void test_mm_store_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ps
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16
  _mm_store_ps(x, y);
}

void test_mm_store_ps1(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ps1
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16
  _mm_store_ps1(x, y);
}

void test_mm_store_ss(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ss
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: store float %{{.*}}, float* {{.*}}, align 1{{$}}
  _mm_store_ss(x, y);
}

void test_mm_store1_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store1_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16
  _mm_store1_ps(x, y);
}

void test_mm_storeh_pi(__m64* x,  __m128 y) {
  // CHECK-LABEL: test_mm_storeh_pi
  // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
  // CHECK: extractelement <2 x i64> %{{.*}}, i64 1
  // CHECK: store i64 %{{.*}}, i64* {{.*}}
  _mm_storeh_pi(x, y);
}

void test_mm_storel_pi(__m64* x,  __m128 y) {
  // CHECK-LABEL: test_mm_storel_pi
  // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
  // CHECK: extractelement <2 x i64> %{{.*}}, i64 0
  // CHECK: store i64 %{{.*}}, i64* {{.*}}
  _mm_storel_pi(x, y);
}

void test_mm_storer_ps(float* x,  __m128 y) {
  // CHECK-LABEL: test_mm_storer_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16
  _mm_storer_ps(x, y);
}

void test_mm_storeu_ps(float* x,  __m128 y) {
  // CHECK-LABEL: test_mm_storeu_ps
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_ps(x, y);
}

void test_mm_stream_ps(float*A, __m128 B) {
  // CHECK-LABEL: test_mm_stream_ps
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16, !nontemporal
  _mm_stream_ps(A, B);
}

__m128 test_mm_sub_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_sub_ps
  // CHECK: fsub <4 x float>
  return _mm_sub_ps(A, B);
}

__m128 test_mm_sub_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_sub_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fsub float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_sub_ss(A, B);
}

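// _MM_TRANSPOSE4_PS expands to two unpacklo, two unpackhi, and then
// movelh/movehl shuffles of the intermediates, which is why eight
// shufflevectors are expected.
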
void test_MM_TRANSPOSE4_PS(__m128 *A, __m128 *B, __m128 *C, __m128 *D) {
  // CHECK-LABEL: test_MM_TRANSPOSE4_PS
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  _MM_TRANSPOSE4_PS(*A, *B, *C, *D);
}

int test_mm_ucomieq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomieq_ss
  // CHECK: call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomieq_ss(A, B);
}

int test_mm_ucomige_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomige_ss
  // CHECK: call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomige_ss(A, B);
}

int test_mm_ucomigt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomigt_ss
  // CHECK: call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomigt_ss(A, B);
}

int test_mm_ucomile_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomile_ss
  // CHECK: call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomile_ss(A, B);
}

int test_mm_ucomilt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomilt_ss
  // CHECK: call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomilt_ss(A, B);
}

int test_mm_ucomineq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomineq_ss
  // CHECK: call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomineq_ss(A, B);
}

__m128 test_mm_undefined_ps() {
  // CHECK-LABEL: @test_mm_undefined_ps
  // CHECK: ret <4 x float> undef
  return _mm_undefined_ps();
}

__m128 test_mm_unpackhi_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_unpackhi_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  return _mm_unpackhi_ps(A, B);
}

__m128 test_mm_unpacklo_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_unpacklo_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  return _mm_unpacklo_ps(A, B);
}

__m128 test_mm_xor_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_xor_ps
  // CHECK: xor <4 x i32>
  return _mm_xor_ps(A, B);
}