Home | History | Annotate | Download | only in InstCombine
      1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
      2 ; RUN: opt < %s -instcombine -S | FileCheck %s
        ; The functions in this file feed x86 SSE scalar (".ss") intrinsics with
        ; vectors built from insertelement chains and then use either the whole
        ; result, a single extracted element, or an i32 result. The CHECK lines
        ; record which inserts/calls are expected to remain after -instcombine.
      3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
      4 
      5 define float @test_rcp_ss_0(float %a) {
        ; Only element 0 of the rcp.ss result is extracted. The CHECK lines expect
        ; the constant inserts into elements 1-3 of the operand to be removed,
        ; keeping just the insert of %a at index 0, the call, and the extract.
      6 ; CHECK-LABEL: @test_rcp_ss_0(
      7 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
      8 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP1]])
      9 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
     10 ; CHECK-NEXT:    ret float [[TMP3]]
     11 ;
     12   %1 = insertelement <4 x float> undef, float %a, i32 0
     13   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
     14   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
     15   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
     16   %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
     17   %6 = extractelement <4 x float> %5, i32 0
     18   ret float %6
     19 }
     20 
     21 define float @test_rcp_ss_1(float %a) {
        ; Element 1 of the rcp.ss result is extracted. The CHECK lines expect the
        ; whole sequence, including the call, to fold to the constant 1.0 that was
        ; inserted at index 1 of the operand.
     22 ; CHECK-LABEL: @test_rcp_ss_1(
     23 ; CHECK-NEXT:    ret float 1.000000e+00
     24 ;
     25   %1 = insertelement <4 x float> undef, float %a, i32 0
     26   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
     27   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
     28   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
     29   %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
     30   %6 = extractelement <4 x float> %5, i32 1
     31   ret float %6
     32 }
     33 
     34 define float @test_sqrt_ss_0(float %a) {
        ; Only element 0 of the sqrt.ss result is extracted. The CHECK lines expect
        ; the constant inserts into elements 1-3 of the operand to be removed,
        ; keeping just the insert of %a at index 0, the call, and the extract.
     35 ; CHECK-LABEL: @test_sqrt_ss_0(
     36 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
     37 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> [[TMP1]])
     38 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
     39 ; CHECK-NEXT:    ret float [[TMP3]]
     40 ;
     41   %1 = insertelement <4 x float> undef, float %a, i32 0
     42   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
     43   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
     44   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
     45   %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
     46   %6 = extractelement <4 x float> %5, i32 0
     47   ret float %6
     48 }
     49 
     50 define float @test_sqrt_ss_2(float %a) {
        ; Element 2 of the sqrt.ss result is extracted. The CHECK lines expect the
        ; whole sequence, including the call, to fold to the constant 2.0 that was
        ; inserted at index 2 of the operand.
     51 ; CHECK-LABEL: @test_sqrt_ss_2(
     52 ; CHECK-NEXT:    ret float 2.000000e+00
     53 ;
     54   %1 = insertelement <4 x float> undef, float %a, i32 0
     55   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
     56   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
     57   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
     58   %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
     59   %6 = extractelement <4 x float> %5, i32 2
     60   ret float %6
     61 }
     62 
     63 define float @test_rsqrt_ss_0(float %a) {
        ; Only element 0 of the rsqrt.ss result is extracted. The CHECK lines
        ; expect the constant inserts into elements 1-3 of the operand to be
        ; removed, keeping just the insert of %a at index 0, the call, and the
        ; extract.
     64 ; CHECK-LABEL: @test_rsqrt_ss_0(
     65 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
     66 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP1]])
     67 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
     68 ; CHECK-NEXT:    ret float [[TMP3]]
     69 ;
     70   %1 = insertelement <4 x float> undef, float %a, i32 0
     71   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
     72   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
     73   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
     74   %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
     75   %6 = extractelement <4 x float> %5, i32 0
     76   ret float %6
     77 }
     78 
     79 define float @test_rsqrt_ss_3(float %a) {
        ; Element 3 of the rsqrt.ss result is extracted. The CHECK lines expect the
        ; whole sequence, including the call, to fold to the constant 3.0 that was
        ; inserted at index 3 of the operand.
     80 ; CHECK-LABEL: @test_rsqrt_ss_3(
     81 ; CHECK-NEXT:    ret float 3.000000e+00
     82 ;
     83   %1 = insertelement <4 x float> undef, float %a, i32 0
     84   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
     85   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
     86   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
     87   %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
     88   %6 = extractelement <4 x float> %5, i32 3
     89   ret float %6
     90 }
     91 
     92 define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
        ; The full vector result of add.ss is returned. Elements 1-3 of the second
        ; operand are overwritten with constants before the call; the CHECK lines
        ; expect those inserts to be removed so the call takes %b directly.
     93 ; CHECK-LABEL: @test_add_ss(
     94 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %b)
     95 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
     96 ;
     97   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
     98   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
     99   %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
    100   %4 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %3)
    101   ret <4 x float> %4
    102 }
    103 
    104 define float @test_add_ss_0(float %a, float %b) {
        ; Only element 0 of the add.ss result is extracted. The CHECK lines expect
        ; the intrinsic call and every vector insert/extract to be replaced by a
        ; single scalar fadd of %a and %b.
    105 ; CHECK-LABEL: @test_add_ss_0(
    106 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd float %a, %b
    107 ; CHECK-NEXT:    ret float [[TMP1]]
    108 ;
    109   %1 = insertelement <4 x float> undef, float %a, i32 0
    110   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    111   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    112   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    113   %5 = insertelement <4 x float> undef, float %b, i32 0
    114   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    115   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    116   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    117   %9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8)
    118   %r = extractelement <4 x float> %9, i32 0
    119   ret float %r
    120 }
    121 
    122 define float @test_add_ss_1(float %a, float %b) {
        ; Element 1 of the add.ss result is extracted. The CHECK lines expect a
        ; fold to the constant 1.0 inserted at index 1 of the first operand (%4);
        ; the second operand (%5) only has element 0 set, the rest is undef.
    123 ; CHECK-LABEL: @test_add_ss_1(
    124 ; CHECK-NEXT:    ret float 1.000000e+00
    125 ;
    126   %1 = insertelement <4 x float> undef, float %a, i32 0
    127   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    128   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    129   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    130   %5 = insertelement <4 x float> undef, float %b, i32 0
    131   %6 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %5)
    132   %7 = extractelement <4 x float> %6, i32 1
    133   ret float %7
    134 }
    135 
    136 define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
        ; The full vector result of sub.ss is returned. Elements 1-3 of the second
        ; operand are overwritten with constants before the call; the CHECK lines
        ; expect those inserts to be removed so the call takes %b directly.
    137 ; CHECK-LABEL: @test_sub_ss(
    138 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %b)
    139 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
    140 ;
    141   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
    142   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
    143   %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
    144   %4 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %3)
    145   ret <4 x float> %4
    146 }
    147 
    148 define float @test_sub_ss_0(float %a, float %b) {
        ; Only element 0 of the sub.ss result is extracted. The CHECK lines expect
        ; the intrinsic call and every vector insert/extract to be replaced by a
        ; single scalar fsub of %a and %b (in that operand order).
    149 ; CHECK-LABEL: @test_sub_ss_0(
    150 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub float %a, %b
    151 ; CHECK-NEXT:    ret float [[TMP1]]
    152 ;
    153   %1 = insertelement <4 x float> undef, float %a, i32 0
    154   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    155   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    156   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    157   %5 = insertelement <4 x float> undef, float %b, i32 0
    158   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    159   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    160   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    161   %9 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %8)
    162   %r = extractelement <4 x float> %9, i32 0
    163   ret float %r
    164 }
    165 
    166 define float @test_sub_ss_2(float %a, float %b) {
        ; Element 2 of the sub.ss result is extracted. The CHECK lines expect a
        ; fold to the constant 2.0 inserted at index 2 of the first operand (%4);
        ; the second operand (%5) only has element 0 set, the rest is undef.
    167 ; CHECK-LABEL: @test_sub_ss_2(
    168 ; CHECK-NEXT:    ret float 2.000000e+00
    169 ;
    170   %1 = insertelement <4 x float> undef, float %a, i32 0
    171   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    172   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    173   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    174   %5 = insertelement <4 x float> undef, float %b, i32 0
    175   %6 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %5)
    176   %7 = extractelement <4 x float> %6, i32 2
    177   ret float %7
    178 }
    179 
    180 define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
        ; The full vector result of mul.ss is returned. Elements 1-3 of the second
        ; operand are overwritten with constants before the call; the CHECK lines
        ; expect those inserts to be removed so the call takes %b directly.
    181 ; CHECK-LABEL: @test_mul_ss(
    182 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %b)
    183 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
    184 ;
    185   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
    186   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
    187   %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
    188   %4 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %3)
    189   ret <4 x float> %4
    190 }
    191 
    192 define float @test_mul_ss_0(float %a, float %b) {
        ; Only element 0 of the mul.ss result is extracted. The CHECK lines expect
        ; the intrinsic call and every vector insert/extract to be replaced by a
        ; single scalar fmul of %a and %b.
    193 ; CHECK-LABEL: @test_mul_ss_0(
    194 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul float %a, %b
    195 ; CHECK-NEXT:    ret float [[TMP1]]
    196 ;
    197   %1 = insertelement <4 x float> undef, float %a, i32 0
    198   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    199   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    200   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    201   %5 = insertelement <4 x float> undef, float %b, i32 0
    202   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    203   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    204   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    205   %9 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %8)
    206   %r = extractelement <4 x float> %9, i32 0
    207   ret float %r
    208 }
    209 
    210 define float @test_mul_ss_3(float %a, float %b) {
        ; Element 3 of the mul.ss result is extracted. The CHECK lines expect a
        ; fold to the constant 3.0 inserted at index 3 of the first operand (%4);
        ; the second operand (%5) only has element 0 set, the rest is undef.
    211 ; CHECK-LABEL: @test_mul_ss_3(
    212 ; CHECK-NEXT:    ret float 3.000000e+00
    213 ;
    214   %1 = insertelement <4 x float> undef, float %a, i32 0
    215   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    216   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    217   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    218   %5 = insertelement <4 x float> undef, float %b, i32 0
    219   %6 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %5)
    220   %7 = extractelement <4 x float> %6, i32 3
    221   ret float %7
    222 }
    223 
    224 define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
        ; The full vector result of div.ss is returned. Elements 1-3 of the second
        ; operand are overwritten with constants before the call; the CHECK lines
        ; expect those inserts to be removed so the call takes %b directly.
    225 ; CHECK-LABEL: @test_div_ss(
    226 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %b)
    227 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
    228 ;
    229   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
    230   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
    231   %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
    232   %4 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %3)
    233   ret <4 x float> %4
    234 }
    235 
    236 define float @test_div_ss_0(float %a, float %b) {
        ; Only element 0 of the div.ss result is extracted. The CHECK lines expect
        ; the intrinsic call and every vector insert/extract to be replaced by a
        ; single scalar fdiv of %a by %b (in that operand order).
    237 ; CHECK-LABEL: @test_div_ss_0(
    238 ; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float %a, %b
    239 ; CHECK-NEXT:    ret float [[TMP1]]
    240 ;
    241   %1 = insertelement <4 x float> undef, float %a, i32 0
    242   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    243   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    244   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    245   %5 = insertelement <4 x float> undef, float %b, i32 0
    246   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    247   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    248   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    249   %9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8)
    250   %r = extractelement <4 x float> %9, i32 0
    251   ret float %r
    252 }
    253 
    254 define float @test_div_ss_1(float %a, float %b) {
        ; Element 1 of the div.ss result is extracted. The CHECK lines expect a
        ; fold to the constant 1.0 inserted at index 1 of the first operand (%4);
        ; the second operand (%5) only has element 0 set, the rest is undef.
    255 ; CHECK-LABEL: @test_div_ss_1(
    256 ; CHECK-NEXT:    ret float 1.000000e+00
    257 ;
    258   %1 = insertelement <4 x float> undef, float %a, i32 0
    259   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    260   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    261   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    262   %5 = insertelement <4 x float> undef, float %b, i32 0
    263   %6 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %5)
    264   %7 = extractelement <4 x float> %6, i32 1
    265   ret float %7
    266 }
    267 
    268 define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
        ; The full vector result of min.ss is returned. Elements 1-3 of the second
        ; operand are overwritten with constants before the call; the CHECK lines
        ; expect those inserts to be removed so the call takes %b directly.
    269 ; CHECK-LABEL: @test_min_ss(
    270 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %b)
    271 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
    272 ;
    273   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
    274   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
    275   %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
    276   %4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %3)
    277   ret <4 x float> %4
    278 }
    279 
    280 define float @test_min_ss_0(float %a, float %b) {
        ; Only element 0 of the min.ss result is extracted. Unlike the add/sub/
        ; mul/div "_0" tests in this file, the CHECK lines expect the intrinsic
        ; call to remain; only the constant inserts into elements 1-3 of both
        ; operands are expected to be removed.
    281 ; CHECK-LABEL: @test_min_ss_0(
    282 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
    283 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
    284 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
    285 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
    286 ; CHECK-NEXT:    ret float [[TMP4]]
    287 ;
    288   %1 = insertelement <4 x float> undef, float %a, i32 0
    289   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    290   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    291   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    292   %5 = insertelement <4 x float> undef, float %b, i32 0
    293   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    294   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    295   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    296   %9 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %8)
    297   %10 = extractelement <4 x float> %9, i32 0
    298   ret float %10
    299 }
    300 
    301 define float @test_min_ss_2(float %a, float %b) {
        ; Element 2 of the min.ss result is extracted. The CHECK lines expect a
        ; fold to the constant 2.0 inserted at index 2 of the first operand (%4);
        ; the second operand (%5) only has element 0 set, the rest is undef.
    302 ; CHECK-LABEL: @test_min_ss_2(
    303 ; CHECK-NEXT:    ret float 2.000000e+00
    304 ;
    305   %1 = insertelement <4 x float> undef, float %a, i32 0
    306   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    307   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    308   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    309   %5 = insertelement <4 x float> undef, float %b, i32 0
    310   %6 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %5)
    311   %7 = extractelement <4 x float> %6, i32 2
    312   ret float %7
    313 }
    314 
    315 define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
        ; The full vector result of max.ss is returned. Elements 1-3 of the second
        ; operand are overwritten with constants before the call; the CHECK lines
        ; expect those inserts to be removed so the call takes %b directly.
    316 ; CHECK-LABEL: @test_max_ss(
    317 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %b)
    318 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
    319 ;
    320   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
    321   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
    322   %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
    323   %4 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %3)
    324   ret <4 x float> %4
    325 }
    326 
    327 define float @test_max_ss_0(float %a, float %b) {
        ; Only element 0 of the max.ss result is extracted. Unlike the add/sub/
        ; mul/div "_0" tests in this file, the CHECK lines expect the intrinsic
        ; call to remain; only the constant inserts into elements 1-3 of both
        ; operands are expected to be removed.
    328 ; CHECK-LABEL: @test_max_ss_0(
    329 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
    330 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
    331 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
    332 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
    333 ; CHECK-NEXT:    ret float [[TMP4]]
    334 ;
    335   %1 = insertelement <4 x float> undef, float %a, i32 0
    336   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    337   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    338   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    339   %5 = insertelement <4 x float> undef, float %b, i32 0
    340   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    341   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    342   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    343   %9 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %8)
    344   %10 = extractelement <4 x float> %9, i32 0
    345   ret float %10
    346 }
    347 
    348 define float @test_max_ss_3(float %a, float %b) {
        ; Element 3 of the max.ss result is extracted. The CHECK lines expect a
        ; fold to the constant 3.0 inserted at index 3 of the first operand (%4);
        ; the second operand (%5) only has element 0 set, the rest is undef.
    349 ; CHECK-LABEL: @test_max_ss_3(
    350 ; CHECK-NEXT:    ret float 3.000000e+00
    351 ;
    352   %1 = insertelement <4 x float> undef, float %a, i32 0
    353   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    354   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    355   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    356   %5 = insertelement <4 x float> undef, float %b, i32 0
    357   %6 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %5)
    358   %7 = extractelement <4 x float> %6, i32 3
    359   ret float %7
    360 }
    361 
    362 define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
        ; The full vector result of cmp.ss (with i8 immediate 0) is returned.
        ; Elements 1-3 of the second operand are overwritten with constants before
        ; the call; the CHECK lines expect those inserts to be removed so the call
        ; takes %b directly, with the immediate unchanged.
    363 ; CHECK-LABEL: @test_cmp_ss(
    364 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %b, i8 0)
    365 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
    366 ;
    367   %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
    368   %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
    369   %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
    370   %4 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %3, i8 0)
    371   ret <4 x float> %4
    372 }
    373 
    374 define float @test_cmp_ss_0(float %a, float %b) {
        ; Only element 0 of the cmp.ss result is extracted. The CHECK lines expect
        ; the intrinsic call (with i8 immediate 0) to remain; only the constant
        ; inserts into elements 1-3 of both operands are expected to be removed.
    375 ; CHECK-LABEL: @test_cmp_ss_0(
    376 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
    377 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
    378 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i8 0)
    379 ; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
    380 ; CHECK-NEXT:    ret float [[R]]
    381 ;
    382   %1 = insertelement <4 x float> undef, float %a, i32 0
    383   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    384   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    385   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    386   %5 = insertelement <4 x float> undef, float %b, i32 0
    387   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    388   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    389   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    390   %9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0)
    391   %r = extractelement <4 x float> %9, i32 0
    392   ret float %r
    393 }
    394 
    395 define float @test_cmp_ss_1(float %a, float %b) {
        ; Element 1 of the cmp.ss result is extracted. The CHECK lines expect a
        ; fold to the constant 1.0 inserted at index 1 of the first operand (%4);
        ; the second operand (%5) only has element 0 set, the rest is undef.
    396 ; CHECK-LABEL: @test_cmp_ss_1(
    397 ; CHECK-NEXT:    ret float 1.000000e+00
    398 ;
    399   %1 = insertelement <4 x float> undef, float %a, i32 0
    400   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    401   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    402   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    403   %5 = insertelement <4 x float> undef, float %b, i32 0
    404   %6 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %5, i8 0)
    405   %7 = extractelement <4 x float> %6, i32 1
    406   ret float %7
    407 }
    408 
    409 define i32 @test_comieq_ss_0(float %a, float %b) {
        ; comieq.ss returns an i32 rather than a vector. The CHECK lines expect the
        ; constant inserts into elements 1-3 of both operands to be removed,
        ; leaving only the inserts of %a and %b at index 0 feeding the call.
    410 ; CHECK-LABEL: @test_comieq_ss_0(
    411 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
    412 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
    413 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
    414 ; CHECK-NEXT:    ret i32 [[TMP3]]
    415 ;
    416   %1 = insertelement <4 x float> undef, float %a, i32 0
    417   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    418   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    419   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    420   %5 = insertelement <4 x float> undef, float %b, i32 0
    421   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    422   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    423   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    424   %9 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %4, <4 x float> %8)
    425   ret i32 %9
    426 }
    427 
    428 define i32 @test_comige_ss_0(float %a, float %b) {
        ; comige.ss returns an i32 rather than a vector. The CHECK lines expect the
        ; constant inserts into elements 1-3 of both operands to be removed,
        ; leaving only the inserts of %a and %b at index 0 feeding the call.
    429 ; CHECK-LABEL: @test_comige_ss_0(
    430 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
    431 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
    432 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
    433 ; CHECK-NEXT:    ret i32 [[TMP3]]
    434 ;
    435   %1 = insertelement <4 x float> undef, float %a, i32 0
    436   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    437   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    438   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    439   %5 = insertelement <4 x float> undef, float %b, i32 0
    440   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    441   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    442   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    443   %9 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %4, <4 x float> %8)
    444   ret i32 %9
    445 }
    446 
    447 define i32 @test_comigt_ss_0(float %a, float %b) {
        ; comigt.ss returns an i32 rather than a vector. The CHECK lines expect the
        ; constant inserts into elements 1-3 of both operands to be removed,
        ; leaving only the inserts of %a and %b at index 0 feeding the call.
    448 ; CHECK-LABEL: @test_comigt_ss_0(
    449 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
    450 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
    451 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
    452 ; CHECK-NEXT:    ret i32 [[TMP3]]
    453 ;
    454   %1 = insertelement <4 x float> undef, float %a, i32 0
    455   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    456   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    457   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    458   %5 = insertelement <4 x float> undef, float %b, i32 0
    459   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    460   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    461   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    462   %9 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %4, <4 x float> %8)
    463   ret i32 %9
    464 }
    465 
    466 define i32 @test_comile_ss_0(float %a, float %b) {
        ; comile.ss returns an i32 rather than a vector. The CHECK lines expect the
        ; constant inserts into elements 1-3 of both operands to be removed,
        ; leaving only the inserts of %a and %b at index 0 feeding the call.
    467 ; CHECK-LABEL: @test_comile_ss_0(
    468 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
    469 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
    470 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
    471 ; CHECK-NEXT:    ret i32 [[TMP3]]
    472 ;
    473   %1 = insertelement <4 x float> undef, float %a, i32 0
    474   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    475   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    476   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    477   %5 = insertelement <4 x float> undef, float %b, i32 0
    478   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    479   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    480   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    481   %9 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %4, <4 x float> %8)
    482   ret i32 %9
    483 }
    484 
    485 define i32 @test_comilt_ss_0(float %a, float %b) {
        ; comilt.ss returns an i32 rather than a vector. The CHECK lines expect the
        ; constant inserts into elements 1-3 of both operands to be removed,
        ; leaving only the inserts of %a and %b at index 0 feeding the call.
    486 ; CHECK-LABEL: @test_comilt_ss_0(
    487 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
    488 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
    489 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
    490 ; CHECK-NEXT:    ret i32 [[TMP3]]
    491 ;
    492   %1 = insertelement <4 x float> undef, float %a, i32 0
    493   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    494   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    495   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    496   %5 = insertelement <4 x float> undef, float %b, i32 0
    497   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    498   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    499   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    500   %9 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %4, <4 x float> %8)
    501   ret i32 %9
    502 }
    503 
    504 define i32 @test_comineq_ss_0(float %a, float %b) {
        ; comineq.ss returns an i32 rather than a vector. The CHECK lines expect
        ; the constant inserts into elements 1-3 of both operands to be removed,
        ; leaving only the inserts of %a and %b at index 0 feeding the call.
    505 ; CHECK-LABEL: @test_comineq_ss_0(
    506 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
    507 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
    508 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
    509 ; CHECK-NEXT:    ret i32 [[TMP3]]
    510 ;
    511   %1 = insertelement <4 x float> undef, float %a, i32 0
    512   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    513   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    514   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    515   %5 = insertelement <4 x float> undef, float %b, i32 0
    516   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    517   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    518   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    519   %9 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %4, <4 x float> %8)
    520   ret i32 %9
    521 }
    522 
    523 define i32 @test_ucomieq_ss_0(float %a, float %b) {
        ; ucomieq.ss returns an i32 rather than a vector. The CHECK lines expect
        ; the constant inserts into elements 1-3 of both operands to be removed,
        ; leaving only the inserts of %a and %b at index 0 feeding the call.
    524 ; CHECK-LABEL: @test_ucomieq_ss_0(
    525 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
    526 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
    527 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
    528 ; CHECK-NEXT:    ret i32 [[TMP3]]
    529 ;
    530   %1 = insertelement <4 x float> undef, float %a, i32 0
    531   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    532   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    533   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    534   %5 = insertelement <4 x float> undef, float %b, i32 0
    535   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    536   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    537   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    538   %9 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %4, <4 x float> %8)
    539   ret i32 %9
    540 }
    541 
    542 define i32 @test_ucomige_ss_0(float %a, float %b) {
        ; ucomige.ss returns an i32 rather than a vector. The CHECK lines expect
        ; the constant inserts into elements 1-3 of both operands to be removed,
        ; leaving only the inserts of %a and %b at index 0 feeding the call.
    543 ; CHECK-LABEL: @test_ucomige_ss_0(
    544 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
    545 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
    546 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
    547 ; CHECK-NEXT:    ret i32 [[TMP3]]
    548 ;
    549   %1 = insertelement <4 x float> undef, float %a, i32 0
    550   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    551   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    552   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    553   %5 = insertelement <4 x float> undef, float %b, i32 0
    554   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    555   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    556   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    557   %9 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %4, <4 x float> %8)
    558   ret i32 %9
    559 }
    560 
    561 define i32 @test_ucomigt_ss_0(float %a, float %b) {
        ; ucomigt.ss returns an i32 rather than a vector. The CHECK lines expect
        ; the constant inserts into elements 1-3 of both operands to be removed,
        ; leaving only the inserts of %a and %b at index 0 feeding the call.
    562 ; CHECK-LABEL: @test_ucomigt_ss_0(
    563 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
    564 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
    565 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
    566 ; CHECK-NEXT:    ret i32 [[TMP3]]
    567 ;
    568   %1 = insertelement <4 x float> undef, float %a, i32 0
    569   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    570   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    571   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    572   %5 = insertelement <4 x float> undef, float %b, i32 0
    573   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    574   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    575   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    576   %9 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %4, <4 x float> %8)
    577   ret i32 %9
    578 }
    579 
    580 define i32 @test_ucomile_ss_0(float %a, float %b) {
        ; ucomile.ss returns an i32 rather than a vector. The CHECK lines expect
        ; the constant inserts into elements 1-3 of both operands to be removed,
        ; leaving only the inserts of %a and %b at index 0 feeding the call.
    581 ; CHECK-LABEL: @test_ucomile_ss_0(
    582 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
    583 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
    584 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
    585 ; CHECK-NEXT:    ret i32 [[TMP3]]
    586 ;
    587   %1 = insertelement <4 x float> undef, float %a, i32 0
    588   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    589   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    590   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
    591   %5 = insertelement <4 x float> undef, float %b, i32 0
    592   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    593   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    594   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
    595   %9 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %4, <4 x float> %8)
    596   ret i32 %9
    597 }
    598 
    ; Test: ucomilt.ss called on two fully populated <4 x float> vectors.
    ; Element 0 of each operand comes from an argument (%a / %b); elements 1-3
    ; are filled with constants. The expected output listed below keeps only the
    ; element-0 inserts into undef, i.e. the constant inserts into elements 1-3
    ; are expected to be removed by the pass under test.
    ; NOTE(review): presumably this works because the intrinsic only reads
    ; element 0 of each operand -- confirm against the intrinsic's definition.
    599 define i32 @test_ucomilt_ss_0(float %a, float %b) {
    600 ; CHECK-LABEL: @test_ucomilt_ss_0(
    601 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
    602 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
    603 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
    604 ; CHECK-NEXT:    ret i32 [[TMP3]]
    605 ;
          ; First operand: %a in element 0, constants 1.0/2.0/3.0 in elements 1-3.
    606   %1 = insertelement <4 x float> undef, float %a, i32 0
    607   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    608   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    609   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
          ; Second operand: %b in element 0, constants 4.0/5.0/6.0 in elements 1-3.
    610   %5 = insertelement <4 x float> undef, float %b, i32 0
    611   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    612   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    613   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
          ; Compare the two vectors and return the intrinsic's i32 result unchanged.
    614   %9 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %4, <4 x float> %8)
    615   ret i32 %9
    616 }
    617 
    ; Test: ucomineq.ss called on two fully populated <4 x float> vectors.
    ; Element 0 of each operand comes from an argument (%a / %b); elements 1-3
    ; are filled with constants. The expected output listed below keeps only the
    ; element-0 inserts into undef, i.e. the constant inserts into elements 1-3
    ; are expected to be removed by the pass under test.
    ; NOTE(review): presumably this works because the intrinsic only reads
    ; element 0 of each operand -- confirm against the intrinsic's definition.
    618 define i32 @test_ucomineq_ss_0(float %a, float %b) {
    619 ; CHECK-LABEL: @test_ucomineq_ss_0(
    620 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
    621 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
    622 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
    623 ; CHECK-NEXT:    ret i32 [[TMP3]]
    624 ;
          ; First operand: %a in element 0, constants 1.0/2.0/3.0 in elements 1-3.
    625   %1 = insertelement <4 x float> undef, float %a, i32 0
    626   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
    627   %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
    628   %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
          ; Second operand: %b in element 0, constants 4.0/5.0/6.0 in elements 1-3.
    629   %5 = insertelement <4 x float> undef, float %b, i32 0
    630   %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
    631   %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
    632   %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
          ; Compare the two vectors and return the intrinsic's i32 result unchanged.
    633   %9 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %4, <4 x float> %8)
    634   ret i32 %9
    635 }
    636 
    ; Declarations of the llvm.x86.sse.*.ss intrinsics, grouped by signature.
    ; Group 1: one <4 x float> operand, <4 x float> result.
    637 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
    638 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
    639 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
    640 
    ; Group 2: two <4 x float> operands, <4 x float> result
    ; (cmp.ss takes an additional i8 operand).
    641 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>)
    642 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
    643 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
    644 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>)
    645 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
    646 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
    647 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8)
    648 
    ; Group 3: comi* comparisons, two <4 x float> operands, i32 result.
    649 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>)
    650 declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>)
    651 declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>)
    652 declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>)
    653 declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>)
    654 declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>)
    655 
    ; Group 4: ucomi* comparisons, two <4 x float> operands, i32 result.
    656 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>)
    657 declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>)
    658 declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>)
    659 declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>)
    660 declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>)
    661 declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>)
    662