; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s

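; Sanity checks for the 128-bit AES-NI, SSE, SSE2, SSE3, SSE4.1, and SSE4.2
; intrinsics: when compiling for an AVX-capable CPU (corei7-avx), each
; intrinsic should be selected as its VEX-encoded, v-prefixed instruction.
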
define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vaesdec
  %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vaesdeclast
  %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vaesenc
  %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vaesenclast
  %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
  ; CHECK: vaesimc
  %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone


define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
  ; CHECK: vaeskeygenassist
  %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone


define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vaddsd
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcmpordpd
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone


define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcmpordsd
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone


define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: sbbl    %eax, %eax
  ; CHECK: andl    $1, %eax
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vcomisd
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
  ; CHECK: vcvtdq2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
  ; CHECK: vcvtdq2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
  ; CHECK: vcvtpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
  ; CHECK: vcvtpd2ps
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
  ; CHECK: vcvtps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
  ; CHECK: vcvtps2pd
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone


define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
  ; CHECK: vcvtsd2si
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone


define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
  ; CHECK: vcvtsd2ss
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
  ; CHECK: movl
  ; CHECK: vcvtsi2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
  ; CHECK: vcvtss2sd
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
  ; CHECK: vcvttpd2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone


define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
  ; CHECK: vcvttps2dq
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone


define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
  ; CHECK: vcvttsd2si
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vdivsd
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone



define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vmaxpd
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vmaxsd
  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vminpd
  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vminsd
  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
  ; CHECK: vmovmskpd
  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone




define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_mul_sd
  ; CHECK: vmulsd
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpackssdw
  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone


define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpacksswb
  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpackuswb
  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpaddsb
  %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpaddsw
  %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpaddusb
  %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpaddusw
  %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpavgb
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpavgw
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmaddwd
  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmaxsw
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpmaxub
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpminsw
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpminub
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone


define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
  ; CHECK: vpmovmskb
  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmulhw
  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone


define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmulhuw
  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpmuludq
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpsadbw
  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpslld
  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
  ; CHECK: vpslldq
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
  ; CHECK: vpslldq
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vpsllq
  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsllw
  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
  ; CHECK: vpslld
  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
  ; CHECK: vpsllq
  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
  ; CHECK: vpsllw
  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsrad
  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsraw
  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
  ; CHECK: vpsrad
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone


define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
  ; CHECK: vpsraw
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone


define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpsrld
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
  ; CHECK: vpsrldq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
  ; CHECK: vpsrldq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vpsrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone


define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
  ; CHECK: vpsrld
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone


define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
  ; CHECK: vpsrlq
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone

define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
  ; CHECK: vpsrlw
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone


define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpsubsb
  %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsubsw
  %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpsubusb
  %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpsubusw
  %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
  ; CHECK: vsqrtpd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
  ; CHECK: vsqrtsd
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone


define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
  ; CHECK: test_x86_sse2_storel_dq
  ; CHECK: movl
  ; CHECK: vmovq
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind


define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; CHECK: test_x86_sse2_storeu_dq
  ; CHECK: movl
  ; CHECK: vmovdqu
  ; add operation forces the execution domain.
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind


define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_storeu_pd
  ; CHECK: movl
  ; CHECK: vmovupd
  ; fadd operation forces the execution domain.
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind


define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: test_x86_sse2_sub_sd
  ; CHECK: vsubsd
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: setbe
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone


define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vucomisd
  ; CHECK: setne
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vaddsubpd
  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vaddsubps
  %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone


define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vhaddpd
  %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vhaddps
  %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone


define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vhsubpd
  %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone

define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vhsubps
  %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone


define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
  ; CHECK: movl
  ; CHECK: vlddqu
  %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly


define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vblendpd
  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vblendps
  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone


define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
  ; CHECK: vblendvpd
  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
  ; CHECK: vblendvps
  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone


define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vdppd
  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vdpps
  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vinsertps
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone



define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vmpsadbw
  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone


define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpackusdw
  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone


define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
  ; CHECK: vpblendvb
  %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpblendw
  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone


define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
  ; CHECK: vphminposuw
  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpmaxsb
  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpmaxsd
  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpmaxud
  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpmaxuw
  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpminsb
  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpminsd
  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpminud
  %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
  ; CHECK: vpminuw
  %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
  ; CHECK: vpmovsxbd
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
  ; CHECK: vpmovsxbq
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
  ; CHECK: vpmovsxbw
  %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone

define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
  ; CHECK: vpmovsxdq
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
  ; CHECK: vpmovsxwd
  %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
  ; CHECK: vpmovsxwq
  %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
  ; CHECK: vpmovzxbd
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
  ; CHECK: vpmovzxbq
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
  ; CHECK: vpmovzxbw
  %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
  ; CHECK: vpmovzxdq
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
  ; CHECK: vpmovzxwd
  %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
  ; CHECK: vpmovzxwq
  %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone


define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
  ; CHECK: vpmuldq
  %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone


define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vptest 
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone


define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vptest 
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone


define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
  ; CHECK: vptest 
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone


define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
  ; CHECK: vroundpd
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
  ; CHECK: vroundps
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone


define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vroundsd
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vroundss
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone


define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl $7
  ; CHECK: movl $7
  ; CHECK: vpcmpestri $7
  ; CHECK: movl
  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
  ; CHECK: movl $7
  ; CHECK: movl $7
  ; CHECK: vpcmpestri $7, (
  ; CHECK: movl
  %1 = load <16 x i8>* %a0
  %2 = load <16 x i8>* %a2
  %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}


define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestri
  ; CHECK: seta
  %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestri
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestri
  ; CHECK: seto
  %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestri
  ; CHECK: sets
  %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestri
  ; CHECK: sete
  %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
  ; CHECK: movl
  ; CHECK: movl
  ; CHECK: vpcmpestrm
  ; CHECK-NOT: vmov
  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone


define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) {
  ; CHECK: movl $7
  ; CHECK: movl $7
  ; CHECK: vpcmpestrm $7,
  ; CHECK-NOT: vmov
  %1 = load <16 x i8>* %a2
  %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}


define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri $7
  ; CHECK: movl
  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
  ; CHECK: vpcmpistri $7, (
  ; CHECK: movl
  %1 = load <16 x i8>* %a0
  %2 = load <16 x i8>* %a1
  %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}


define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri
  ; CHECK: seta
  %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri
  ; CHECK: sbbl
  %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri
  ; CHECK: seto
  %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri
  ; CHECK: sets
  %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistri
  ; CHECK: sete
  %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK: vpcmpistrm $7
  ; CHECK-NOT: vmov
  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone


define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
  ; CHECK: vpcmpistrm $7, (
  ; CHECK-NOT: vmov
  %1 = load <16 x i8>* %a1
  %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}


define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vaddss
  %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcmpordps
  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone


define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcmpordss
  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone


define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: sete
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: setae
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone


define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vcomiss
  ; CHECK: seta
  ; CHECK: movzbl
  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1376   ret i32 %res
   1377 }
   1378 declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
   1379 
   1380 
   1381 define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
   1382   ; CHECK: vcomiss
   1383   ; CHECK: setbe
   1384   ; CHECK: movzbl
   1385   %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1386   ret i32 %res
   1387 }
   1388 declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
   1389 
   1390 
   1391 define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
   1392   ; CHECK: vcomiss
   1393   ; CHECK: sbb
   1394   %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1395   ret i32 %res
   1396 }
   1397 declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
   1398 
   1399 
   1400 define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
   1401   ; CHECK: vcomiss
   1402   ; CHECK: setne
   1403   ; CHECK: movzbl
   1404   %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1405   ret i32 %res
   1406 }
   1407 declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
   1408 
   1409 
   1410 define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
   1411   ; CHECK: movl
   1412   ; CHECK: vcvtsi2ss
   1413   %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
   1414   ret <4 x float> %res
   1415 }
   1416 declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
   1417 
   1418 
   1419 define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
   1420   ; CHECK: vcvtss2si
   1421   %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
   1422   ret i32 %res
   1423 }
   1424 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
   1425 
   1426 
   1427 define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
   1428   ; CHECK: vcvttss2si
   1429   %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
   1430   ret i32 %res
   1431 }
   1432 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
   1433 
   1434 
   1435 define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
   1436   ; CHECK: vdivss
   1437   %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1438   ret <4 x float> %res
   1439 }
   1440 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
   1441 
   1442 
   1443 define void @test_x86_sse_ldmxcsr(i8* %a0) {
   1444   ; CHECK: movl
   1445   ; CHECK: vldmxcsr
   1446   call void @llvm.x86.sse.ldmxcsr(i8* %a0)
   1447   ret void
   1448 }
   1449 declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
   1450 
   1451 
   1452 
   1453 define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
   1454   ; CHECK: vmaxps
   1455   %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1456   ret <4 x float> %res
   1457 }
   1458 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
   1459 
   1460 
   1461 define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
   1462   ; CHECK: vmaxss
   1463   %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1464   ret <4 x float> %res
   1465 }
   1466 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
   1467 
   1468 
   1469 define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
   1470   ; CHECK: vminps
   1471   %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1472   ret <4 x float> %res
   1473 }
   1474 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
   1475 
   1476 
   1477 define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
   1478   ; CHECK: vminss
   1479   %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1480   ret <4 x float> %res
   1481 }
   1482 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
   1483 
   1484 
   1485 define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
   1486   ; CHECK: vmovmskps
   1487   %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
   1488   ret i32 %res
   1489 }
   1490 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
   1491 
   1492 
   1493 
   1494 define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
   1495   ; CHECK: vmulss
   1496   %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1497   ret <4 x float> %res
   1498 }
   1499 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
   1500 
   1501 
   1502 define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
   1503   ; CHECK: vrcpps
   1504   %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1505   ret <4 x float> %res
   1506 }
   1507 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
   1508 
   1509 
   1510 define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
   1511   ; CHECK: vrcpss
   1512   %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1513   ret <4 x float> %res
   1514 }
   1515 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
   1516 
   1517 
   1518 define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
   1519   ; CHECK: vrsqrtps
   1520   %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1521   ret <4 x float> %res
   1522 }
   1523 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
   1524 
   1525 
   1526 define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
   1527   ; CHECK: vrsqrtss
   1528   %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1529   ret <4 x float> %res
   1530 }
   1531 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
   1532 
   1533 
   1534 define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
   1535   ; CHECK: vsqrtps
   1536   %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1537   ret <4 x float> %res
   1538 }
   1539 declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
   1540 
   1541 
   1542 define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
   1543   ; CHECK: vsqrtss
   1544   %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1545   ret <4 x float> %res
   1546 }
   1547 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
   1548 
   1549 
   1550 define void @test_x86_sse_stmxcsr(i8* %a0) {
   1551   ; CHECK: movl
   1552   ; CHECK: vstmxcsr
   1553   call void @llvm.x86.sse.stmxcsr(i8* %a0)
   1554   ret void
   1555 }
   1556 declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
   1557 
   1558 
   1559 define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
   1560   ; CHECK: movl
   1561   ; CHECK: vmovups
   1562   call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
   1563   ret void
   1564 }
   1565 declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
   1566 
   1567 
   1568 define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
   1569   ; CHECK: vsubss
   1570   %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1571   ret <4 x float> %res
   1572 }
   1573 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
   1574 
   1575 
   1576 define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
   1577   ; CHECK: vucomiss
   1578   ; CHECK: sete
   1579   ; CHECK: movzbl
   1580   %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1581   ret i32 %res
   1582 }
   1583 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
   1584 
   1585 
   1586 define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
   1587   ; CHECK: vucomiss
   1588   ; CHECK: setae
   1589   ; CHECK: movzbl
   1590   %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1591   ret i32 %res
   1592 }
   1593 declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
   1594 
   1595 
   1596 define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
   1597   ; CHECK: vucomiss
   1598   ; CHECK: seta
   1599   ; CHECK: movzbl
   1600   %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1601   ret i32 %res
   1602 }
   1603 declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
   1604 
   1605 
   1606 define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
   1607   ; CHECK: vucomiss
   1608   ; CHECK: setbe
   1609   ; CHECK: movzbl
   1610   %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1611   ret i32 %res
   1612 }
   1613 declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
   1614 
   1615 
   1616 define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
   1617   ; CHECK: vucomiss
   1618   ; CHECK: sbbl
   1619   %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1620   ret i32 %res
   1621 }
   1622 declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
   1623 
   1624 
   1625 define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
   1626   ; CHECK: vucomiss
   1627   ; CHECK: setne
   1628   ; CHECK: movzbl
   1629   %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1630   ret i32 %res
   1631 }
   1632 declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
   1633 
   1634 
   1635 define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
   1636   ; CHECK: vpabsb
   1637   %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
   1638   ret <16 x i8> %res
   1639 }
   1640 declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
   1641 
   1642 
   1643 define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
   1644   ; CHECK: vpabsd
   1645   %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
   1646   ret <4 x i32> %res
   1647 }
   1648 declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
   1649 
   1650 
   1651 define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
   1652   ; CHECK: vpabsw
   1653   %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
   1654   ret <8 x i16> %res
   1655 }
   1656 declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
   1657 
   1658 
   1659 define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
   1660   ; CHECK: vphaddd
   1661   %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   1662   ret <4 x i32> %res
   1663 }
   1664 declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
   1665 
   1666 
   1667 define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
   1668   ; CHECK: vphaddsw
   1669   %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   1670   ret <8 x i16> %res
   1671 }
   1672 declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
   1673 
   1674 
   1675 define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
   1676   ; CHECK: vphaddw
   1677   %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   1678   ret <8 x i16> %res
   1679 }
   1680 declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
   1681 
   1682 
   1683 define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
   1684   ; CHECK: vphsubd
   1685   %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   1686   ret <4 x i32> %res
   1687 }
   1688 declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
   1689 
   1690 
   1691 define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
   1692   ; CHECK: vphsubsw
   1693   %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   1694   ret <8 x i16> %res
   1695 }
   1696 declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
   1697 
   1698 
   1699 define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
   1700   ; CHECK: vphsubw
   1701   %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   1702   ret <8 x i16> %res
   1703 }
   1704 declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
   1705 
   1706 
   1707 define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
   1708   ; CHECK: vpmaddubsw
   1709   %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
   1710   ret <8 x i16> %res
   1711 }
   1712 declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
   1713 
   1714 
   1715 define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
   1716   ; CHECK: vpmulhrsw
   1717   %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   1718   ret <8 x i16> %res
   1719 }
   1720 declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
   1721 
   1722 
   1723 define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
   1724   ; CHECK: vpshufb
   1725   %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
   1726   ret <16 x i8> %res
   1727 }
   1728 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
   1729 
   1730 
   1731 define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
   1732   ; CHECK: vpsignb
   1733   %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
   1734   ret <16 x i8> %res
   1735 }
   1736 declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
   1737 
   1738 
   1739 define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
   1740   ; CHECK: vpsignd
   1741   %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   1742   ret <4 x i32> %res
   1743 }
   1744 declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
   1745 
   1746 
   1747 define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
   1748   ; CHECK: vpsignw
   1749   %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   1750   ret <8 x i16> %res
   1751 }
   1752 declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
   1753 
   1754 
   1755 define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
   1756   ; CHECK: vaddsubpd
   1757   %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   1758   ret <4 x double> %res
   1759 }
   1760 declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
   1761 
   1762 
   1763 define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
   1764   ; CHECK: vaddsubps
   1765   %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   1766   ret <8 x float> %res
   1767 }
   1768 declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
   1769 
   1770 
   1771 define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
   1772   ; CHECK: vblendpd
   1773   %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
   1774   ret <4 x double> %res
   1775 }
   1776 declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
   1777 
   1778 
   1779 define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
   1780   ; CHECK: vblendps
   1781   %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
   1782   ret <8 x float> %res
   1783 }
   1784 declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
   1785 
   1786 
   1787 define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
   1788   ; CHECK: vblendvpd
   1789   %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
   1790   ret <4 x double> %res
   1791 }
   1792 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
   1793 
   1794 
   1795 define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
   1796   ; CHECK: vblendvps
   1797   %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
   1798   ret <8 x float> %res
   1799 }
   1800 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
   1801 
   1802 
   1803 define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
   1804   ; CHECK: vcmpordpd
   1805   %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
   1806   ret <4 x double> %res
   1807 }
   1808 declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
   1809 
   1810 
   1811 define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
   1812   ; CHECK: vcmpordps
   1813   %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   1814   ret <8 x float> %res
   1815 }
   1816 
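        ; Note: the test below chains all 32 AVX comparison predicate immediates (0-31);
        ; each call consumes the previous result, so every vcmp pseudo-op checked below
        ; has a use and cannot be folded away.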
   1817 define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
   1818   ; CHECK: vcmpeqps
   1819   %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
   1820   ; CHECK: vcmpltps
   1821   %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
   1822   ; CHECK: vcmpleps
   1823   %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
   1824   ; CHECK: vcmpunordps
   1825   %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
   1826   ; CHECK: vcmpneqps
   1827   %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
   1828   ; CHECK: vcmpnltps
   1829   %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
   1830   ; CHECK: vcmpnleps
   1831   %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
   1832   ; CHECK: vcmpordps
   1833   %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
   1834   ; CHECK: vcmpeq_uqps
   1835   %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
   1836   ; CHECK: vcmpngeps
   1837   %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
   1838   ; CHECK: vcmpngtps
   1839   %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
   1840   ; CHECK: vcmpfalseps
   1841   %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
   1842   ; CHECK: vcmpneq_oqps
   1843   %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
   1844   ; CHECK: vcmpgeps
   1845   %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
   1846   ; CHECK: vcmpgtps
   1847   %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
   1848   ; CHECK: vcmptrueps
   1849   %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
   1850   ; CHECK: vcmpeq_osps
   1851   %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
   1852   ; CHECK: vcmplt_oqps
   1853   %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
   1854   ; CHECK: vcmple_oqps
   1855   %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
   1856   ; CHECK: vcmpunord_sps
   1857   %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
   1858   ; CHECK: vcmpneq_usps
   1859   %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
   1860   ; CHECK: vcmpnlt_uqps
   1861   %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
   1862   ; CHECK: vcmpnle_uqps
   1863   %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
   1864   ; CHECK: vcmpord_sps
   1865   %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
   1866   ; CHECK: vcmpeq_usps
   1867   %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
   1868   ; CHECK: vcmpnge_uqps
   1869   %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
   1870   ; CHECK: vcmpngt_uqps
   1871   %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
   1872   ; CHECK: vcmpfalse_osps
   1873   %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
   1874   ; CHECK: vcmpneq_osps
   1875   %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
   1876   ; CHECK: vcmpge_oqps
   1877   %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
   1878   ; CHECK: vcmpgt_oqps
   1879   %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
   1880   ; CHECK: vcmptrue_usps
   1881   %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
   1882   ret <8 x float> %res
   1883 }
   1884 declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
   1885 
   1886 
   1887 define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
   1888   ; CHECK: vcvtpd2psy
   1889   %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
   1890   ret <4 x float> %res
   1891 }
   1892 declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone
   1893 
   1894 
   1895 define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
   1896   ; CHECK: vcvtpd2dqy
   1897   %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
   1898   ret <4 x i32> %res
   1899 }
   1900 declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
   1901 
   1902 
   1903 define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
   1904   ; CHECK: vcvtps2pd
   1905   %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
   1906   ret <4 x double> %res
   1907 }
   1908 declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
   1909 
   1910 
   1911 define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
   1912   ; CHECK: vcvtps2dq
   1913   %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
   1914   ret <8 x i32> %res
   1915 }
   1916 declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
   1917 
   1918 
   1919 define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
   1920   ; CHECK: vcvtdq2pd
   1921   %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
   1922   ret <4 x double> %res
   1923 }
   1924 declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
   1925 
   1926 
   1927 define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
   1928   ; CHECK: vcvtdq2ps
   1929   %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
   1930   ret <8 x float> %res
   1931 }
   1932 declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
   1933 
   1934 
   1935 define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
   1936   ; CHECK: vcvttpd2dqy
   1937   %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
   1938   ret <4 x i32> %res
   1939 }
   1940 declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
   1941 
   1942 
   1943 define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
   1944   ; CHECK: vcvttps2dq
   1945   %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
   1946   ret <8 x i32> %res
   1947 }
   1948 declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
   1949 
   1950 
   1951 define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
   1952   ; CHECK: vdpps
   1953   %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
   1954   ret <8 x float> %res
   1955 }
   1956 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
   1957 
   1958 
   1959 define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
   1960   ; CHECK: vhaddpd
   1961   %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   1962   ret <4 x double> %res
   1963 }
   1964 declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
   1965 
   1966 
   1967 define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
   1968   ; CHECK: vhaddps
   1969   %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   1970   ret <8 x float> %res
   1971 }
   1972 declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
   1973 
   1974 
   1975 define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
   1976   ; CHECK: vhsubpd
   1977   %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   1978   ret <4 x double> %res
   1979 }
   1980 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
   1981 
   1982 
   1983 define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
   1984   ; CHECK: vhsubps
   1985   %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   1986   ret <8 x float> %res
   1987 }
   1988 declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
   1989 
   1990 
   1991 define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
   1992   ; CHECK: vlddqu
   1993   %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
   1994   ret <32 x i8> %res
   1995 }
   1996 declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
   1997 
   1998 
   1999 define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
   2000   ; CHECK: vmaskmovpd
   2001   %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   2002   ret <2 x double> %res
   2003 }
   2004 declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly
   2005 
   2006 
   2007 define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) {
   2008   ; CHECK: vmaskmovpd
   2009   %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2010   ret <4 x double> %res
   2011 }
   2012 declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly
   2013 
   2014 
   2015 define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) {
   2016   ; CHECK: vmaskmovps
   2017   %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   2018   ret <4 x float> %res
   2019 }
   2020 declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly
   2021 
   2022 
   2023 define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) {
   2024   ; CHECK: vmaskmovps
   2025   %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2026   ret <8 x float> %res
   2027 }
   2028 declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
   2029 
   2030 
   2031 define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) {
   2032   ; CHECK: vmaskmovpd
   2033   call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2)
   2034   ret void
   2035 }
   2036 declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind
   2037 
   2038 
   2039 define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) {
   2040   ; CHECK: vmaskmovpd
   2041   call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2)
   2042   ret void
   2043 }
   2044 declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind
   2045 
   2046 
   2047 define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) {
   2048   ; CHECK: vmaskmovps
   2049   call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2)
   2050   ret void
   2051 }
   2052 declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind
   2053 
   2054 
   2055 define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) {
   2056   ; CHECK: vmaskmovps
   2057   call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2)
   2058   ret void
   2059 }
   2060 declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
   2061 
   2062 
   2063 define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
   2064   ; CHECK: vmaxpd
   2065   %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2066   ret <4 x double> %res
   2067 }
   2068 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2069 
   2070 
   2071 define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2072   ; CHECK: vmaxps
   2073   %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2074   ret <8 x float> %res
   2075 }
   2076 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2077 
   2078 
   2079 define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
   2080   ; CHECK: vminpd
   2081   %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2082   ret <4 x double> %res
   2083 }
   2084 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2085 
   2086 
   2087 define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2088   ; CHECK: vminps
   2089   %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2090   ret <8 x float> %res
   2091 }
   2092 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2093 
   2094 
   2095 define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
   2096   ; CHECK: vmovmskpd
   2097   %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
   2098   ret i32 %res
   2099 }
   2100 declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
   2101 
   2102 
   2103 define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
   2104   ; CHECK: vmovmskps
   2105   %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
   2106   ret i32 %res
   2107 }
   2108 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
   2109 
   2110 
   2111 
   2112 
   2113 
   2114 
   2115 
   2116 define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
   2117   ; CHECK: vptest
   2118   ; CHECK: sbbl
   2119   %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
   2120   ret i32 %res
   2121 }
   2122 declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
   2123 
   2124 
   2125 define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
   2126   ; CHECK: vptest
   2127   ; CHECK: seta
   2128   ; CHECK: movzbl
   2129   %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
   2130   ret i32 %res
   2131 }
   2132 declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone
   2133 
   2134 
   2135 define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
   2136   ; CHECK: vptest
   2137   ; CHECK: sete
   2138   ; CHECK: movzbl
   2139   %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
   2140   ret i32 %res
   2141 }
   2142 declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
   2143 
   2144 
   2145 define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
   2146   ; CHECK: vrcpps
   2147   %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
   2148   ret <8 x float> %res
   2149 }
   2150 declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
   2151 
   2152 
   2153 define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
   2154   ; CHECK: vroundpd
   2155   %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
   2156   ret <4 x double> %res
   2157 }
   2158 declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
   2159 
   2160 
   2161 define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
   2162   ; CHECK: vroundps
   2163   %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
   2164   ret <8 x float> %res
   2165 }
   2166 declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
   2167 
   2168 
   2169 define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
   2170   ; CHECK: vrsqrtps
   2171   %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
   2172   ret <8 x float> %res
   2173 }
   2174 declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
   2175 
   2176 
   2177 define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
   2178   ; CHECK: vsqrtpd
   2179   %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
   2180   ret <4 x double> %res
   2181 }
   2182 declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
   2183 
   2184 
   2185 define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
   2186   ; CHECK: vsqrtps
   2187   %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
   2188   ret <8 x float> %res
   2189 }
   2190 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
   2191 
   2192 
   2193 define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
   2194   ; FIXME: Unfortunately the execution domain fix pass changes this to vmovups and it's hard to force with no 256-bit integer instructions.
   2195   ; CHECK: vmovups
   2196   ; add operation forces the execution domain.
   2197   %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   2198   call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
   2199   ret void
   2200 }
   2201 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
   2202 
   2203 
   2204 define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
   2205   ; CHECK: vmovupd
   2206   ; add operation forces the execution domain.
   2207   %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
   2208   call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
   2209   ret void
   2210 }
   2211 declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
   2212 
   2213 
   2214 define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
   2215   ; CHECK: vmovups
   2216   call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
   2217   ret void
   2218 }
   2219 declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
   2220 
   2221 
   2222 define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) {
   2223   ; CHECK: vbroadcastsd
   2224   %res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1]
   2225   ret <4 x double> %res
   2226 }
   2227 declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly
   2228 
   2229 
   2230 define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
   2231   ; CHECK: vbroadcastf128
   2232   %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
   2233   ret <4 x double> %res
   2234 }
   2235 declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
   2236 
   2237 
   2238 define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
   2239   ; CHECK: vbroadcastf128
   2240   %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
   2241   ret <8 x float> %res
   2242 }
   2243 declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
   2244 
   2245 
   2246 define <4 x float> @test_x86_avx_vbroadcast_ss(i8* %a0) {
   2247   ; CHECK: vbroadcastss
   2248   %res = call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %a0) ; <<4 x float>> [#uses=1]
   2249   ret <4 x float> %res
   2250 }
   2251 declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*) nounwind readonly
   2252 
   2253 
   2254 define <8 x float> @test_x86_avx_vbroadcast_ss_256(i8* %a0) {
   2255   ; CHECK: vbroadcastss
   2256   %res = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %a0) ; <<8 x float>> [#uses=1]
   2257   ret <8 x float> %res
   2258 }
   2259 declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) nounwind readonly
   2260 
   2261 
   2262 define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
   2263   ; CHECK: vextractf128
   2264   %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
   2265   ret <2 x double> %res
   2266 }
   2267 declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
   2268 
   2269 
   2270 define <4 x float> @test_x86_avx_vextractf128_ps_256(<8 x float> %a0) {
   2271   ; CHECK: vextractf128
   2272   %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
   2273   ret <4 x float> %res
   2274 }
   2275 declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
   2276 
   2277 
   2278 define <4 x i32> @test_x86_avx_vextractf128_si_256(<8 x i32> %a0) {
   2279   ; CHECK: vextractf128
   2280   %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 7) ; <<4 x i32>> [#uses=1]
   2281   ret <4 x i32> %res
   2282 }
   2283 declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
   2284 
   2285 
   2286 define <4 x double> @test_x86_avx_vinsertf128_pd_256(<4 x double> %a0, <2 x double> %a1) {
   2287   ; CHECK: vinsertf128
   2288   %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
   2289   ret <4 x double> %res
   2290 }
   2291 declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
   2292 
   2293 
   2294 define <8 x float> @test_x86_avx_vinsertf128_ps_256(<8 x float> %a0, <4 x float> %a1) {
   2295   ; CHECK: vinsertf128
   2296   %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   2297   ret <8 x float> %res
   2298 }
   2299 declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
   2300 
   2301 
   2302 define <8 x i32> @test_x86_avx_vinsertf128_si_256(<8 x i32> %a0, <4 x i32> %a1) {
   2303   ; CHECK: vinsertf128
   2304   %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
   2305   ret <8 x i32> %res
   2306 }
   2307 declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
   2308 
   2309 
   2310 define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
   2311   ; CHECK: vperm2f128
   2312   %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
   2313   ret <4 x double> %res
   2314 }
   2315 declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
   2316 
   2317 
   2318 define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2319   ; CHECK: vperm2f128
   2320   %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   2321   ret <8 x float> %res
   2322 }
   2323 declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
   2324 
   2325 
   2326 define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
   2327   ; CHECK: vperm2f128
   2328   %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
   2329   ret <8 x i32> %res
   2330 }
   2331 declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
   2332 
   2333 
   2334 define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
   2335   ; CHECK: vpermilpd
   2336   %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
   2337   ret <2 x double> %res
   2338 }
   2339 declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
   2340 
   2341 
   2342 define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
   2343   ; CHECK: vpermilpd
   2344   %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
   2345   ret <4 x double> %res
   2346 }
   2347 declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
   2348 
   2349 
   2350 define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
   2351   ; CHECK: vpshufd
   2352   %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
   2353   ret <4 x float> %res
   2354 }
   2355 declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
   2356 
   2357 
   2358 define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
   2359   ; CHECK: vpermilps
   2360   %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
   2361   ret <8 x float> %res
   2362 }
   2363 declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
   2364 
   2365 
   2366 define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
   2367   ; CHECK: vpermilpd
   2368   %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
   2369   ret <2 x double> %res
   2370 }
   2371 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
   2372 
   2373 
   2374 define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
   2375   ; CHECK: vpermilpd
   2376   %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
   2377   ret <4 x double> %res
   2378 }
   2379 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
   2380 
   2381 
   2382 define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
   2383   ; CHECK: vpermilps
   2384   %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
   2385   ret <4 x float> %res
   2386 }
   2387 define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
   2388   ; CHECK: vpermilps
   2389   %a2 = load <4 x i32>* %a1
   2390   %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
   2391   ret <4 x float> %res
   2392 }
   2393 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
   2394 
   2395 
   2396 define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
   2397   ; CHECK: vpermilps
   2398   %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
   2399   ret <8 x float> %res
   2400 }
   2401 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
   2402 
   2403 
   2404 define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
   2405   ; CHECK: vtestpd
   2406   ; CHECK: sbbl
   2407   %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   2408   ret i32 %res
   2409 }
   2410 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
   2411 
   2412 
   2413 define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
   2414   ; CHECK: vtestpd
   2415   ; CHECK: sbbl
   2416   %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
   2417   ret i32 %res
   2418 }
   2419 declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2420 
   2421 
   2422 define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
   2423   ; CHECK: vtestps
   2424   ; CHECK: sbbl
   2425   %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2426   ret i32 %res
   2427 }
   2428 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
   2429 
   2430 
   2431 define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2432   ; CHECK: vtestps
   2433   ; CHECK: sbbl
   2434   %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
   2435   ret i32 %res
   2436 }
   2437 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2438 
   2439 
   2440 define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
   2441   ; CHECK: vtestpd
   2442   ; CHECK: seta
   2443   ; CHECK: movzbl
   2444   %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   2445   ret i32 %res
   2446 }
   2447 declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone
   2448 
   2449 
   2450 define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
   2451   ; CHECK: vtestpd
   2452   ; CHECK: seta
   2453   ; CHECK: movzbl
   2454   %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
   2455   ret i32 %res
   2456 }
   2457 declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2458 
   2459 
   2460 define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
   2461   ; CHECK: vtestps
   2462   ; CHECK: seta
   2463   ; CHECK: movzbl
   2464   %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2465   ret i32 %res
   2466 }
   2467 declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone
   2468 
   2469 
   2470 define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2471   ; CHECK: vtestps
   2472   ; CHECK: seta
   2473   ; CHECK: movzbl
   2474   %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
   2475   ret i32 %res
   2476 }
   2477 declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2478 
   2479 
   2480 define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
   2481   ; CHECK: vtestpd
   2482   ; CHECK: sete
   2483   ; CHECK: movzbl
   2484   %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   2485   ret i32 %res
   2486 }
   2487 declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
   2488 
   2489 
   2490 define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
   2491   ; CHECK: vtestpd
   2492   ; CHECK: sete
   2493   ; CHECK: movzbl
   2494   %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
   2495   ret i32 %res
   2496 }
   2497 declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2498 
   2499 
   2500 define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
   2501   ; CHECK: vtestps
   2502   ; CHECK: sete
   2503   ; CHECK: movzbl
   2504   %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2505   ret i32 %res
   2506 }
   2507 declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
   2508 
   2509 
   2510 define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2511   ; CHECK: vtestps
   2512   ; CHECK: sete
   2513   ; CHECK: movzbl
   2514   %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
   2515   ret i32 %res
   2516 }
   2517 declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2518 
   2519 
define void @test_x86_avx_vzeroall() {
  ; CHECK: vzeroall
  call void @llvm.x86.avx.vzeroall()
  ret void
}
declare void @llvm.x86.avx.vzeroall() nounwind


define void @test_x86_avx_vzeroupper() {
  ; CHECK: vzeroupper
  call void @llvm.x86.avx.vzeroupper()
  ret void
}
declare void @llvm.x86.avx.vzeroupper() nounwind

; Make sure instructions that have no AVX equivalents but are associated with the
; SSE* feature flags still work.

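; monitor arms an address-range monitor (address in EAX, extensions in ECX, hints
; in EDX); mwait waits for a write to the monitored range (extensions in ECX,
; hints in EAX).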
; CHECK: monitor
define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
entry:
  tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
  ret void
}
declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind

; CHECK: mwait
define void @mwait(i32 %E, i32 %H) nounwind {
entry:
  tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
  ret void
}
declare void @llvm.x86.sse3.mwait(i32, i32) nounwind

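; The fences order memory accesses: sfence orders stores, lfence orders loads,
; and mfence orders both loads and stores.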
; CHECK: sfence
define void @sfence() nounwind {
entry:
  tail call void @llvm.x86.sse.sfence()
  ret void
}
declare void @llvm.x86.sse.sfence() nounwind

; CHECK: lfence
define void @lfence() nounwind {
entry:
  tail call void @llvm.x86.sse2.lfence()
  ret void
}
declare void @llvm.x86.sse2.lfence() nounwind

; CHECK: mfence
define void @mfence() nounwind {
entry:
  tail call void @llvm.x86.sse2.mfence()
  ret void
}
declare void @llvm.x86.sse2.mfence() nounwind

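; clflush flushes the cache line containing the pointed-to byte from every level
; of the cache hierarchy.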
; CHECK: clflush
define void @clflush(i8* %p) nounwind {
entry:
  tail call void @llvm.x86.sse2.clflush(i8* %p)
  ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind

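; The SSE4.2 crc32 instruction has no VEX encoding; the 8-, 16- and 32-bit source
; forms select crc32b, crc32w and crc32l respectively.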
; CHECK: crc32b
define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
  %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind

; CHECK: crc32w
define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
  %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind

; CHECK: crc32l
define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
  %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
  ret i32 %tmp
}
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind

; CHECK: movntdq
define void @movnt_dq(i8* %p, <4 x i64> %a1) nounwind {
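  ; As with movnt_pd below, the add operation forces the execution domain of the
  ; stored value.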
  %a2 = add <4 x i64> %a1, <i64 1, i64 1, i64 1, i64 1>
  tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a2) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind

; CHECK: movntps
define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
  tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind

; CHECK: movntpd
define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
  ; The fadd operation forces the floating-point execution domain.
  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
  tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind


; Check that the carry-less multiply intrinsic selects the VEX-encoded vpclmulqdq.
define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vpclmulqdq
  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
   2638