Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s
      2 
      3 define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
      4   ; CHECK: vaesdec
      5   %vec = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; expected to lower to vaesdec (pattern is also a prefix of vaesdeclast)
      6   ret <2 x i64> %vec
      7 }
      8 declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
      9 
     10 
     11 define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
     12   ; CHECK: vaesdeclast
     13   %vec = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; expected to lower to vaesdeclast
     14   ret <2 x i64> %vec
     15 }
     16 declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone
     17 
     18 
     19 define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
     20   ; CHECK: vaesenc
     21   %vec = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; expected to lower to vaesenc (pattern is also a prefix of vaesenclast)
     22   ret <2 x i64> %vec
     23 }
     24 declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
     25 
     26 
     27 define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
     28   ; CHECK: vaesenclast
     29   %vec = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; expected to lower to vaesenclast
     30   ret <2 x i64> %vec
     31 }
     32 declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone
     33 
     34 
     35 define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
     36   ; CHECK: vaesimc
     37   %vec = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; single-operand intrinsic; expected to lower to vaesimc
     38   ret <2 x i64> %vec
     39 }
     40 declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
     41 
     42 
     43 define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
     44   ; CHECK: vaeskeygenassist
     45   %vec = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; immediate operand 7; expected to lower to vaeskeygenassist
     46   ret <2 x i64> %vec
     47 }
     48 declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
     49 
     50 
     51 define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
     52   ; CHECK: vaddsd
     53   %vec = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; expected to lower to vaddsd
     54   ret <2 x double> %vec
     55 }
     56 declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
     57 
     58 
     59 define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
     60   ; CHECK: vcmpordpd
     61   %vec = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; predicate immediate 7 is expected to print as the "ord" form, vcmpordpd
     62   ret <2 x double> %vec
     63 }
     64 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
     65 
     66 
     67 define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
     68   ; CHECK: vcmpordsd
     69   %vec = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; predicate immediate 7 is expected to print as the "ord" form, vcmpordsd
     70   ret <2 x double> %vec
     71 }
     72 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
     73 
     74 
     75 define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
     76   ; CHECK: vcomisd
     77   ; CHECK: sete
     78   ; CHECK: movzbl
     79   %flag = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; expected sequence: vcomisd, sete, movzbl
     80   ret i32 %flag
     81 }
     82 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
     83 
     84 
     85 define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
     86   ; CHECK: vcomisd
     87   ; CHECK: setae
     88   ; CHECK: movzbl
     89   %flag = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; expected sequence: vcomisd, setae, movzbl
     90   ret i32 %flag
     91 }
     92 declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
     93 
     94 
     95 define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
     96   ; CHECK: vcomisd
     97   ; CHECK: seta
     98   ; CHECK: movzbl
     99   %flag = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; expected sequence: vcomisd, seta, movzbl (seta is also a prefix of setae)
    100   ret i32 %flag
    101 }
    102 declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
    103 
    104 
    105 define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
    106   ; CHECK: vcomisd
    107   ; CHECK: setbe
    108   ; CHECK: movzbl
    109   %flag = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; expected sequence: vcomisd, setbe, movzbl
    110   ret i32 %flag
    111 }
    112 declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
    113 
    114 
    115 define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
    116   ; CHECK: vcomisd
    117   ; CHECK: sbbl    %eax, %eax
    118   ; CHECK: andl    $1, %eax
    119   %flag = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; unlike the other comi tests, the expected sequence is vcomisd, sbbl, andl (no setcc)
    120   ret i32 %flag
    121 }
    122 declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
    123 
    124 
    125 define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
    126   ; CHECK: vcomisd
    127   ; CHECK: setne
    128   ; CHECK: movzbl
    129   %flag = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; expected sequence: vcomisd, setne, movzbl
    130   ret i32 %flag
    131 }
    132 declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
    133 
    134 
    135 define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
    136   ; CHECK: vcvtdq2pd
    137   %vec = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <4 x i32> in, <2 x double> out; expected to lower to vcvtdq2pd
    138   ret <2 x double> %vec
    139 }
    140 declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
    141 
    142 
    143 define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
    144   ; CHECK: vcvtdq2ps
    145   %vec = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <4 x i32> in, <4 x float> out; expected to lower to vcvtdq2ps
    146   ret <4 x float> %vec
    147 }
    148 declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
    149 
    150 
    151 define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
    152   ; CHECK: vcvtpd2dq
    153   %vec = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <2 x double> in, <4 x i32> out; expected to lower to vcvtpd2dq
    154   ret <4 x i32> %vec
    155 }
    156 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
    157 
    158 
    159 define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
    160   ; CHECK: vcvtpd2ps
    161   %vec = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <2 x double> in, <4 x float> out; expected to lower to vcvtpd2ps
    162   ret <4 x float> %vec
    163 }
    164 declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
    165 
    166 
    167 define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
    168   ; CHECK: vcvtps2dq
    169   %vec = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <4 x float> in, <4 x i32> out; expected to lower to vcvtps2dq
    170   ret <4 x i32> %vec
    171 }
    172 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
    173 
    174 
    175 define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
    176   ; CHECK: vcvtps2pd
    177   %vec = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <4 x float> in, <2 x double> out; expected to lower to vcvtps2pd
    178   ret <2 x double> %vec
    179 }
    180 declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
    181 
    182 
    183 define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
    184   ; CHECK: vcvtsd2si
    185   %int = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <2 x double> in, i32 out; expected to lower to vcvtsd2si
    186   ret i32 %int
    187 }
    188 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
    189 
    190 
    191 define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
    192   ; CHECK: vcvtsd2ss
    193   %vec = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; mixed float/double operands; expected to lower to vcvtsd2ss
    194   ret <4 x float> %vec
    195 }
    196 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
    197 
    198 
    199 define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
    200   ; CHECK: movl
    201   ; CHECK: vcvtsi2sd
    202   %vec = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; constant i32 7 operand; a movl is expected before vcvtsi2sd
    203   ret <2 x double> %vec
    204 }
    205 declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
    206 
    207 
    208 define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
    209   ; CHECK: vcvtss2sd
    210   %vec = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; mixed double/float operands; expected to lower to vcvtss2sd
    211   ret <2 x double> %vec
    212 }
    213 declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
    214 
    215 
    216 define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
    217   ; CHECK: vcvttpd2dq
    218   %vec = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <2 x double> in, <4 x i32> out; expected to lower to vcvttpd2dq
    219   ret <4 x i32> %vec
    220 }
    221 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
    222 
    223 
    224 define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
    225   ; CHECK: vcvttps2dq
    226   %vec = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <4 x float> in, <4 x i32> out; expected to lower to vcvttps2dq
    227   ret <4 x i32> %vec
    228 }
    229 declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
    230 
    231 
    232 define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
    233   ; CHECK: vcvttsd2si
    234   %int = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <2 x double> in, i32 out; expected to lower to vcvttsd2si
    235   ret i32 %int
    236 }
    237 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
    238 
    239 
    240 define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
    241   ; CHECK: vdivsd
    242   %vec = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; expected to lower to vdivsd
    243   ret <2 x double> %vec
    244 }
    245 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
    246 
    247 
    248 define <16 x i8> @test_x86_sse2_loadu_dq(i8* %a0) {
    249   ; CHECK: movl
    250   ; CHECK: vmovups
    251   %vec = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a0) ; load through %a0; a movl then vmovups is expected (NOTE(review): FP-style mnemonic for a <16 x i8> load - confirm intended)
    252   ret <16 x i8> %vec
    253 }
    254 declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readonly
    255 
    256 
    257 define <2 x double> @test_x86_sse2_loadu_pd(i8* %a0) {
    258   ; CHECK: movl
    259   ; CHECK: vmovups
    260   %vec = call <2 x double> @llvm.x86.sse2.loadu.pd(i8* %a0) ; load through %a0; a movl then vmovups is expected
    261   ret <2 x double> %vec
    262 }
    263 declare <2 x double> @llvm.x86.sse2.loadu.pd(i8*) nounwind readonly
    264 
    265 
    266 define void @test_x86_sse2_maskmov_dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) {
    267   ; CHECK: pushl
    268   ; CHECK: movl
    269   ; CHECK: vmaskmovdqu
    270   ; CHECK: popl
    271   call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) ; no result value; expected output order is pushl, movl, vmaskmovdqu, popl
    272   ret void
    273 }
    274 declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
    275 
    276 
    277 define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
    278   ; CHECK: vmaxpd
    279   %vec = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; expected to lower to vmaxpd
    280   ret <2 x double> %vec
    281 }
    282 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
    283 
    284 
    285 define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
    286   ; CHECK: vmaxsd
    287   %vec = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; expected to lower to vmaxsd
    288   ret <2 x double> %vec
    289 }
    290 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
    291 
    292 
    293 define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
    294   ; CHECK: vminpd
    295   %vec = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; expected to lower to vminpd
    296   ret <2 x double> %vec
    297 }
    298 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
    299 
    300 
    301 define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
    302   ; CHECK: vminsd
    303   %vec = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; expected to lower to vminsd
    304   ret <2 x double> %vec
    305 }
    306 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
    307 
    308 
    309 define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
    310   ; CHECK: vmovmskpd
    311   %mask = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <2 x double> in, i32 out; expected to lower to vmovmskpd
    312   ret i32 %mask
    313 }
    314 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
    315 
    316 
    317 define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
    318   ; CHECK: movl
    319   ; CHECK: vmovntdq
    320   call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1) ; no result value; a movl then vmovntdq is expected
    321   ret void
    322 }
    323 declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind
    324 
    325 
    326 define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
    327   ; CHECK: movl
    328   ; CHECK: vmovntpd
    329   call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1) ; no result value; a movl then vmovntpd is expected
    330   ret void
    331 }
    332 declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind
    333 
    334 
    335 define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
    336   ; CHECK: vmulsd
    337   %vec = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; expected to lower to vmulsd
    338   ret <2 x double> %vec
    339 }
    340 declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
    341 
    342 
    343 define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
    344   ; CHECK: vpackssdw
    345   %vec = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; two <4 x i32> in, <8 x i16> out; expected to lower to vpackssdw
    346   ret <8 x i16> %vec
    347 }
    348 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
    349 
    350 
    351 define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
    352   ; CHECK: vpacksswb
    353   %vec = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; two <8 x i16> in, <16 x i8> out; expected to lower to vpacksswb
    354   ret <16 x i8> %vec
    355 }
    356 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
    357 
    358 
    359 define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
    360   ; CHECK: vpackuswb
    361   %vec = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; two <8 x i16> in, <16 x i8> out; expected to lower to vpackuswb
    362   ret <16 x i8> %vec
    363 }
    364 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
    365 
    366 
    367 define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
    368   ; CHECK: vpaddsb
    369   %vec = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; expected to lower to vpaddsb
    370   ret <16 x i8> %vec
    371 }
    372 declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
    373 
    374 
    375 define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
    376   ; CHECK: vpaddsw
    377   %vec = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; expected to lower to vpaddsw
    378   ret <8 x i16> %vec
    379 }
    380 declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
    381 
    382 
    383 define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
    384   ; CHECK: vpaddusb
    385   %vec = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; expected to lower to vpaddusb
    386   ret <16 x i8> %vec
    387 }
    388 declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
    389 
    390 
    391 define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
    392   ; CHECK: vpaddusw
    393   %vec = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; expected to lower to vpaddusw
    394   ret <8 x i16> %vec
    395 }
    396 declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
    397 
    398 
    399 define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
    400   ; CHECK: vpavgb
    401   %vec = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; expected to lower to vpavgb
    402   ret <16 x i8> %vec
    403 }
    404 declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
    405 
    406 
    407 define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
    408   ; CHECK: vpavgw
    409   %vec = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; expected to lower to vpavgw
    410   ret <8 x i16> %vec
    411 }
    412 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
    413 
    414 
    415 define <16 x i8> @test_x86_sse2_pcmpeq_b(<16 x i8> %a0, <16 x i8> %a1) {
    416   ; CHECK: vpcmpeqb
    417   %vec = call <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8> %a0, <16 x i8> %a1) ; expected to lower to vpcmpeqb
    418   ret <16 x i8> %vec
    419 }
    420 declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone
    421 
    422 
    423 define <4 x i32> @test_x86_sse2_pcmpeq_d(<4 x i32> %a0, <4 x i32> %a1) {
    424   ; CHECK: vpcmpeqd
    425   %vec = call <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32> %a0, <4 x i32> %a1) ; expected to lower to vpcmpeqd
    426   ret <4 x i32> %vec
    427 }
    428 declare <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32>, <4 x i32>) nounwind readnone
    429 
    430 
    431 define <8 x i16> @test_x86_sse2_pcmpeq_w(<8 x i16> %a0, <8 x i16> %a1) {
    432   ; CHECK: vpcmpeqw
    433   %vec = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %a0, <8 x i16> %a1) ; expected to lower to vpcmpeqw
    434   ret <8 x i16> %vec
    435 }
    436 declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone
    437 
    438 
    439 define <16 x i8> @test_x86_sse2_pcmpgt_b(<16 x i8> %a0, <16 x i8> %a1) {
    440   ; CHECK: vpcmpgtb
    441   %vec = call <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8> %a0, <16 x i8> %a1) ; expected to lower to vpcmpgtb
    442   ret <16 x i8> %vec
    443 }
    444 declare <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8>, <16 x i8>) nounwind readnone
    445 
    446 
    447 define <4 x i32> @test_x86_sse2_pcmpgt_d(<4 x i32> %a0, <4 x i32> %a1) {
    448   ; CHECK: vpcmpgtd
    449   %vec = call <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32> %a0, <4 x i32> %a1) ; expected to lower to vpcmpgtd
    450   ret <4 x i32> %vec
    451 }
    452 declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>) nounwind readnone
    453 
    454 
    455 define <8 x i16> @test_x86_sse2_pcmpgt_w(<8 x i16> %a0, <8 x i16> %a1) {
    456   ; CHECK: vpcmpgtw
    457   %vec = call <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16> %a0, <8 x i16> %a1) ; expected to lower to vpcmpgtw
    458   ret <8 x i16> %vec
    459 }
    460 declare <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16>, <8 x i16>) nounwind readnone
    461 
    462 
    463 define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
    464   ; CHECK: vpmaddwd
    465   %vec = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; two <8 x i16> in, <4 x i32> out; expected to lower to vpmaddwd
    466   ret <4 x i32> %vec
    467 }
    468 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
    469 
    470 
    471 define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
    472   ; CHECK: vpmaxsw
    473   %vec = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; expected to lower to vpmaxsw
    474   ret <8 x i16> %vec
    475 }
    476 declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
    477 
    478 
    479 define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
    480   ; CHECK: vpmaxub
    481   %vec = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; expected to lower to vpmaxub
    482   ret <16 x i8> %vec
    483 }
    484 declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
    485 
    486 
    487 define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
    488   ; CHECK: vpminsw
    489   %vec = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; expected to lower to vpminsw
    490   ret <8 x i16> %vec
    491 }
    492 declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
    493 
    494 
    495 define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
    496   ; CHECK: vpminub
    497   %vec = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; expected to lower to vpminub
    498   ret <16 x i8> %vec
    499 }
    500 declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
    501 
    502 
    503 define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
    504   ; CHECK: vpmovmskb
    505   %mask = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <16 x i8> in, i32 out; expected to lower to vpmovmskb
    506   ret i32 %mask
    507 }
    508 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
    509 
    510 
    511 define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
    512   ; CHECK: vpmulhw
    513   %vec = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; expected to lower to vpmulhw
    514   ret <8 x i16> %vec
    515 }
    516 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
    517 
    518 
    519 define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
    520   ; CHECK: vpmulhuw
    521   %vec = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; expected to lower to vpmulhuw
    522   ret <8 x i16> %vec
    523 }
    524 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
    525 
    526 
    527 define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
    528   ; CHECK: vpmuludq
    529   %vec = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; two <4 x i32> in, <2 x i64> out; expected to lower to vpmuludq
    530   ret <2 x i64> %vec
    531 }
    532 declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
    533 
    534 
    535 define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
    536   ; CHECK: vpsadbw
    537   %vec = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; two <16 x i8> in, <2 x i64> out; expected to lower to vpsadbw
    538   ret <2 x i64> %vec
    539 }
    540 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
    541 
    542 
    543 define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
    544   ; CHECK: vpslld
    545   %vec = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; vector second operand; expected to lower to vpslld (pattern is also a prefix of vpslldq)
    546   ret <4 x i32> %vec
    547 }
    548 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
    549 
    550 
    551 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
    552   ; CHECK: vpslldq
    553   %vec = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; immediate operand 7; expected to lower to vpslldq
    554   ret <2 x i64> %vec
    555 }
    556 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
    557 
    558 
    559 define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
    560   ; CHECK: vpslldq
    561   %vec = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; ".bs" variant, immediate operand 7; same expected mnemonic as psll.dq
    562   ret <2 x i64> %vec
    563 }
    564 declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
    565 
    566 
    567 define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
    568   ; CHECK: vpsllq
    569   %vec = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; vector second operand; expected to lower to vpsllq
    570   ret <2 x i64> %vec
    571 }
    572 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
    573 
    574 
    575 define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
    576   ; CHECK: vpsllw
    577   %vec = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; vector second operand; expected to lower to vpsllw
    578   ret <8 x i16> %vec
    579 }
    580 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
    581 
    582 
    583 define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
    584   ; CHECK: vpslld
    585   %vec = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; immediate operand 7; expected to lower to vpslld
    586   ret <4 x i32> %vec
    587 }
    588 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
    589 
    590 
    591 define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
    592   ; CHECK: vpsllq
    593   %vec = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; immediate operand 7; expected to lower to vpsllq
    594   ret <2 x i64> %vec
    595 }
    596 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
    597 
    598 
    599 define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
    600   ; CHECK: vpsllw
    601   %vec = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; immediate operand 7; expected to lower to vpsllw
    602   ret <8 x i16> %vec
    603 }
    604 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
    605 
    606 
    607 define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
    608   ; CHECK: vpsrad
    609   %vec = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; vector second operand; expected to lower to vpsrad
    610   ret <4 x i32> %vec
    611 }
    612 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
    613 
    614 
    615 define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
    616   ; CHECK: vpsraw
    617   %vec = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; vector second operand; expected to lower to vpsraw
    618   ret <8 x i16> %vec
    619 }
    620 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
    621 
    622 
    623 define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
    624   ; CHECK: vpsrad
    625   %vec = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; immediate operand 7; expected to lower to vpsrad
    626   ret <4 x i32> %vec
    627 }
    628 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
    629 
    630 
    631 define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
    632   ; CHECK: vpsraw
    633   %vec = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; immediate operand 7; expected to lower to vpsraw
    634   ret <8 x i16> %vec
    635 }
    636 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
    637 
    638 
    639 define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
    640   ; CHECK: vpsrld
    641   %vec = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; vector second operand; expected to lower to vpsrld (pattern is also a prefix of vpsrldq)
    642   ret <4 x i32> %vec
    643 }
    644 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
    645 
    646 
    647 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
    648   ; CHECK: vpsrldq
    649   %vec = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; immediate operand 7; expected to lower to vpsrldq
    650   ret <2 x i64> %vec
    651 }
    652 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
    653 
    654 
    655 define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
    656   ; CHECK: vpsrldq
    657   %vec = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; ".bs" variant, immediate operand 7; same expected mnemonic as psrl.dq
    658   ret <2 x i64> %vec
    659 }
    660 declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
    661 
    662 
    663 define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
    664   ; CHECK: vpsrlq
    665   %vec = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; vector second operand; expected to lower to vpsrlq
    666   ret <2 x i64> %vec
    667 }
    668 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
    669 
    670 
    671 define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
    672   ; CHECK: vpsrlw
    673   %vec = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; vector second operand; expected to lower to vpsrlw
    674   ret <8 x i16> %vec
    675 }
    676 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
    677 
    678 
    679 define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
    680   ; CHECK: vpsrld
    681   %vec = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; immediate operand 7; expected to lower to vpsrld
    682   ret <4 x i32> %vec
    683 }
    684 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
    685 
    686 
    687 define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
    688   ; CHECK: vpsrlq
    689   %vec = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; immediate operand 7; expected to lower to vpsrlq
    690   ret <2 x i64> %vec
    691 }
    692 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
    693 
    694 
    695 define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
    696   ; CHECK: vpsrlw
    697   %vec = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; immediate operand 7; expected to lower to vpsrlw
    698   ret <8 x i16> %vec
    699 }
    700 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
    701 
    702 
    703 define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
    704   ; CHECK: vpsubsb
    705   %vec = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; expected to lower to vpsubsb
    706   ret <16 x i8> %vec
    707 }
    708 declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
    709 
    710 
    711 define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
    712   ; CHECK: vpsubsw
    713   %vec = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; expected to lower to vpsubsw
    714   ret <8 x i16> %vec
    715 }
    716 declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
    717 
    718 
    719 define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
    720   ; CHECK: vpsubusb
    721   %vec = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; expected to lower to vpsubusb
    722   ret <16 x i8> %vec
    723 }
    724 declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
    725 
    726 
    727 define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
    728   ; CHECK: vpsubusw
    729   %vec = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; expected to lower to vpsubusw
    730   ret <8 x i16> %vec
    731 }
    732 declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
    733 
    734 
    735 define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) { ; codegen test for @llvm.x86.sse2.sqrt.pd
    736   ; CHECK: vsqrtpd
    737   %out = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; single use: returned below
    738   ret <2 x double> %out
    739 }
    740 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
    741 
    742 
    743 define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) { ; codegen test for @llvm.x86.sse2.sqrt.sd
    744   ; CHECK: vsqrtsd
    745   %out = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; single use: returned below
    746   ret <2 x double> %out
    747 }
    748 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
    749 
    750 
    751 define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) { ; codegen test for @llvm.x86.sse2.storel.dq
    752   ; CHECK: movl
    753   ; CHECK: vmovq
    754   call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1) ; no result value; pointer %a0 and vector %a1 are passed straight through
    755   ret void
    756 }
    757 declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
    758 
    759 
    760 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { ; codegen test for @llvm.x86.sse2.storeu.dq
    761   ; CHECK: movl
    762   ; CHECK: vmovdqu
    763   call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1) ; no result value; pointer %a0 and vector %a1 are passed straight through
    764   ret void
    765 }
    766 declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
    767 
    768 
    769 define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { ; codegen test for @llvm.x86.sse2.storeu.pd
    770   ; CHECK: movl
    771   ; CHECK: vmovupd
    772   call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1) ; no result value; pointer %a0 and vector %a1 are passed straight through
    773   ret void
    774 }
    775 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
    776 
    777 
    778 define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) { ; codegen test for @llvm.x86.sse2.sub.sd
    779   ; CHECK: vsubsd
    780   %out = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; single use: returned below
    781   ret <2 x double> %out
    782 }
    783 declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
    784 
    785 
    786 define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) { ; codegen test for @llvm.x86.sse2.ucomieq.sd
    787   ; CHECK: vucomisd
    788   ; CHECK: sete
    789   ; CHECK: movzbl
    790   %out = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; single use: returned below
    791   ret i32 %out
    792 }
    793 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
    794 
    795 
    796 define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) { ; codegen test for @llvm.x86.sse2.ucomige.sd
    797   ; CHECK: vucomisd
    798   ; CHECK: setae
    799   ; CHECK: movzbl
    800   %out = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; single use: returned below
    801   ret i32 %out
    802 }
    803 declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
    804 
    805 
    806 define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) { ; codegen test for @llvm.x86.sse2.ucomigt.sd
    807   ; CHECK: vucomisd
    808   ; CHECK: seta
    809   ; CHECK: movzbl
    810   %out = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; single use: returned below
    811   ret i32 %out
    812 }
    813 declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
    814 
    815 
    816 define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) { ; codegen test for @llvm.x86.sse2.ucomile.sd
    817   ; CHECK: vucomisd
    818   ; CHECK: setbe
    819   ; CHECK: movzbl
    820   %out = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; single use: returned below
    821   ret i32 %out
    822 }
    823 declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
    824 
    825 
    826 define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) { ; codegen test for @llvm.x86.sse2.ucomilt.sd
    827   ; CHECK: vucomisd
    828   ; CHECK: sbbl
    829   %out = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; single use: returned below
    830   ret i32 %out
    831 }
    832 declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
    833 
    834 
    835 define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) { ; codegen test for @llvm.x86.sse2.ucomineq.sd
    836   ; CHECK: vucomisd
    837   ; CHECK: setne
    838   ; CHECK: movzbl
    839   %out = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; single use: returned below
    840   ret i32 %out
    841 }
    842 declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
    843 
    844 
    845 define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) { ; codegen test for @llvm.x86.sse3.addsub.pd
    846   ; CHECK: vaddsubpd
    847   %out = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; single use: returned below
    848   ret <2 x double> %out
    849 }
    850 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
    851 
    852 
    853 define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse3.addsub.ps
    854   ; CHECK: vaddsubps
    855   %out = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; single use: returned below
    856   ret <4 x float> %out
    857 }
    858 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
    859 
    860 
    861 define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) { ; codegen test for @llvm.x86.sse3.hadd.pd
    862   ; CHECK: vhaddpd
    863   %out = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; single use: returned below
    864   ret <2 x double> %out
    865 }
    866 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
    867 
    868 
    869 define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse3.hadd.ps
    870   ; CHECK: vhaddps
    871   %out = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; single use: returned below
    872   ret <4 x float> %out
    873 }
    874 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
    875 
    876 
    877 define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) { ; codegen test for @llvm.x86.sse3.hsub.pd
    878   ; CHECK: vhsubpd
    879   %out = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; single use: returned below
    880   ret <2 x double> %out
    881 }
    882 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
    883 
    884 
    885 define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse3.hsub.ps
    886   ; CHECK: vhsubps
    887   %out = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; single use: returned below
    888   ret <4 x float> %out
    889 }
    890 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
    891 
    892 
    893 define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) { ; codegen test for @llvm.x86.sse3.ldu.dq, pointer argument %a0
    894   ; CHECK: movl
    895   ; CHECK: vlddqu
    896   %out = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; single use: returned below
    897   ret <16 x i8> %out
    898 }
    899 declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
    900 
    901 
    902 define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) { ; codegen test for @llvm.x86.sse41.blendpd, constant operand i32 7
    903   ; CHECK: vblendpd
    904   %out = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; single use: returned below
    905   ret <2 x double> %out
    906 }
    907 declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
    908 
    909 
    910 define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse41.blendps, constant operand i32 7
    911   ; CHECK: vblendps
    912   %out = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; single use: returned below
    913   ret <4 x float> %out
    914 }
    915 declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
    916 
    917 
    918 define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { ; codegen test for @llvm.x86.sse41.blendvpd
    919   ; CHECK: vblendvpd
    920   %out = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; single use: returned below
    921   ret <2 x double> %out
    922 }
    923 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
    924 
    925 
    926 define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; codegen test for @llvm.x86.sse41.blendvps
    927   ; CHECK: vblendvps
    928   %out = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; single use: returned below
    929   ret <4 x float> %out
    930 }
    931 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
    932 
    933 
    934 define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) { ; codegen test for @llvm.x86.sse41.dppd, constant operand i32 7
    935   ; CHECK: vdppd
    936   %out = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; single use: returned below
    937   ret <2 x double> %out
    938 }
    939 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
    940 
    941 
    942 define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse41.dpps, constant operand i32 7
    943   ; CHECK: vdpps
    944   %out = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; single use: returned below
    945   ret <4 x float> %out
    946 }
    947 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
    948 
    949 
    950 define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse41.insertps, constant operand i32 7
    951   ; CHECK: vinsertps
    952   %out = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; single use: returned below
    953   ret <4 x float> %out
    954 }
    955 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
    956 
    957 
    958 define <2 x i64> @test_x86_sse41_movntdqa(i8* %a0) { ; codegen test for @llvm.x86.sse41.movntdqa, pointer argument %a0
    959   ; CHECK: movl
    960   ; CHECK: vmovntdqa
    961   %out = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) ; single use: returned below
    962   ret <2 x i64> %out
    963 }
    964 declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readonly
    965 
    966 
    967 define <16 x i8> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) { ; codegen test for @llvm.x86.sse41.mpsadbw, constant operand i32 7
    968   ; CHECK: vmpsadbw
    969   %out = call <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; single use: returned below
    970   ret <16 x i8> %out
    971 }
    972 declare <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
    973 
    974 
    975 define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) { ; codegen test for @llvm.x86.sse41.packusdw
    976   ; CHECK: vpackusdw
    977   %out = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; single use: returned below
    978   ret <8 x i16> %out
    979 }
    980 declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
    981 
    982 
    983 define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { ; codegen test for @llvm.x86.sse41.pblendvb
    984   ; CHECK: vpblendvb
    985   %out = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; single use: returned below
    986   ret <16 x i8> %out
    987 }
    988 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
    989 
    990 
    991 define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) { ; codegen test for @llvm.x86.sse41.pblendw, constant operand i32 7
    992   ; CHECK: vpblendw
    993   %out = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; single use: returned below
    994   ret <8 x i16> %out
    995 }
    996 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
    997 
    998 
    999 define <2 x i64> @test_x86_sse41_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) { ; codegen test for @llvm.x86.sse41.pcmpeqq
   1000   ; CHECK: vpcmpeqq
   1001   %out = call <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) ; single use: returned below
   1002   ret <2 x i64> %out
   1003 }
   1004 declare <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64>, <2 x i64>) nounwind readnone
   1005 
   1006 
   1007 define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) { ; codegen test for @llvm.x86.sse41.phminposuw
   1008   ; CHECK: vphminposuw
   1009   %out = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; single use: returned below
   1010   ret <8 x i16> %out
   1011 }
   1012 declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
   1013 
   1014 
   1015 define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) { ; codegen test for @llvm.x86.sse41.pmaxsb
   1016   ; CHECK: vpmaxsb
   1017   %out = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; single use: returned below
   1018   ret <16 x i8> %out
   1019 }
   1020 declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
   1021 
   1022 
   1023 define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) { ; codegen test for @llvm.x86.sse41.pmaxsd
   1024   ; CHECK: vpmaxsd
   1025   %out = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; single use: returned below
   1026   ret <4 x i32> %out
   1027 }
   1028 declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
   1029 
   1030 
   1031 define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) { ; codegen test for @llvm.x86.sse41.pmaxud
   1032   ; CHECK: vpmaxud
   1033   %out = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; single use: returned below
   1034   ret <4 x i32> %out
   1035 }
   1036 declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
   1037 
   1038 
   1039 define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) { ; codegen test for @llvm.x86.sse41.pmaxuw
   1040   ; CHECK: vpmaxuw
   1041   %out = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; single use: returned below
   1042   ret <8 x i16> %out
   1043 }
   1044 declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
   1045 
   1046 
   1047 define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) { ; codegen test for @llvm.x86.sse41.pminsb
   1048   ; CHECK: vpminsb
   1049   %out = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; single use: returned below
   1050   ret <16 x i8> %out
   1051 }
   1052 declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
   1053 
   1054 
   1055 define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) { ; codegen test for @llvm.x86.sse41.pminsd
   1056   ; CHECK: vpminsd
   1057   %out = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; single use: returned below
   1058   ret <4 x i32> %out
   1059 }
   1060 declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
   1061 
   1062 
   1063 define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) { ; codegen test for @llvm.x86.sse41.pminud
   1064   ; CHECK: vpminud
   1065   %out = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; single use: returned below
   1066   ret <4 x i32> %out
   1067 }
   1068 declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
   1069 
   1070 
   1071 define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) { ; codegen test for @llvm.x86.sse41.pminuw
   1072   ; CHECK: vpminuw
   1073   %out = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; single use: returned below
   1074   ret <8 x i16> %out
   1075 }
   1076 declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
   1077 
   1078 
   1079 define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) { ; codegen test for @llvm.x86.sse41.pmovsxbd
   1080   ; CHECK: vpmovsxbd
   1081   %out = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; single use: returned below
   1082   ret <4 x i32> %out
   1083 }
   1084 declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
   1085 
   1086 
   1087 define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) { ; codegen test for @llvm.x86.sse41.pmovsxbq
   1088   ; CHECK: vpmovsxbq
   1089   %out = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; single use: returned below
   1090   ret <2 x i64> %out
   1091 }
   1092 declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
   1093 
   1094 
   1095 define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) { ; codegen test for @llvm.x86.sse41.pmovsxbw
   1096   ; CHECK: vpmovsxbw
   1097   %out = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; single use: returned below
   1098   ret <8 x i16> %out
   1099 }
   1100 declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
   1101 
   1102 
   1103 define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) { ; codegen test for @llvm.x86.sse41.pmovsxdq
   1104   ; CHECK: vpmovsxdq
   1105   %out = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; single use: returned below
   1106   ret <2 x i64> %out
   1107 }
   1108 declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
   1109 
   1110 
   1111 define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) { ; codegen test for @llvm.x86.sse41.pmovsxwd
   1112   ; CHECK: vpmovsxwd
   1113   %out = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; single use: returned below
   1114   ret <4 x i32> %out
   1115 }
   1116 declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
   1117 
   1118 
   1119 define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) { ; codegen test for @llvm.x86.sse41.pmovsxwq
   1120   ; CHECK: vpmovsxwq
   1121   %out = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; single use: returned below
   1122   ret <2 x i64> %out
   1123 }
   1124 declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
   1125 
   1126 
   1127 define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) { ; codegen test for @llvm.x86.sse41.pmovzxbd
   1128   ; CHECK: vpmovzxbd
   1129   %out = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; single use: returned below
   1130   ret <4 x i32> %out
   1131 }
   1132 declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
   1133 
   1134 
   1135 define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { ; codegen test for @llvm.x86.sse41.pmovzxbq
   1136   ; CHECK: vpmovzxbq
   1137   %out = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; single use: returned below
   1138   ret <2 x i64> %out
   1139 }
   1140 declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
   1141 
   1142 
   1143 define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) { ; codegen test for @llvm.x86.sse41.pmovzxbw
   1144   ; CHECK: vpmovzxbw
   1145   %out = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; single use: returned below
   1146   ret <8 x i16> %out
   1147 }
   1148 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
   1149 
   1150 
   1151 define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) { ; codegen test for @llvm.x86.sse41.pmovzxdq
   1152   ; CHECK: vpmovzxdq
   1153   %out = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; single use: returned below
   1154   ret <2 x i64> %out
   1155 }
   1156 declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
   1157 
   1158 
   1159 define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) { ; codegen test for @llvm.x86.sse41.pmovzxwd
   1160   ; CHECK: vpmovzxwd
   1161   %out = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; single use: returned below
   1162   ret <4 x i32> %out
   1163 }
   1164 declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
   1165 
   1166 
   1167 define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) { ; codegen test for @llvm.x86.sse41.pmovzxwq
   1168   ; CHECK: vpmovzxwq
   1169   %out = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; single use: returned below
   1170   ret <2 x i64> %out
   1171 }
   1172 declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
   1173 
   1174 
   1175 define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { ; codegen test for @llvm.x86.sse41.pmuldq
   1176   ; CHECK: vpmuldq
   1177   %out = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; single use: returned below
   1178   ret <2 x i64> %out
   1179 }
   1180 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
   1181 
   1182 
   1183 define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse41.ptestc
   1184   ; CHECK: vptest
   1185   ; CHECK: sbbl
   1186   %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; i32 result, single use: returned below
   1187   ret i32 %res
   1188 }
   1189 declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
   1190 
   1191 
   1192 define i32 @test_x86_sse41_ptestnzc(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse41.ptestnzc
   1193   ; CHECK: vptest
   1194   ; CHECK: seta
   1195   ; CHECK: movzbl
   1196   %res = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %a0, <4 x float> %a1) ; i32 result, single use: returned below
   1197   ret i32 %res
   1198 }
   1199 declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
   1200 
   1201 
   1202 define i32 @test_x86_sse41_ptestz(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse41.ptestz
   1203   ; CHECK: vptest
   1204   ; CHECK: sete
   1205   ; CHECK: movzbl
   1206   %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a0, <4 x float> %a1) ; i32 result, single use: returned below
   1207   ret i32 %res
   1208 }
   1209 declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
   1210 
   1211 
   1212 define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) { ; codegen test for @llvm.x86.sse41.round.pd, constant operand i32 7
   1213   ; CHECK: vroundpd
   1214   %out = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; single use: returned below
   1215   ret <2 x double> %out
   1216 }
   1217 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
   1218 
   1219 
   1220 define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) { ; codegen test for @llvm.x86.sse41.round.ps, constant operand i32 7
   1221   ; CHECK: vroundps
   1222   %out = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; single use: returned below
   1223   ret <4 x float> %out
   1224 }
   1225 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
   1226 
   1227 
   1228 define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) { ; codegen test for @llvm.x86.sse41.round.sd, constant operand i32 7
   1229   ; CHECK: vroundsd
   1230   %out = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; single use: returned below
   1231   ret <2 x double> %out
   1232 }
   1233 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
   1234 
   1235 
   1236 define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse41.round.ss, constant operand i32 7
   1237   ; CHECK: vroundss
   1238   %out = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; single use: returned below
   1239   ret <4 x float> %out
   1240 }
   1241 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
   1242 
   1243 
   1244 define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) { ; codegen test for @llvm.x86.sse42.pcmpestri128; all scalar operands are the constant 7
   1245   ; CHECK: movl
   1246   ; CHECK: movl
   1247   ; CHECK: vpcmpestri
   1248   ; CHECK: movl
   1249   %out = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; single use: returned below
   1250   ret i32 %out
   1251 }
   1252 declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1253 
   1254 
   1255 define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) { ; codegen test for @llvm.x86.sse42.pcmpestria128; all scalar operands are the constant 7
   1256   ; CHECK: movl
   1257   ; CHECK: movl
   1258   ; CHECK: vpcmpestri
   1259   ; CHECK: movl
   1260   %out = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; single use: returned below
   1261   ret i32 %out
   1262 }
   1263 declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1264 
   1265 
   1266 define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) { ; codegen test for @llvm.x86.sse42.pcmpestric128; all scalar operands are the constant 7
   1267   ; CHECK: movl
   1268   ; CHECK: movl
   1269   ; CHECK: vpcmpestri
   1270   ; CHECK: movl
   1271   %out = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; single use: returned below
   1272   ret i32 %out
   1273 }
   1274 declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1275 
   1276 
   1277 define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) { ; codegen test for @llvm.x86.sse42.pcmpestrio128; all scalar operands are the constant 7
   1278   ; CHECK: movl
   1279   ; CHECK: movl
   1280   ; CHECK: vpcmpestri
   1281   ; CHECK: movl
   1282   %out = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; single use: returned below
   1283   ret i32 %out
   1284 }
   1285 declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1286 
   1287 
   1288 define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) { ; codegen test for @llvm.x86.sse42.pcmpestris128; all scalar operands are the constant 7
   1289   ; CHECK: movl
   1290   ; CHECK: movl
   1291   ; CHECK: vpcmpestri
   1292   ; CHECK: movl
   1293   %out = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; single use: returned below
   1294   ret i32 %out
   1295 }
   1296 declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1297 
   1298 
   1299 define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) { ; codegen test for @llvm.x86.sse42.pcmpestriz128; all scalar operands are the constant 7
   1300   ; CHECK: movl
   1301   ; CHECK: movl
   1302   ; CHECK: vpcmpestri
   1303   ; CHECK: movl
   1304   %out = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; single use: returned below
   1305   ret i32 %out
   1306 }
   1307 declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1308 
   1309 
   1310 define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) { ; codegen test for @llvm.x86.sse42.pcmpestrm128; all scalar operands are the constant 7
   1311   ; CHECK: movl
   1312   ; CHECK: movl
   1313   ; CHECK: vpcmpestrm
   1314   %out = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; single use: returned below
   1315   ret <16 x i8> %out
   1316 }
   1317 declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1318 
   1319 
   1320 define <2 x i64> @test_x86_sse42_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) { ; codegen test for @llvm.x86.sse42.pcmpgtq
   1321   ; CHECK: vpcmpgtq
   1322   %out = call <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) ; single use: returned below
   1323   ret <2 x i64> %out
   1324 }
   1325 declare <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64>, <2 x i64>) nounwind readnone
   1326 
   1327 
   1328 define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) { ; codegen test for @llvm.x86.sse42.pcmpistri128, constant operand i8 7
   1329   ; CHECK: vpcmpistri
   1330   ; CHECK: movl
   1331   %out = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; single use: returned below
   1332   ret i32 %out
   1333 }
   1334 declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1335 
   1336 
   1337 define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { ; codegen test for @llvm.x86.sse42.pcmpistria128, constant operand i8 7
   1338   ; CHECK: vpcmpistri
   1339   ; CHECK: movl
   1340   %out = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; single use: returned below
   1341   ret i32 %out
   1342 }
   1343 declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1344 
   1345 
   1346 define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) { ; codegen test for @llvm.x86.sse42.pcmpistric128, constant operand i8 7
   1347   ; CHECK: vpcmpistri
   1348   ; CHECK: movl
   1349   %out = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; single use: returned below
   1350   ret i32 %out
   1351 }
   1352 declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1353 
   1354 
   1355 define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) { ; codegen test for @llvm.x86.sse42.pcmpistrio128, constant operand i8 7
   1356   ; CHECK: vpcmpistri
   1357   ; CHECK: movl
   1358   %out = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; single use: returned below
   1359   ret i32 %out
   1360 }
   1361 declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1362 
   1363 
   1364 define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) { ; codegen test for @llvm.x86.sse42.pcmpistris128, constant operand i8 7
   1365   ; CHECK: vpcmpistri
   1366   ; CHECK: movl
   1367   %out = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; single use: returned below
   1368   ret i32 %out
   1369 }
   1370 declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1371 
   1372 
   1373 define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) { ; codegen test for @llvm.x86.sse42.pcmpistriz128, constant operand i8 7
   1374   ; CHECK: vpcmpistri
   1375   ; CHECK: movl
   1376   %out = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; single use: returned below
   1377   ret i32 %out
   1378 }
   1379 declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1380 
   1381 
   1382 define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { ; codegen test for @llvm.x86.sse42.pcmpistrm128, constant operand i8 7
   1383   ; CHECK: vpcmpistrm
   1384   %out = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; single use: returned below
   1385   ret <16 x i8> %out
   1386 }
   1387 declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1388 
   1389 
   1390 define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse.add.ss
   1391   ; CHECK: vaddss
   1392   %out = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; single use: returned below
   1393   ret <4 x float> %out
   1394 }
   1395 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
   1396 
   1397 
   1398 define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse.cmp.ps, constant operand i8 7
   1399   ; CHECK: vcmpordps
   1400   %out = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; single use: returned below
   1401   ret <4 x float> %out
   1402 }
   1403 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
   1404 
   1405 
   1406 define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse.cmp.ss, constant operand i8 7
   1407   ; CHECK: vcmpordss
   1408   %out = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; single use: returned below
   1409   ret <4 x float> %out
   1410 }
   1411 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
   1412 
   1413 
   1414 define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse.comieq.ss
   1415   ; CHECK: vcomiss
   1416   ; CHECK: sete
   1417   ; CHECK: movzbl
   1418   %out = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; single use: returned below
   1419   ret i32 %out
   1420 }
   1421 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
   1422 
   1423 
   1424 define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse.comige.ss
   1425   ; CHECK: vcomiss
   1426   ; CHECK: setae
   1427   ; CHECK: movzbl
   1428   %out = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; single use: returned below
   1429   ret i32 %out
   1430 }
   1431 declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
   1432 
   1433 
   1434 define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse.comigt.ss
   1435   ; CHECK: vcomiss
   1436   ; CHECK: seta
   1437   ; CHECK: movzbl
   1438   %out = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; single use: returned below
   1439   ret i32 %out
   1440 }
   1441 declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
   1442 
   1443 
   1444 define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse.comile.ss
   1445   ; CHECK: vcomiss
   1446   ; CHECK: setbe
   1447   ; CHECK: movzbl
   1448   %out = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; single use: returned below
   1449   ret i32 %out
   1450 }
   1451 declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
   1452 
   1453 
   1454 define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) { ; codegen test for @llvm.x86.sse.comilt.ss
   1455   ; CHECK: vcomiss
   1456   ; CHECK: sbbl
   1457   %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; i32 result, single use: returned below
   1458   ret i32 %res
   1459 }
   1460 declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
   1461 
   1462 
   1463 define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.sse.comineq.ss and returns its i32 result.
          ; Expected in llc's output, in this order: vcomiss, setne, movzbl.
   1464   ; CHECK: vcomiss
   1465   ; CHECK: setne
   1466   ; CHECK: movzbl
   1467   %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1468   ret i32 %res
   1469 }
   1470 declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
   1471 
   1472 
   1473 define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
          ; Passes %a0 and the constant i32 7 to llvm.x86.sse.cvtsi2ss and returns
          ; the <4 x float> result. Expected in llc's output: movl, then vcvtsi2ss.
          ; NOTE(review): the movl is presumably the constant 7 being loaded into a
          ; register -- confirm against the generated assembly.
   1474   ; CHECK: movl
   1475   ; CHECK: vcvtsi2ss
   1476   %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
   1477   ret <4 x float> %res
   1478 }
   1479 declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
   1480 
   1481 
   1482 define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
          ; Passes %a0 to llvm.x86.sse.cvtss2si and returns its i32 result.
          ; Expected in llc's output: vcvtss2si.
   1483   ; CHECK: vcvtss2si
   1484   %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
   1485   ret i32 %res
   1486 }
   1487 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
   1488 
   1489 
   1490 define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
          ; Passes %a0 to llvm.x86.sse.cvttss2si and returns its i32 result.
          ; Expected in llc's output: vcvttss2si.
   1491   ; CHECK: vcvttss2si
   1492   %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
   1493   ret i32 %res
   1494 }
   1495 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
   1496 
   1497 
   1498 define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.sse.div.ss and returns the <4 x float> result.
          ; Expected in llc's output: vdivss.
   1499   ; CHECK: vdivss
   1500   %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1501   ret <4 x float> %res
   1502 }
   1503 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
   1504 
   1505 
   1506 define void @test_x86_sse_ldmxcsr(i8* %a0) {
          ; Calls llvm.x86.sse.ldmxcsr on the pointer %a0; nothing is returned.
          ; Expected in llc's output: movl, then vldmxcsr.
          ; NOTE(review): the movl is presumably the pointer argument being fetched
          ; into a register -- confirm against the generated assembly.
   1507   ; CHECK: movl
   1508   ; CHECK: vldmxcsr
   1509   call void @llvm.x86.sse.ldmxcsr(i8* %a0)
   1510   ret void
   1511 }
   1512 declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
   1513 
   1514 
   1515 define <4 x float> @test_x86_sse_loadu_ps(i8* %a0) {
          ; Passes the pointer %a0 to llvm.x86.sse.loadu.ps and returns the
          ; <4 x float> result. Expected in llc's output: movl, then vmovups.
   1516   ; CHECK: movl
   1517   ; CHECK: vmovups
   1518   %res = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a0) ; <<4 x float>> [#uses=1]
   1519   ret <4 x float> %res
   1520 }
   1521 declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly
   1522 
   1523 
   1524 define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.sse.max.ps and returns the <4 x float> result.
          ; Expected in llc's output: vmaxps.
   1525   ; CHECK: vmaxps
   1526   %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1527   ret <4 x float> %res
   1528 }
   1529 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
   1530 
   1531 
   1532 define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.sse.max.ss and returns the <4 x float> result.
          ; Expected in llc's output: vmaxss.
   1533   ; CHECK: vmaxss
   1534   %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1535   ret <4 x float> %res
   1536 }
   1537 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
   1538 
   1539 
   1540 define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.sse.min.ps and returns the <4 x float> result.
          ; Expected in llc's output: vminps.
   1541   ; CHECK: vminps
   1542   %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1543   ret <4 x float> %res
   1544 }
   1545 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
   1546 
   1547 
   1548 define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.sse.min.ss and returns the <4 x float> result.
          ; Expected in llc's output: vminss.
   1549   ; CHECK: vminss
   1550   %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1551   ret <4 x float> %res
   1552 }
   1553 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
   1554 
   1555 
   1556 define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
          ; Passes %a0 to llvm.x86.sse.movmsk.ps and returns its i32 result.
          ; Expected in llc's output: vmovmskps.
   1557   ; CHECK: vmovmskps
   1558   %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
   1559   ret i32 %res
   1560 }
   1561 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
   1562 
   1563 
   1564 define void @test_x86_sse_movnt_ps(i8* %a0, <4 x float> %a1) {
          ; Calls llvm.x86.sse.movnt.ps with the pointer %a0 and the vector %a1;
          ; nothing is returned. Expected in llc's output: movl, then vmovntps.
   1565   ; CHECK: movl
   1566   ; CHECK: vmovntps
   1567   call void @llvm.x86.sse.movnt.ps(i8* %a0, <4 x float> %a1)
   1568   ret void
   1569 }
   1570 declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind
   1571 
   1572 
   1573 define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.sse.mul.ss and returns the <4 x float> result.
          ; Expected in llc's output: vmulss.
   1574   ; CHECK: vmulss
   1575   %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1576   ret <4 x float> %res
   1577 }
   1578 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
   1579 
   1580 
   1581 define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
          ; Passes %a0 to llvm.x86.sse.rcp.ps and returns the <4 x float> result.
          ; Expected in llc's output: vrcpps.
   1582   ; CHECK: vrcpps
   1583   %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1584   ret <4 x float> %res
   1585 }
   1586 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
   1587 
   1588 
   1589 define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
          ; Passes %a0 to llvm.x86.sse.rcp.ss and returns the <4 x float> result.
          ; Expected in llc's output: vrcpss.
   1590   ; CHECK: vrcpss
   1591   %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1592   ret <4 x float> %res
   1593 }
   1594 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
   1595 
   1596 
   1597 define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
          ; Passes %a0 to llvm.x86.sse.rsqrt.ps and returns the <4 x float> result.
          ; Expected in llc's output: vrsqrtps.
   1598   ; CHECK: vrsqrtps
   1599   %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1600   ret <4 x float> %res
   1601 }
   1602 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
   1603 
   1604 
   1605 define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
          ; Passes %a0 to llvm.x86.sse.rsqrt.ss and returns the <4 x float> result.
          ; Expected in llc's output: vrsqrtss.
   1606   ; CHECK: vrsqrtss
   1607   %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1608   ret <4 x float> %res
   1609 }
   1610 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
   1611 
   1612 
   1613 define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
          ; Passes %a0 to llvm.x86.sse.sqrt.ps and returns the <4 x float> result.
          ; Expected in llc's output: vsqrtps.
   1614   ; CHECK: vsqrtps
   1615   %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1616   ret <4 x float> %res
   1617 }
   1618 declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
   1619 
   1620 
   1621 define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
          ; Passes %a0 to llvm.x86.sse.sqrt.ss and returns the <4 x float> result.
          ; Expected in llc's output: vsqrtss.
   1622   ; CHECK: vsqrtss
   1623   %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1624   ret <4 x float> %res
   1625 }
   1626 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
   1627 
   1628 
   1629 define void @test_x86_sse_stmxcsr(i8* %a0) {
          ; Calls llvm.x86.sse.stmxcsr on the pointer %a0; nothing is returned.
          ; Expected in llc's output: movl, then vstmxcsr.
   1630   ; CHECK: movl
   1631   ; CHECK: vstmxcsr
   1632   call void @llvm.x86.sse.stmxcsr(i8* %a0)
   1633   ret void
   1634 }
   1635 declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
   1636 
   1637 
   1638 define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
          ; Calls llvm.x86.sse.storeu.ps with the pointer %a0 and the vector %a1;
          ; nothing is returned. Expected in llc's output: movl, then vmovups.
   1639   ; CHECK: movl
   1640   ; CHECK: vmovups
   1641   call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
   1642   ret void
   1643 }
   1644 declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
   1645 
   1646 
   1647 define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.sse.sub.ss and returns the <4 x float> result.
          ; Expected in llc's output: vsubss.
   1648   ; CHECK: vsubss
   1649   %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1650   ret <4 x float> %res
   1651 }
   1652 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
   1653 
   1654 
   1655 define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.sse.ucomieq.ss and returns its i32 result.
          ; Expected in llc's output, in this order: vucomiss, sete, movzbl.
   1656   ; CHECK: vucomiss
   1657   ; CHECK: sete
   1658   ; CHECK: movzbl
   1659   %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1660   ret i32 %res
   1661 }
   1662 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
   1663 
   1664 
   1665 define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.sse.ucomige.ss and returns its i32 result.
          ; Expected in llc's output, in this order: vucomiss, setae, movzbl.
   1666   ; CHECK: vucomiss
   1667   ; CHECK: setae
   1668   ; CHECK: movzbl
   1669   %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1670   ret i32 %res
   1671 }
   1672 declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
   1673 
   1674 
   1675 define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.sse.ucomigt.ss and returns its i32 result.
          ; Expected in llc's output, in this order: vucomiss, seta, movzbl.
   1676   ; CHECK: vucomiss
   1677   ; CHECK: seta
   1678   ; CHECK: movzbl
   1679   %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1680   ret i32 %res
   1681 }
   1682 declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
   1683 
   1684 
   1685 define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.sse.ucomile.ss and returns its i32 result.
          ; Expected in llc's output, in this order: vucomiss, setbe, movzbl.
   1686   ; CHECK: vucomiss
   1687   ; CHECK: setbe
   1688   ; CHECK: movzbl
   1689   %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1690   ret i32 %res
   1691 }
   1692 declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
   1693 
   1694 
   1695 define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.sse.ucomilt.ss and returns its i32 result.
          ; Expected in llc's output, in this order: vucomiss, sbbl.
   1696   ; CHECK: vucomiss
   1697   ; CHECK: sbbl
   1698   %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1699   ret i32 %res
   1700 }
   1701 declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
   1702 
   1703 
   1704 define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.sse.ucomineq.ss and returns its i32 result.
          ; Expected in llc's output, in this order: vucomiss, setne, movzbl.
   1705   ; CHECK: vucomiss
   1706   ; CHECK: setne
   1707   ; CHECK: movzbl
   1708   %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1709   ret i32 %res
   1710 }
   1711 declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
   1712 
   1713 
   1714 define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
          ; Passes %a0 to llvm.x86.ssse3.pabs.b.128 and returns the <16 x i8> result.
          ; Expected in llc's output: vpabsb.
   1715   ; CHECK: vpabsb
   1716   %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
   1717   ret <16 x i8> %res
   1718 }
   1719 declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
   1720 
   1721 
   1722 define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
          ; Passes %a0 to llvm.x86.ssse3.pabs.d.128 and returns the <4 x i32> result.
          ; Expected in llc's output: vpabsd.
   1723   ; CHECK: vpabsd
   1724   %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
   1725   ret <4 x i32> %res
   1726 }
   1727 declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
   1728 
   1729 
   1730 define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
          ; Passes %a0 to llvm.x86.ssse3.pabs.w.128 and returns the <8 x i16> result.
          ; Expected in llc's output: vpabsw.
   1731   ; CHECK: vpabsw
   1732   %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
   1733   ret <8 x i16> %res
   1734 }
   1735 declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
   1736 
   1737 
   1738 define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.ssse3.phadd.d.128 and returns the
          ; <4 x i32> result. Expected in llc's output: vphaddd.
   1739   ; CHECK: vphaddd
   1740   %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   1741   ret <4 x i32> %res
   1742 }
   1743 declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
   1744 
   1745 
   1746 define <4 x i32> @test_x86_ssse3_phadd_sw_128(<4 x i32> %a0, <4 x i32> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.ssse3.phadd.sw.128 and returns the
          ; <4 x i32> result. Expected in llc's output: vphaddsw.
          ; NOTE(review): operands and result are <4 x i32> here, while the
          ; phsub.sw.128 test in this file uses <8 x i16> -- confirm which vector
          ; type the intrinsic is actually defined with.
   1747   ; CHECK: vphaddsw
   1748   %res = call <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   1749   ret <4 x i32> %res
   1750 }
   1751 declare <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32>, <4 x i32>) nounwind readnone
   1752 
   1753 
   1754 define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.ssse3.phadd.w.128 and returns the
          ; <8 x i16> result. Expected in llc's output: vphaddw.
   1755   ; CHECK: vphaddw
   1756   %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   1757   ret <8 x i16> %res
   1758 }
   1759 declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
   1760 
   1761 
   1762 define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.ssse3.phsub.d.128 and returns the
          ; <4 x i32> result. Expected in llc's output: vphsubd.
   1763   ; CHECK: vphsubd
   1764   %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   1765   ret <4 x i32> %res
   1766 }
   1767 declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
   1768 
   1769 
   1770 define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.ssse3.phsub.sw.128 and returns the
          ; <8 x i16> result. Expected in llc's output: vphsubsw.
   1771   ; CHECK: vphsubsw
   1772   %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   1773   ret <8 x i16> %res
   1774 }
   1775 declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
   1776 
   1777 
   1778 define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.ssse3.phsub.w.128 and returns the
          ; <8 x i16> result. Expected in llc's output: vphsubw.
   1779   ; CHECK: vphsubw
   1780   %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   1781   ret <8 x i16> %res
   1782 }
   1783 declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
   1784 
   1785 
   1786 define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.ssse3.pmadd.ub.sw.128 and returns the
          ; <8 x i16> result. Expected in llc's output: vpmaddubsw.
   1787   ; CHECK: vpmaddubsw
   1788   %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   1789   ret <8 x i16> %res
   1790 }
   1791 declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
   1792 
   1793 
   1794 define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.ssse3.pmul.hr.sw.128 and returns the
          ; <8 x i16> result. Expected in llc's output: vpmulhrsw.
   1795   ; CHECK: vpmulhrsw
   1796   %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   1797   ret <8 x i16> %res
   1798 }
   1799 declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
   1800 
   1801 
   1802 define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.ssse3.pshuf.b.128 and returns the
          ; <16 x i8> result. Expected in llc's output: vpshufb.
   1803   ; CHECK: vpshufb
   1804   %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
   1805   ret <16 x i8> %res
   1806 }
   1807 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
   1808 
   1809 
   1810 define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.ssse3.psign.b.128 and returns the
          ; <16 x i8> result. Expected in llc's output: vpsignb.
   1811   ; CHECK: vpsignb
   1812   %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
   1813   ret <16 x i8> %res
   1814 }
   1815 declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
   1816 
   1817 
   1818 define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.ssse3.psign.d.128 and returns the
          ; <4 x i32> result. Expected in llc's output: vpsignd.
   1819   ; CHECK: vpsignd
   1820   %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   1821   ret <4 x i32> %res
   1822 }
   1823 declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
   1824 
   1825 
   1826 define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.ssse3.psign.w.128 and returns the
          ; <8 x i16> result. Expected in llc's output: vpsignw.
   1827   ; CHECK: vpsignw
   1828   %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   1829   ret <8 x i16> %res
   1830 }
   1831 declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
   1832 
   1833 
   1834 define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.avx.addsub.pd.256 and returns the
          ; <4 x double> result. Expected in llc's output: vaddsubpd.
   1835   ; CHECK: vaddsubpd
   1836   %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   1837   ret <4 x double> %res
   1838 }
   1839 declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
   1840 
   1841 
   1842 define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.avx.addsub.ps.256 and returns the
          ; <8 x float> result. Expected in llc's output: vaddsubps.
   1843   ; CHECK: vaddsubps
   1844   %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   1845   ret <8 x float> %res
   1846 }
   1847 declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
   1848 
   1849 
   1850 define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Passes %a0, %a1 and the constant i32 7 to llvm.x86.avx.blend.pd.256 and
          ; returns the <4 x double> result. Expected in llc's output: vblendpd.
   1851   ; CHECK: vblendpd
   1852   %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
   1853   ret <4 x double> %res
   1854 }
   1855 declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
   1856 
   1857 
   1858 define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Passes %a0, %a1 and the constant i32 7 to llvm.x86.avx.blend.ps.256 and
          ; returns the <8 x float> result. Expected in llc's output: vblendps.
   1859   ; CHECK: vblendps
   1860   %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
   1861   ret <8 x float> %res
   1862 }
   1863 declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
   1864 
   1865 
   1866 define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
          ; Passes %a0, %a1 and %a2 to llvm.x86.avx.blendv.pd.256 and returns the
          ; <4 x double> result. Expected in llc's output: vblendvpd.
   1867   ; CHECK: vblendvpd
   1868   %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
   1869   ret <4 x double> %res
   1870 }
   1871 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
   1872 
   1873 
   1874 define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
          ; Passes %a0, %a1 and %a2 to llvm.x86.avx.blendv.ps.256 and returns the
          ; <8 x float> result. Expected in llc's output: vblendvps.
   1875   ; CHECK: vblendvps
   1876   %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
   1877   ret <8 x float> %res
   1878 }
   1879 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
   1880 
   1881 
   1882 define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Passes %a0, %a1 and the constant i8 7 to llvm.x86.avx.cmp.pd.256 and
          ; returns the <4 x double> result. For that immediate the test expects
          ; the mnemonic vcmpordpd in llc's output.
   1883   ; CHECK: vcmpordpd
   1884   %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
   1885   ret <4 x double> %res
   1886 }
   1887 declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
   1888 
   1889 
   1890 define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Passes %a0, %a1 and the constant i8 7 to llvm.x86.avx.cmp.ps.256 and
          ; returns the <8 x float> result. For that immediate the test expects
          ; the mnemonic vcmpordps in llc's output.
   1891   ; CHECK: vcmpordps
   1892   %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   1893   ret <8 x float> %res
   1894 }
   1895 declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
   1896 
   1897 
   1898 define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
          ; Passes the <4 x double> %a0 to llvm.x86.avx.cvt.pd2.ps.256 and returns
          ; the <4 x float> result. Expected in llc's output: vcvtpd2psy.
   1899   ; CHECK: vcvtpd2psy
   1900   %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
   1901   ret <4 x float> %res
   1902 }
   1903 declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone
   1904 
   1905 
   1906 define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
          ; Passes the <4 x double> %a0 to llvm.x86.avx.cvt.pd2dq.256 and returns
          ; the <4 x i32> result. Expected in llc's output: vcvtpd2dqy.
   1907   ; CHECK: vcvtpd2dqy
   1908   %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
   1909   ret <4 x i32> %res
   1910 }
   1911 declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
   1912 
   1913 
   1914 define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
          ; Passes the <4 x float> %a0 to llvm.x86.avx.cvt.ps2.pd.256 and returns
          ; the <4 x double> result. Expected in llc's output: vcvtps2pd.
   1915   ; CHECK: vcvtps2pd
   1916   %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
   1917   ret <4 x double> %res
   1918 }
   1919 declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
   1920 
   1921 
   1922 define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
          ; Passes the <8 x float> %a0 to llvm.x86.avx.cvt.ps2dq.256 and returns
          ; the <8 x i32> result. Expected in llc's output: vcvtps2dq.
   1923   ; CHECK: vcvtps2dq
   1924   %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
   1925   ret <8 x i32> %res
   1926 }
   1927 declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
   1928 
   1929 
   1930 define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
          ; Passes the <4 x i32> %a0 to llvm.x86.avx.cvtdq2.pd.256 and returns the
          ; <4 x double> result. Expected in llc's output: vcvtdq2pd.
   1931   ; CHECK: vcvtdq2pd
   1932   %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
   1933   ret <4 x double> %res
   1934 }
   1935 declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
   1936 
   1937 
   1938 define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
          ; Passes the <8 x i32> %a0 to llvm.x86.avx.cvtdq2.ps.256 and returns the
          ; <8 x float> result. Expected in llc's output: vcvtdq2ps.
   1939   ; CHECK: vcvtdq2ps
   1940   %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
   1941   ret <8 x float> %res
   1942 }
   1943 declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
   1944 
   1945 
   1946 define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
          ; Passes the <4 x double> %a0 to llvm.x86.avx.cvtt.pd2dq.256 and returns
          ; the <4 x i32> result. Expected in llc's output: vcvttpd2dqy.
   1947   ; CHECK: vcvttpd2dqy
   1948   %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
   1949   ret <4 x i32> %res
   1950 }
   1951 declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
   1952 
   1953 
   1954 define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
          ; Passes the <8 x float> %a0 to llvm.x86.avx.cvtt.ps2dq.256 and returns
          ; the <8 x i32> result. Expected in llc's output: vcvttps2dq.
   1955   ; CHECK: vcvttps2dq
   1956   %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
   1957   ret <8 x i32> %res
   1958 }
   1959 declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
   1960 
   1961 
   1962 define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Passes %a0, %a1 and the constant i32 7 to llvm.x86.avx.dp.ps.256 and
          ; returns the <8 x float> result. Expected in llc's output: vdpps.
   1963   ; CHECK: vdpps
   1964   %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
   1965   ret <8 x float> %res
   1966 }
   1967 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
   1968 
   1969 
   1970 define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.avx.hadd.pd.256 and returns the
          ; <4 x double> result. Expected in llc's output: vhaddpd.
   1971   ; CHECK: vhaddpd
   1972   %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   1973   ret <4 x double> %res
   1974 }
   1975 declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
   1976 
   1977 
   1978 define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.avx.hadd.ps.256 and returns the
          ; <8 x float> result. Expected in llc's output: vhaddps.
   1979   ; CHECK: vhaddps
   1980   %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   1981   ret <8 x float> %res
   1982 }
   1983 declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
   1984 
   1985 
   1986 define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.avx.hsub.pd.256 and returns the
          ; <4 x double> result. Expected in llc's output: vhsubpd.
   1987   ; CHECK: vhsubpd
   1988   %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   1989   ret <4 x double> %res
   1990 }
   1991 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
   1992 
   1993 
   1994 define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.avx.hsub.ps.256 and returns the
          ; <8 x float> result. Expected in llc's output: vhsubps.
   1995   ; CHECK: vhsubps
   1996   %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   1997   ret <8 x float> %res
   1998 }
   1999 declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2000 
   2001 
   2002 define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
          ; Passes the pointer %a0 to llvm.x86.avx.ldu.dq.256 and returns the
          ; <32 x i8> result. Expected in llc's output: vlddqu.
   2003   ; CHECK: vlddqu
   2004   %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
   2005   ret <32 x i8> %res
   2006 }
   2007 declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
   2008 
   2009 
   2010 define <32 x i8> @test_x86_avx_loadu_dq_256(i8* %a0) {
          ; Passes the pointer %a0 to llvm.x86.avx.loadu.dq.256 and returns the
          ; <32 x i8> result. Expected in llc's output: vmovdqu.
   2011   ; CHECK: vmovdqu
   2012   %res = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
   2013   ret <32 x i8> %res
   2014 }
   2015 declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly
   2016 
   2017 
   2018 define <4 x double> @test_x86_avx_loadu_pd_256(i8* %a0) {
          ; Passes the pointer %a0 to llvm.x86.avx.loadu.pd.256 and returns the
          ; <4 x double> result. Expected in llc's output: vmovupd.
   2019   ; CHECK: vmovupd
   2020   %res = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
   2021   ret <4 x double> %res
   2022 }
   2023 declare <4 x double> @llvm.x86.avx.loadu.pd.256(i8*) nounwind readonly
   2024 
   2025 
   2026 define <8 x float> @test_x86_avx_loadu_ps_256(i8* %a0) {
          ; Passes the pointer %a0 to llvm.x86.avx.loadu.ps.256 and returns the
          ; <8 x float> result. Expected in llc's output: vmovups.
   2027   ; CHECK: vmovups
   2028   %res = call <8 x float> @llvm.x86.avx.loadu.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
   2029   ret <8 x float> %res
   2030 }
   2031 declare <8 x float> @llvm.x86.avx.loadu.ps.256(i8*) nounwind readonly
   2032 
   2033 
   2034 define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
          ; Passes the pointer %a0 and the vector %a1 to llvm.x86.avx.maskload.pd
          ; and returns the <2 x double> result. Expected in llc's output: vmaskmovpd.
   2035   ; CHECK: vmaskmovpd
   2036   %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   2037   ret <2 x double> %res
   2038 }
   2039 declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly
   2040 
   2041 
   2042 define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) {
          ; Passes the pointer %a0 and the vector %a1 to llvm.x86.avx.maskload.pd.256
          ; and returns the <4 x double> result. Expected in llc's output: vmaskmovpd.
   2043   ; CHECK: vmaskmovpd
   2044   %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2045   ret <4 x double> %res
   2046 }
   2047 declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly
   2048 
   2049 
   2050 define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) {
          ; Passes the pointer %a0 and the vector %a1 to llvm.x86.avx.maskload.ps
          ; and returns the <4 x float> result. Expected in llc's output: vmaskmovps.
   2051   ; CHECK: vmaskmovps
   2052   %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   2053   ret <4 x float> %res
   2054 }
   2055 declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly
   2056 
   2057 
   2058 define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) {
          ; Passes the pointer %a0 and the vector %a1 to llvm.x86.avx.maskload.ps.256
          ; and returns the <8 x float> result. Expected in llc's output: vmaskmovps.
   2059   ; CHECK: vmaskmovps
   2060   %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2061   ret <8 x float> %res
   2062 }
   2063 declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
   2064 
   2065 
   2066 define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) {
          ; Calls llvm.x86.avx.maskstore.pd with the pointer %a0 and the vectors
          ; %a1 and %a2; nothing is returned. Expected in llc's output: vmaskmovpd.
   2067   ; CHECK: vmaskmovpd
   2068   call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2)
   2069   ret void
   2070 }
   2071 declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind
   2072 
   2073 
   2074 define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) {
          ; Calls llvm.x86.avx.maskstore.pd.256 with the pointer %a0 and the vectors
          ; %a1 and %a2; nothing is returned. Expected in llc's output: vmaskmovpd.
   2075   ; CHECK: vmaskmovpd
   2076   call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2)
   2077   ret void
   2078 }
   2079 declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind
   2080 
   2081 
   2082 define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) {
          ; Calls llvm.x86.avx.maskstore.ps with the pointer %a0 and the vectors
          ; %a1 and %a2; nothing is returned. Expected in llc's output: vmaskmovps.
   2083   ; CHECK: vmaskmovps
   2084   call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2)
   2085   ret void
   2086 }
   2087 declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind
   2088 
   2089 
   2090 define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) {
          ; Calls llvm.x86.avx.maskstore.ps.256 with the pointer %a0 and the vectors
          ; %a1 and %a2; nothing is returned. Expected in llc's output: vmaskmovps.
   2091   ; CHECK: vmaskmovps
   2092   call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2)
   2093   ret void
   2094 }
   2095 declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
   2096 
   2097 
   2098 define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.avx.max.pd.256 and returns the
          ; <4 x double> result. Expected in llc's output: vmaxpd.
   2099   ; CHECK: vmaxpd
   2100   %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2101   ret <4 x double> %res
   2102 }
   2103 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2104 
   2105 
   2106 define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.avx.max.ps.256 and returns the
          ; <8 x float> result. Expected in llc's output: vmaxps.
   2107   ; CHECK: vmaxps
   2108   %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2109   ret <8 x float> %res
   2110 }
   2111 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2112 
   2113 
   2114 define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.avx.min.pd.256 and returns the
          ; <4 x double> result. Expected in llc's output: vminpd.
   2115   ; CHECK: vminpd
   2116   %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2117   ret <4 x double> %res
   2118 }
   2119 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2120 
   2121 
   2122 define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.avx.min.ps.256 and returns the
          ; <8 x float> result. Expected in llc's output: vminps.
   2123   ; CHECK: vminps
   2124   %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2125   ret <8 x float> %res
   2126 }
   2127 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2128 
   2129 
   2130 define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
          ; Passes %a0 to llvm.x86.avx.movmsk.pd.256 and returns its i32 result.
          ; Expected in llc's output: vmovmskpd.
   2131   ; CHECK: vmovmskpd
   2132   %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
   2133   ret i32 %res
   2134 }
   2135 declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
   2136 
   2137 
   2138 define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
          ; Passes %a0 to llvm.x86.avx.movmsk.ps.256 and returns its i32 result.
          ; Expected in llc's output: vmovmskps.
   2139   ; CHECK: vmovmskps
   2140   %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
   2141   ret i32 %res
   2142 }
   2143 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
   2144 
   2145 
   2146 define void @test_x86_avx_movnt_dq_256(i8* %a0, <4 x i64> %a1) {
          ; Calls llvm.x86.avx.movnt.dq.256 with the pointer %a0 and the vector %a1;
          ; nothing is returned. Expected in llc's output: vmovntdq.
   2147   ; CHECK: vmovntdq
   2148   call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a1)
   2149   ret void
   2150 }
   2151 declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
   2152 
   2153 
   2154 define void @test_x86_avx_movnt_pd_256(i8* %a0, <4 x double> %a1) {
          ; Calls llvm.x86.avx.movnt.pd.256 with the pointer %a0 and the vector %a1;
          ; nothing is returned. Expected in llc's output: vmovntpd.
   2155   ; CHECK: vmovntpd
   2156   call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a1)
   2157   ret void
   2158 }
   2159 declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
   2160 
   2161 
   2162 define void @test_x86_avx_movnt_ps_256(i8* %a0, <8 x float> %a1) {
          ; Calls llvm.x86.avx.movnt.ps.256 with the pointer %a0 and the vector %a1;
          ; nothing is returned. Expected in llc's output: vmovntps.
   2163   ; CHECK: vmovntps
   2164   call void @llvm.x86.avx.movnt.ps.256(i8* %a0, <8 x float> %a1)
   2165   ret void
   2166 }
   2167 declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
   2168 
   2169 
   2170 define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.avx.ptestc.256 and returns its i32 result.
          ; Expected in llc's output, in this order: vptest, sbbl.
   2171   ; CHECK: vptest
   2172   ; CHECK: sbbl
   2173   %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
   2174   ret i32 %res
   2175 }
   2176 declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
   2177 
   2178 
   2179 define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
          ; Passes %a0 and %a1 to llvm.x86.avx.ptestnzc.256 and returns its i32 result.
          ; Expected in llc's output, in this order: vptest, seta, movzbl.
   2180   ; CHECK: vptest
   2181   ; CHECK: seta
   2182   ; CHECK: movzbl
   2183   %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
   2184   ret i32 %res
   2185 }
   2186 declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone
   2187 
   2188 
   ; Codegen test for @llvm.x86.avx.ptestz.256: forwards both <4 x i64> arguments
   ; to the intrinsic and returns its i32 result. The three FileCheck patterns
   ; below expect "vptest", "sete" and "movzbl", in that order, in the llc output.
   ; The intrinsic is declared nounwind readnone.
   2189 define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
   2190   ; CHECK: vptest
   2191   ; CHECK: sete
   2192   ; CHECK: movzbl
   2193   %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
   2194   ret i32 %res
   2195 }
   2196 declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
   2197 
   2198 
   ; Codegen test for @llvm.x86.avx.rcp.ps.256: forwards the <8 x float> argument
   ; to the intrinsic and returns its <8 x float> result. The FileCheck pattern
   ; below expects "vrcpps" in the llc output. The intrinsic is declared
   ; nounwind readnone.
   2199 define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
   2200   ; CHECK: vrcpps
   2201   %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
   2202   ret <8 x float> %res
   2203 }
   2204 declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
   2205 
   2206 
   ; Codegen test for @llvm.x86.avx.round.pd.256: calls the intrinsic with the
   ; <4 x double> argument and the constant i32 7 as its second operand, then
   ; returns the <4 x double> result. The FileCheck pattern below expects
   ; "vroundpd" in the llc output. The intrinsic is declared nounwind readnone.
   2207 define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
   2208   ; CHECK: vroundpd
   2209   %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
   2210   ret <4 x double> %res
   2211 }
   2212 declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
   2213 
   2214 
   ; Codegen test for @llvm.x86.avx.round.ps.256: calls the intrinsic with the
   ; <8 x float> argument and the constant i32 7 as its second operand, then
   ; returns the <8 x float> result. The FileCheck pattern below expects
   ; "vroundps" in the llc output. The intrinsic is declared nounwind readnone.
   2215 define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
   2216   ; CHECK: vroundps
   2217   %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
   2218   ret <8 x float> %res
   2219 }
   2220 declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
   2221 
   2222 
   ; Codegen test for @llvm.x86.avx.rsqrt.ps.256: forwards the <8 x float>
   ; argument to the intrinsic and returns its <8 x float> result. The FileCheck
   ; pattern below expects "vrsqrtps" in the llc output. The intrinsic is
   ; declared nounwind readnone.
   2223 define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
   2224   ; CHECK: vrsqrtps
   2225   %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
   2226   ret <8 x float> %res
   2227 }
   2228 declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
   2229 
   2230 
   ; Codegen test for @llvm.x86.avx.sqrt.pd.256: forwards the <4 x double>
   ; argument to the intrinsic and returns its <4 x double> result. The FileCheck
   ; pattern below expects "vsqrtpd" in the llc output. The intrinsic is declared
   ; nounwind readnone.
   2231 define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
   2232   ; CHECK: vsqrtpd
   2233   %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
   2234   ret <4 x double> %res
   2235 }
   2236 declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
   2237 
   2238 
   ; Codegen test for @llvm.x86.avx.sqrt.ps.256: forwards the <8 x float> argument
   ; to the intrinsic and returns its <8 x float> result. The FileCheck pattern
   ; below expects "vsqrtps" in the llc output. The intrinsic is declared
   ; nounwind readnone.
   2239 define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
   2240   ; CHECK: vsqrtps
   2241   %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
   2242   ret <8 x float> %res
   2243 }
   2244 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
   2245 
   2246 
   ; Codegen test for @llvm.x86.avx.storeu.dq.256: forwards the i8* pointer and
   ; the <32 x i8> value to the intrinsic, which returns void. The FileCheck
   ; pattern below expects "vmovdqu" in the llc output. The intrinsic is declared
   ; nounwind only (neither readnone nor readonly).
   2247 define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
   2248   ; CHECK: vmovdqu
   2249   call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1)
   2250   ret void
   2251 }
   2252 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
   2253 
   2254 
   ; Codegen test for @llvm.x86.avx.storeu.pd.256: forwards the i8* pointer and
   ; the <4 x double> value to the intrinsic, which returns void. The FileCheck
   ; pattern below expects "vmovupd" in the llc output. The intrinsic is declared
   ; nounwind only (neither readnone nor readonly).
   2255 define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
   2256   ; CHECK: vmovupd
   2257   call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a1)
   2258   ret void
   2259 }
   2260 declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
   2261 
   2262 
   ; Codegen test for @llvm.x86.avx.storeu.ps.256: forwards the i8* pointer and
   ; the <8 x float> value to the intrinsic, which returns void. The FileCheck
   ; pattern below expects "vmovups" in the llc output. The intrinsic is declared
   ; nounwind only (neither readnone nor readonly).
   2263 define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
   2264   ; CHECK: vmovups
   2265   call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
   2266   ret void
   2267 }
   2268 declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
   2269 
   2270 
   ; Codegen test for @llvm.x86.avx.vbroadcast.sd.256: forwards the i8* argument
   ; to the intrinsic and returns its <4 x double> result. The FileCheck pattern
   ; below expects "vbroadcastsd" in the llc output. The intrinsic is declared
   ; nounwind readonly.
   2271 define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) {
   2272   ; CHECK: vbroadcastsd
   2273   %res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1]
   2274   ret <4 x double> %res
   2275 }
   2276 declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly
   2277 
   2278 
   ; Codegen test for @llvm.x86.avx.vbroadcastf128.pd.256: forwards the i8*
   ; argument to the intrinsic and returns its <4 x double> result. The FileCheck
   ; pattern below expects "vbroadcastf128" in the llc output. The intrinsic is
   ; declared nounwind readonly.
   ; NOTE(review): "vbroadcastf128" is also a substring of this function's name,
   ; so the pattern may match a printed symbol name rather than the instruction
   ; -- confirm against the actual llc output.
   2279 define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
   2280   ; CHECK: vbroadcastf128
   2281   %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
   2282   ret <4 x double> %res
   2283 }
   2284 declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
   2285 
   2286 
   ; Codegen test for @llvm.x86.avx.vbroadcastf128.ps.256: forwards the i8*
   ; argument to the intrinsic and returns its <8 x float> result. The FileCheck
   ; pattern below expects "vbroadcastf128" in the llc output. The intrinsic is
   ; declared nounwind readonly.
   ; NOTE(review): "vbroadcastf128" is also a substring of this function's name,
   ; so the pattern may match a printed symbol name rather than the instruction
   ; -- confirm against the actual llc output.
   2287 define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
   2288   ; CHECK: vbroadcastf128
   2289   %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
   2290   ret <8 x float> %res
   2291 }
   2292 declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
   2293 
   2294 
   ; Codegen test for @llvm.x86.avx.vbroadcastss: forwards the i8* argument to the
   ; intrinsic and returns its <4 x float> result. The FileCheck pattern below
   ; expects "vbroadcastss" in the llc output. The intrinsic is declared
   ; nounwind readonly.
   ; NOTE(review): "vbroadcastss" is also a substring of this function's name, so
   ; the pattern may match a printed symbol name rather than the instruction --
   ; confirm against the actual llc output.
   2295 define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) {
   2296   ; CHECK: vbroadcastss
   2297   %res = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1]
   2298   ret <4 x float> %res
   2299 }
   2300 declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly
   2301 
   2302 
   ; Codegen test for @llvm.x86.avx.vbroadcastss.256: forwards the i8* argument to
   ; the intrinsic and returns its <8 x float> result. The FileCheck pattern below
   ; expects "vbroadcastss" in the llc output. The intrinsic is declared
   ; nounwind readonly.
   ; NOTE(review): "vbroadcastss" is also a substring of this function's name, so
   ; the pattern may match a printed symbol name rather than the instruction --
   ; confirm against the actual llc output.
   2303 define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) {
   2304   ; CHECK: vbroadcastss
   2305   %res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1]
   2306   ret <8 x float> %res
   2307 }
   2308 declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly
   2309 
   2310 
   ; Codegen test for @llvm.x86.avx.vextractf128.pd.256: calls the intrinsic with
   ; the <4 x double> argument and the constant i8 7, then returns the
   ; <2 x double> result. The FileCheck pattern below expects "vextractf128" in
   ; the llc output. The intrinsic is declared nounwind readnone.
   ; NOTE(review): "vextractf128" is also a substring of this function's name, so
   ; the pattern may match a printed symbol name rather than the instruction --
   ; confirm against the actual llc output.
   2311 define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
   2312   ; CHECK: vextractf128
   2313   %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
   2314   ret <2 x double> %res
   2315 }
   2316 declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
   2317 
   2318 
   ; Codegen test for @llvm.x86.avx.vextractf128.ps.256: calls the intrinsic with
   ; the <8 x float> argument and the constant i8 7, then returns the <4 x float>
   ; result. The FileCheck pattern below expects "vextractf128" in the llc
   ; output. The intrinsic is declared nounwind readnone.
   ; NOTE(review): "vextractf128" is also a substring of this function's name, so
   ; the pattern may match a printed symbol name rather than the instruction --
   ; confirm against the actual llc output.
   2319 define <4 x float> @test_x86_avx_vextractf128_ps_256(<8 x float> %a0) {
   2320   ; CHECK: vextractf128
   2321   %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
   2322   ret <4 x float> %res
   2323 }
   2324 declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
   2325 
   2326 
   ; Codegen test for @llvm.x86.avx.vextractf128.si.256: calls the intrinsic with
   ; the <8 x i32> argument and the constant i8 7, then returns the <4 x i32>
   ; result. The FileCheck pattern below expects "vextractf128" in the llc
   ; output. The intrinsic is declared nounwind readnone.
   ; NOTE(review): "vextractf128" is also a substring of this function's name, so
   ; the pattern may match a printed symbol name rather than the instruction --
   ; confirm against the actual llc output.
   2327 define <4 x i32> @test_x86_avx_vextractf128_si_256(<8 x i32> %a0) {
   2328   ; CHECK: vextractf128
   2329   %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 7) ; <<4 x i32>> [#uses=1]
   2330   ret <4 x i32> %res
   2331 }
   2332 declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
   2333 
   2334 
   ; Codegen test for @llvm.x86.avx.vinsertf128.pd.256: calls the intrinsic with
   ; the <4 x double> and <2 x double> arguments plus the constant i8 7, then
   ; returns the <4 x double> result. The FileCheck pattern below expects
   ; "vinsertf128" in the llc output. The intrinsic is declared nounwind readnone.
   ; NOTE(review): "vinsertf128" is also a substring of this function's name, so
   ; the pattern may match a printed symbol name rather than the instruction --
   ; confirm against the actual llc output.
   2335 define <4 x double> @test_x86_avx_vinsertf128_pd_256(<4 x double> %a0, <2 x double> %a1) {
   2336   ; CHECK: vinsertf128
   2337   %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
   2338   ret <4 x double> %res
   2339 }
   2340 declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
   2341 
   2342 
   ; Codegen test for @llvm.x86.avx.vinsertf128.ps.256: calls the intrinsic with
   ; the <8 x float> and <4 x float> arguments plus the constant i8 7, then
   ; returns the <8 x float> result. The FileCheck pattern below expects
   ; "vinsertf128" in the llc output. The intrinsic is declared nounwind readnone.
   ; NOTE(review): "vinsertf128" is also a substring of this function's name, so
   ; the pattern may match a printed symbol name rather than the instruction --
   ; confirm against the actual llc output.
   2343 define <8 x float> @test_x86_avx_vinsertf128_ps_256(<8 x float> %a0, <4 x float> %a1) {
   2344   ; CHECK: vinsertf128
   2345   %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   2346   ret <8 x float> %res
   2347 }
   2348 declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
   2349 
   2350 
   ; Codegen test for @llvm.x86.avx.vinsertf128.si.256: calls the intrinsic with
   ; the <8 x i32> and <4 x i32> arguments plus the constant i8 7, then returns
   ; the <8 x i32> result. The FileCheck pattern below expects "vinsertf128" in
   ; the llc output. The intrinsic is declared nounwind readnone.
   ; NOTE(review): "vinsertf128" is also a substring of this function's name, so
   ; the pattern may match a printed symbol name rather than the instruction --
   ; confirm against the actual llc output.
   2351 define <8 x i32> @test_x86_avx_vinsertf128_si_256(<8 x i32> %a0, <4 x i32> %a1) {
   2352   ; CHECK: vinsertf128
   2353   %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
   2354   ret <8 x i32> %res
   2355 }
   2356 declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
   2357 
   2358 
   ; Codegen test for @llvm.x86.avx.vperm2f128.pd.256: calls the intrinsic with
   ; both <4 x double> arguments plus the constant i8 7, then returns the
   ; <4 x double> result. The FileCheck pattern below expects "vperm2f128" in the
   ; llc output. The intrinsic is declared nounwind readnone.
   ; NOTE(review): "vperm2f128" is also a substring of this function's name, so
   ; the pattern may match a printed symbol name rather than the instruction --
   ; confirm against the actual llc output.
   2359 define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
   2360   ; CHECK: vperm2f128
   2361   %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
   2362   ret <4 x double> %res
   2363 }
   2364 declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
   2365 
   2366 
   ; Codegen test for @llvm.x86.avx.vperm2f128.ps.256: calls the intrinsic with
   ; both <8 x float> arguments plus the constant i8 7, then returns the
   ; <8 x float> result. The FileCheck pattern below expects "vperm2f128" in the
   ; llc output. The intrinsic is declared nounwind readnone.
   ; NOTE(review): "vperm2f128" is also a substring of this function's name, so
   ; the pattern may match a printed symbol name rather than the instruction --
   ; confirm against the actual llc output.
   2367 define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2368   ; CHECK: vperm2f128
   2369   %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   2370   ret <8 x float> %res
   2371 }
   2372 declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
   2373 
   2374 
   ; Codegen test for @llvm.x86.avx.vperm2f128.si.256: calls the intrinsic with
   ; both <8 x i32> arguments plus the constant i8 7, then returns the <8 x i32>
   ; result. The FileCheck pattern below expects "vperm2f128" in the llc output.
   ; The intrinsic is declared nounwind readnone.
   ; NOTE(review): "vperm2f128" is also a substring of this function's name, so
   ; the pattern may match a printed symbol name rather than the instruction --
   ; confirm against the actual llc output.
   2375 define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
   2376   ; CHECK: vperm2f128
   2377   %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
   2378   ret <8 x i32> %res
   2379 }
   2380 declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
   2381 
   2382 
   ; Codegen test for @llvm.x86.avx.vpermil.pd: calls the intrinsic with the
   ; <2 x double> argument and the constant i8 7, then returns the <2 x double>
   ; result. The FileCheck pattern below expects "vpermilpd" in the llc output.
   ; The intrinsic is declared nounwind readnone.
   2383 define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
   2384   ; CHECK: vpermilpd
   2385   %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
   2386   ret <2 x double> %res
   2387 }
   2388 declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
   2389 
   2390 
   ; Codegen test for @llvm.x86.avx.vpermil.pd.256: calls the intrinsic with the
   ; <4 x double> argument and the constant i8 7, then returns the <4 x double>
   ; result. The FileCheck pattern below expects "vpermilpd" in the llc output.
   ; The intrinsic is declared nounwind readnone.
   2391 define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
   2392   ; CHECK: vpermilpd
   2393   %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
   2394   ret <4 x double> %res
   2395 }
   2396 declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
   2397 
   2398 
   ; Codegen test for @llvm.x86.avx.vpermil.ps: calls the intrinsic with the
   ; <4 x float> argument and the constant i8 7, then returns the <4 x float>
   ; result. The FileCheck pattern below expects "vpermilps" in the llc output.
   ; The intrinsic is declared nounwind readnone.
   2399 define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
   2400   ; CHECK: vpermilps
   2401   %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
   2402   ret <4 x float> %res
   2403 }
   2404 declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
   2405 
   2406 
   ; Codegen test for @llvm.x86.avx.vpermil.ps.256: calls the intrinsic with the
   ; <8 x float> argument and the constant i8 7, then returns the <8 x float>
   ; result. The FileCheck pattern below expects "vpermilps" in the llc output.
   ; The intrinsic is declared nounwind readnone.
   2407 define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
   2408   ; CHECK: vpermilps
   2409   %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
   2410   ret <8 x float> %res
   2411 }
   2412 declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
   2413 
   2414 
   ; Codegen test for @llvm.x86.avx.vpermilvar.pd: forwards the <2 x double> and
   ; <2 x i64> arguments to the intrinsic and returns its <2 x double> result.
   ; The FileCheck pattern below expects "vpermilpd" in the llc output. The
   ; intrinsic is declared nounwind readnone.
   2415 define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
   2416   ; CHECK: vpermilpd
   2417   %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
   2418   ret <2 x double> %res
   2419 }
   2420 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
   2421 
   2422 
   ; Codegen test for @llvm.x86.avx.vpermilvar.pd.256: forwards the <4 x double>
   ; and <4 x i64> arguments to the intrinsic and returns its <4 x double>
   ; result. The FileCheck pattern below expects "vpermilpd" in the llc output.
   ; The intrinsic is declared nounwind readnone.
   2423 define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
   2424   ; CHECK: vpermilpd
   2425   %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
   2426   ret <4 x double> %res
   2427 }
   2428 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
   2429 
   2430 
   ; Codegen test for @llvm.x86.avx.vpermilvar.ps: forwards the <4 x float> and
   ; <4 x i32> arguments to the intrinsic and returns its <4 x float> result. The
   ; FileCheck pattern below expects "vpermilps" in the llc output. The intrinsic
   ; is declared nounwind readnone.
   2431 define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
   2432   ; CHECK: vpermilps
   2433   %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
   2434   ret <4 x float> %res
   2435 }
   2436 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
   2437 
   2438 
   ; Codegen test for @llvm.x86.avx.vpermilvar.ps.256: forwards the <8 x float>
   ; and <8 x i32> arguments to the intrinsic and returns its <8 x float> result.
   ; The FileCheck pattern below expects "vpermilps" in the llc output. The
   ; intrinsic is declared nounwind readnone.
   2439 define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
   2440   ; CHECK: vpermilps
   2441   %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
   2442   ret <8 x float> %res
   2443 }
   2444 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
   2445 
   2446 
   ; Codegen test for @llvm.x86.avx.vtestc.pd: forwards both <2 x double>
   ; arguments to the intrinsic and returns its i32 result. The two FileCheck
   ; patterns below expect "vtestpd" and then "sbbl", in that order, in the llc
   ; output. The intrinsic is declared nounwind readnone.
   2447 define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
   2448   ; CHECK: vtestpd
   2449   ; CHECK: sbbl
   2450   %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   2451   ret i32 %res
   2452 }
   2453 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
   2454 
   2455 
   ; Codegen test for @llvm.x86.avx.vtestc.pd.256: forwards both <4 x double>
   ; arguments to the intrinsic and returns its i32 result. The two FileCheck
   ; patterns below expect "vtestpd" and then "sbbl", in that order, in the llc
   ; output. The intrinsic is declared nounwind readnone.
   2456 define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
   2457   ; CHECK: vtestpd
   2458   ; CHECK: sbbl
   2459   %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
   2460   ret i32 %res
   2461 }
   2462 declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2463 
   2464 
   ; Codegen test for @llvm.x86.avx.vtestc.ps: forwards both <4 x float> arguments
   ; to the intrinsic and returns its i32 result. The two FileCheck patterns
   ; below expect "vtestps" and then "sbbl", in that order, in the llc output.
   ; The intrinsic is declared nounwind readnone.
   2465 define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
   2466   ; CHECK: vtestps
   2467   ; CHECK: sbbl
   2468   %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2469   ret i32 %res
   2470 }
   2471 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
   2472 
   2473 
   ; Codegen test for @llvm.x86.avx.vtestc.ps.256: forwards both <8 x float>
   ; arguments to the intrinsic and returns its i32 result. The two FileCheck
   ; patterns below expect "vtestps" and then "sbbl", in that order, in the llc
   ; output. The intrinsic is declared nounwind readnone.
   2474 define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2475   ; CHECK: vtestps
   2476   ; CHECK: sbbl
   2477   %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
   2478   ret i32 %res
   2479 }
   2480 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2481 
   2482 
   ; Codegen test for @llvm.x86.avx.vtestnzc.pd: forwards both <2 x double>
   ; arguments to the intrinsic and returns its i32 result. The three FileCheck
   ; patterns below expect "vtestpd", "seta" and "movzbl", in that order, in the
   ; llc output. The intrinsic is declared nounwind readnone.
   2483 define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
   2484   ; CHECK: vtestpd
   2485   ; CHECK: seta
   2486   ; CHECK: movzbl
   2487   %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   2488   ret i32 %res
   2489 }
   2490 declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone
   2491 
   2492 
   ; Codegen test for @llvm.x86.avx.vtestnzc.pd.256: forwards both <4 x double>
   ; arguments to the intrinsic and returns its i32 result. The three FileCheck
   ; patterns below expect "vtestpd", "seta" and "movzbl", in that order, in the
   ; llc output. The intrinsic is declared nounwind readnone.
   2493 define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
   2494   ; CHECK: vtestpd
   2495   ; CHECK: seta
   2496   ; CHECK: movzbl
   2497   %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
   2498   ret i32 %res
   2499 }
   2500 declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2501 
   2502 
   ; Codegen test for @llvm.x86.avx.vtestnzc.ps: forwards both <4 x float>
   ; arguments to the intrinsic and returns its i32 result. The three FileCheck
   ; patterns below expect "vtestps", "seta" and "movzbl", in that order, in the
   ; llc output. The intrinsic is declared nounwind readnone.
   2503 define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
   2504   ; CHECK: vtestps
   2505   ; CHECK: seta
   2506   ; CHECK: movzbl
   2507   %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2508   ret i32 %res
   2509 }
   2510 declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone
   2511 
   2512 
   ; Codegen test for @llvm.x86.avx.vtestnzc.ps.256: forwards both <8 x float>
   ; arguments to the intrinsic and returns its i32 result. The three FileCheck
   ; patterns below expect "vtestps", "seta" and "movzbl", in that order, in the
   ; llc output. The intrinsic is declared nounwind readnone.
   2513 define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2514   ; CHECK: vtestps
   2515   ; CHECK: seta
   2516   ; CHECK: movzbl
   2517   %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
   2518   ret i32 %res
   2519 }
   2520 declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2521 
   2522 
   ; Codegen test for @llvm.x86.avx.vtestz.pd: forwards both <2 x double>
   ; arguments to the intrinsic and returns its i32 result. The three FileCheck
   ; patterns below expect "vtestpd", "sete" and "movzbl", in that order, in the
   ; llc output. The intrinsic is declared nounwind readnone.
   2523 define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
   2524   ; CHECK: vtestpd
   2525   ; CHECK: sete
   2526   ; CHECK: movzbl
   2527   %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   2528   ret i32 %res
   2529 }
   2530 declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
   2531 
   2532 
   ; Codegen test for @llvm.x86.avx.vtestz.pd.256: forwards both <4 x double>
   ; arguments to the intrinsic and returns its i32 result. The three FileCheck
   ; patterns below expect "vtestpd", "sete" and "movzbl", in that order, in the
   ; llc output. The intrinsic is declared nounwind readnone.
   2533 define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
   2534   ; CHECK: vtestpd
   2535   ; CHECK: sete
   2536   ; CHECK: movzbl
   2537   %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
   2538   ret i32 %res
   2539 }
   2540 declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2541 
   2542 
   ; Codegen test for @llvm.x86.avx.vtestz.ps: forwards both <4 x float> arguments
   ; to the intrinsic and returns its i32 result. The three FileCheck patterns
   ; below expect "vtestps", "sete" and "movzbl", in that order, in the llc
   ; output. The intrinsic is declared nounwind readnone.
   2543 define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
   2544   ; CHECK: vtestps
   2545   ; CHECK: sete
   2546   ; CHECK: movzbl
   2547   %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2548   ret i32 %res
   2549 }
   2550 declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
   2551 
   2552 
   ; Codegen test for @llvm.x86.avx.vtestz.ps.256: forwards both <8 x float>
   ; arguments to the intrinsic and returns its i32 result. The three FileCheck
   ; patterns below expect "vtestps", "sete" and "movzbl", in that order, in the
   ; llc output. The intrinsic is declared nounwind readnone.
   2553 define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2554   ; CHECK: vtestps
   2555   ; CHECK: sete
   2556   ; CHECK: movzbl
   2557   %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
   2558   ret i32 %res
   2559 }
   2560 declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2561 
   2562 
   ; Codegen test for @llvm.x86.avx.vzeroall: calls the zero-argument, void
   ; intrinsic and returns. The FileCheck pattern below expects "vzeroall" in the
   ; llc output. The intrinsic is declared nounwind only.
   ; NOTE(review): "vzeroall" is also a substring of this function's name, so the
   ; pattern may match a printed symbol name rather than the instruction --
   ; confirm against the actual llc output.
   2563 define void @test_x86_avx_vzeroall() {
   2564   ; CHECK: vzeroall
   2565   call void @llvm.x86.avx.vzeroall()
   2566   ret void
   2567 }
   2568 declare void @llvm.x86.avx.vzeroall() nounwind
   2569 
   2570 
   ; Codegen test for @llvm.x86.avx.vzeroupper: calls the zero-argument, void
   ; intrinsic and returns. The FileCheck pattern below expects "vzeroupper" in
   ; the llc output. The intrinsic is declared nounwind only.
   ; NOTE(review): "vzeroupper" is also a substring of this function's name, so
   ; the pattern may match a printed symbol name rather than the instruction --
   ; confirm against the actual llc output.
   2571 define void @test_x86_avx_vzeroupper() {
   2572   ; CHECK: vzeroupper
   2573   call void @llvm.x86.avx.vzeroupper()
   2574   ret void
   2575 }
   2576 declare void @llvm.x86.avx.vzeroupper() nounwind
   2577 
   2578 
   2579