Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx,aes,pclmul | FileCheck %s
      3 
      4 define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
      5 ; CHECK-LABEL: test_x86_aesni_aesdec:
      6 ; CHECK:       ## BB#0:
      7 ; CHECK-NEXT:    vaesdec %xmm1, %xmm0, %xmm0
      8 ; CHECK-NEXT:    retl
      9   %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vaesdec before retl
     10   ret <2 x i64> %res
     11 }
     12 declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
     13 
     14 
     15 define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
     16 ; CHECK-LABEL: test_x86_aesni_aesdeclast:
     17 ; CHECK:       ## BB#0:
     18 ; CHECK-NEXT:    vaesdeclast %xmm1, %xmm0, %xmm0
     19 ; CHECK-NEXT:    retl
     20   %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vaesdeclast before retl
     21   ret <2 x i64> %res
     22 }
     23 declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone
     24 
     25 
     26 define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
     27 ; CHECK-LABEL: test_x86_aesni_aesenc:
     28 ; CHECK:       ## BB#0:
     29 ; CHECK-NEXT:    vaesenc %xmm1, %xmm0, %xmm0
     30 ; CHECK-NEXT:    retl
     31   %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vaesenc before retl
     32   ret <2 x i64> %res
     33 }
     34 declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
     35 
     36 
     37 define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
     38 ; CHECK-LABEL: test_x86_aesni_aesenclast:
     39 ; CHECK:       ## BB#0:
     40 ; CHECK-NEXT:    vaesenclast %xmm1, %xmm0, %xmm0
     41 ; CHECK-NEXT:    retl
     42   %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vaesenclast before retl
     43   ret <2 x i64> %res
     44 }
     45 declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone
     46 
     47 
     48 define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
     49 ; CHECK-LABEL: test_x86_aesni_aesimc:
     50 ; CHECK:       ## BB#0:
     51 ; CHECK-NEXT:    vaesimc %xmm0, %xmm0
     52 ; CHECK-NEXT:    retl
     53   %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; intrinsic under test; CHECK-NEXT lines allow only one two-operand vaesimc before retl
     54   ret <2 x i64> %res
     55 }
     56 declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
     57 
     58 
     59 define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
     60 ; CHECK-LABEL: test_x86_aesni_aeskeygenassist:
     61 ; CHECK:       ## BB#0:
     62 ; CHECK-NEXT:    vaeskeygenassist $7, %xmm0, %xmm0
     63 ; CHECK-NEXT:    retl
     64   %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; intrinsic under test; the i8 7 argument must appear as the $7 immediate of the single vaeskeygenassist
     65   ret <2 x i64> %res
     66 }
     67 declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
     68 
     69 
     70 define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
     71 ; CHECK-LABEL: test_x86_sse2_add_sd:
     72 ; CHECK:       ## BB#0:
     73 ; CHECK-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
     74 ; CHECK-NEXT:    retl
     75   %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vaddsd before retl
     76   ret <2 x double> %res
     77 }
     78 declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
     79 
     80 
     81 define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
     82 ; CHECK-LABEL: test_x86_sse2_cmp_pd:
     83 ; CHECK:       ## BB#0:
     84 ; CHECK-NEXT:    vcmpordpd %xmm1, %xmm0, %xmm0
     85 ; CHECK-NEXT:    retl
     86   %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; intrinsic under test; with immediate i8 7 the CHECK-NEXT line expects the vcmpordpd mnemonic (no explicit immediate operand)
     87   ret <2 x double> %res
     88 }
     89 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
     90 
     91 
     92 define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
     93 ; CHECK-LABEL: test_x86_sse2_cmp_sd:
     94 ; CHECK:       ## BB#0:
     95 ; CHECK-NEXT:    vcmpordsd %xmm1, %xmm0, %xmm0
     96 ; CHECK-NEXT:    retl
     97   %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; intrinsic under test; with immediate i8 7 the CHECK-NEXT line expects the vcmpordsd mnemonic (no explicit immediate operand)
     98   ret <2 x double> %res
     99 }
    100 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
    101 
    102 
    103 define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
    104 ; CHECK-LABEL: test_x86_sse2_comieq_sd:
    105 ; CHECK:       ## BB#0:
    106 ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
    107 ; CHECK-NEXT:    sete %al
    108 ; CHECK-NEXT:    movzbl %al, %eax
    109 ; CHECK-NEXT:    retl
    110   %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; intrinsic under test; CHECK-NEXT lines pin it to vcomisd + sete %al, then movzbl %al into %eax
    111   ret i32 %res
    112 }
    113 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
    114 
    115 
    116 define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
    117 ; CHECK-LABEL: test_x86_sse2_comige_sd:
    118 ; CHECK:       ## BB#0:
    119 ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
    120 ; CHECK-NEXT:    setae %al
    121 ; CHECK-NEXT:    movzbl %al, %eax
    122 ; CHECK-NEXT:    retl
    123   %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; intrinsic under test; CHECK-NEXT lines pin it to vcomisd + setae %al, then movzbl %al into %eax
    124   ret i32 %res
    125 }
    126 declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
    127 
    128 
    129 define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
    130 ; CHECK-LABEL: test_x86_sse2_comigt_sd:
    131 ; CHECK:       ## BB#0:
    132 ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
    133 ; CHECK-NEXT:    seta %al
    134 ; CHECK-NEXT:    movzbl %al, %eax
    135 ; CHECK-NEXT:    retl
    136   %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; intrinsic under test; CHECK-NEXT lines pin it to vcomisd + seta %al, then movzbl %al into %eax
    137   ret i32 %res
    138 }
    139 declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
    140 
    141 
    142 define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
    143 ; CHECK-LABEL: test_x86_sse2_comile_sd:
    144 ; CHECK:       ## BB#0:
    145 ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
    146 ; CHECK-NEXT:    setbe %al
    147 ; CHECK-NEXT:    movzbl %al, %eax
    148 ; CHECK-NEXT:    retl
    149   %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; intrinsic under test; CHECK-NEXT lines pin it to vcomisd + setbe %al, then movzbl %al into %eax
    150   ret i32 %res
    151 }
    152 declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
    153 
    154 
    155 define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
    156 ; CHECK-LABEL: test_x86_sse2_comilt_sd:
    157 ; CHECK:       ## BB#0:
    158 ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
    159 ; CHECK-NEXT:    sbbl %eax, %eax
    160 ; CHECK-NEXT:    andl $1, %eax
    161 ; CHECK-NEXT:    retl
    162   %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; intrinsic under test; CHECK-NEXT lines pin it to vcomisd + sbbl/andl $1 on %eax (not the setcc + movzbl pair checked in the comieq/comige/comigt/comile/comineq tests)
    163   ret i32 %res
    164 }
    165 declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
    166 
    167 
    168 define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
    169 ; CHECK-LABEL: test_x86_sse2_comineq_sd:
    170 ; CHECK:       ## BB#0:
    171 ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
    172 ; CHECK-NEXT:    setne %al
    173 ; CHECK-NEXT:    movzbl %al, %eax
    174 ; CHECK-NEXT:    retl
    175   %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; intrinsic under test; CHECK-NEXT lines pin it to vcomisd + setne %al, then movzbl %al into %eax
    176   ret i32 %res
    177 }
    178 declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
    179 
    180 
    181 define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
    182 ; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
    183 ; CHECK:       ## BB#0:
    184 ; CHECK-NEXT:    vcvtdq2pd %xmm0, %xmm0
    185 ; CHECK-NEXT:    retl
    186   %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; intrinsic under test; CHECK-NEXT lines allow only one vcvtdq2pd before retl
    187   ret <2 x double> %res
    188 }
    189 declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
    190 
    191 
    192 define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
    193 ; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
    194 ; CHECK:       ## BB#0:
    195 ; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
    196 ; CHECK-NEXT:    retl
    197   %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; intrinsic under test; CHECK-NEXT lines allow only one vcvtdq2ps before retl
    198   ret <4 x float> %res
    199 }
    200 declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
    201 
    202 
    203 define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
    204 ; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
    205 ; CHECK:       ## BB#0:
    206 ; CHECK-NEXT:    vcvtpd2dq %xmm0, %xmm0
    207 ; CHECK-NEXT:    retl
    208   %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; intrinsic under test; CHECK-NEXT lines allow only one vcvtpd2dq before retl
    209   ret <4 x i32> %res
    210 }
    211 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
    212 
    213 
    214 define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
    215 ; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
    216 ; CHECK:       ## BB#0:
    217 ; CHECK-NEXT:    vcvtpd2ps %xmm0, %xmm0
    218 ; CHECK-NEXT:    retl
    219   %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; intrinsic under test; CHECK-NEXT lines allow only one vcvtpd2ps before retl
    220   ret <4 x float> %res
    221 }
    222 declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
    223 
    224 
    225 define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
    226 ; CHECK-LABEL: test_x86_sse2_cvtps2dq:
    227 ; CHECK:       ## BB#0:
    228 ; CHECK-NEXT:    vcvtps2dq %xmm0, %xmm0
    229 ; CHECK-NEXT:    retl
    230   %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; intrinsic under test; CHECK-NEXT lines allow only one vcvtps2dq before retl
    231   ret <4 x i32> %res
    232 }
    233 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
    234 
    235 
    236 define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
    237 ; CHECK-LABEL: test_x86_sse2_cvtps2pd:
    238 ; CHECK:       ## BB#0:
    239 ; CHECK-NEXT:    vcvtps2pd %xmm0, %xmm0
    240 ; CHECK-NEXT:    retl
    241   %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; intrinsic under test; CHECK-NEXT lines allow only one vcvtps2pd before retl
    242   ret <2 x double> %res
    243 }
    244 declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
    245 
    246 
    247 define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
    248 ; CHECK-LABEL: test_x86_sse2_cvtsd2si:
    249 ; CHECK:       ## BB#0:
    250 ; CHECK-NEXT:    vcvtsd2si %xmm0, %eax
    251 ; CHECK-NEXT:    retl
    252   %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; intrinsic under test; CHECK-NEXT lines allow only one vcvtsd2si (%xmm0 -> %eax) before retl
    253   ret i32 %res
    254 }
    255 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
    256 
    257 
    258 define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
    259 ; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
    260 ; CHECK:       ## BB#0:
    261 ; CHECK-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0
    262 ; CHECK-NEXT:    retl
    263   %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vcvtsd2ss before retl
    264   ret <4 x float> %res
    265 }
    266 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
    267 
    268 
    269 define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
    270 ; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
    271 ; CHECK:       ## BB#0:
    272 ; CHECK-NEXT:    movl $7, %eax
    273 ; CHECK-NEXT:    vcvtsi2sdl %eax, %xmm0, %xmm0
    274 ; CHECK-NEXT:    retl
    275   %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; intrinsic under test; CHECK-NEXT lines expect the constant i32 7 loaded into %eax by movl, then one vcvtsi2sdl reading %eax
    276   ret <2 x double> %res
    277 }
    278 declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
    279 
    280 
    281 define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
    282 ; CHECK-LABEL: test_x86_sse2_cvtss2sd:
    283 ; CHECK:       ## BB#0:
    284 ; CHECK-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
    285 ; CHECK-NEXT:    retl
    286   %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vcvtss2sd before retl
    287   ret <2 x double> %res
    288 }
    289 declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
    290 
    291 
    292 define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
    293 ; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
    294 ; CHECK:       ## BB#0:
    295 ; CHECK-NEXT:    vcvttpd2dq %xmm0, %xmm0
    296 ; CHECK-NEXT:    retl
    297   %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; intrinsic under test; CHECK-NEXT lines allow only one vcvttpd2dq before retl
    298   ret <4 x i32> %res
    299 }
    300 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
    301 
    302 
    303 define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
    304 ; CHECK-LABEL: test_x86_sse2_cvttps2dq:
    305 ; CHECK:       ## BB#0:
    306 ; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
    307 ; CHECK-NEXT:    retl
    308   %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; intrinsic under test; CHECK-NEXT lines allow only one vcvttps2dq before retl
    309   ret <4 x i32> %res
    310 }
    311 declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
    312 
    313 
    314 define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
    315 ; CHECK-LABEL: test_x86_sse2_cvttsd2si:
    316 ; CHECK:       ## BB#0:
    317 ; CHECK-NEXT:    vcvttsd2si %xmm0, %eax
    318 ; CHECK-NEXT:    retl
    319   %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; intrinsic under test; CHECK-NEXT lines allow only one vcvttsd2si (%xmm0 -> %eax) before retl
    320   ret i32 %res
    321 }
    322 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
    323 
    324 
    325 define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
    326 ; CHECK-LABEL: test_x86_sse2_div_sd:
    327 ; CHECK:       ## BB#0:
    328 ; CHECK-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
    329 ; CHECK-NEXT:    retl
    330   %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vdivsd before retl
    331   ret <2 x double> %res
    332 }
    333 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
    334 
    335 
    336 
    337 define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
    338 ; CHECK-LABEL: test_x86_sse2_max_pd:
    339 ; CHECK:       ## BB#0:
    340 ; CHECK-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
    341 ; CHECK-NEXT:    retl
    342   %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vmaxpd before retl
    343   ret <2 x double> %res
    344 }
    345 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
    346 
    347 
    348 define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
    349 ; CHECK-LABEL: test_x86_sse2_max_sd:
    350 ; CHECK:       ## BB#0:
    351 ; CHECK-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
    352 ; CHECK-NEXT:    retl
    353   %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vmaxsd before retl
    354   ret <2 x double> %res
    355 }
    356 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
    357 
    358 
    359 define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
    360 ; CHECK-LABEL: test_x86_sse2_min_pd:
    361 ; CHECK:       ## BB#0:
    362 ; CHECK-NEXT:    vminpd %xmm1, %xmm0, %xmm0
    363 ; CHECK-NEXT:    retl
    364   %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vminpd before retl
    365   ret <2 x double> %res
    366 }
    367 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
    368 
    369 
    370 define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
    371 ; CHECK-LABEL: test_x86_sse2_min_sd:
    372 ; CHECK:       ## BB#0:
    373 ; CHECK-NEXT:    vminsd %xmm1, %xmm0, %xmm0
    374 ; CHECK-NEXT:    retl
    375   %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vminsd before retl
    376   ret <2 x double> %res
    377 }
    378 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
    379 
    380 
    381 define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
    382 ; CHECK-LABEL: test_x86_sse2_movmsk_pd:
    383 ; CHECK:       ## BB#0:
    384 ; CHECK-NEXT:    vmovmskpd %xmm0, %eax
    385 ; CHECK-NEXT:    retl
    386   %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; intrinsic under test; CHECK-NEXT lines allow only one vmovmskpd (%xmm0 -> %eax) before retl
    387   ret i32 %res
    388 }
    389 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
    390 
    391 
    392 
    393 
    394 define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
    395 ; CHECK-LABEL: test_x86_sse2_mul_sd:
    396 ; CHECK:       ## BB#0:
    397 ; CHECK-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
    398 ; CHECK-NEXT:    retl
    399   %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vmulsd before retl
    400   ret <2 x double> %res
    401 }
    402 declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
    403 
    404 
    405 define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
    406 ; CHECK-LABEL: test_x86_sse2_packssdw_128:
    407 ; CHECK:       ## BB#0:
    408 ; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
    409 ; CHECK-NEXT:    retl
    410   %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpackssdw before retl
    411   ret <8 x i16> %res
    412 }
    413 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
    414 
    415 
    416 define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
    417 ; CHECK-LABEL: test_x86_sse2_packsswb_128:
    418 ; CHECK:       ## BB#0:
    419 ; CHECK-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
    420 ; CHECK-NEXT:    retl
    421   %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpacksswb before retl
    422   ret <16 x i8> %res
    423 }
    424 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
    425 
    426 
    427 define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
    428 ; CHECK-LABEL: test_x86_sse2_packuswb_128:
    429 ; CHECK:       ## BB#0:
    430 ; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
    431 ; CHECK-NEXT:    retl
    432   %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpackuswb before retl
    433   ret <16 x i8> %res
    434 }
    435 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
    436 
    437 
    438 define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
    439 ; CHECK-LABEL: test_x86_sse2_padds_b:
    440 ; CHECK:       ## BB#0:
    441 ; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0
    442 ; CHECK-NEXT:    retl
    443   %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpaddsb before retl
    444   ret <16 x i8> %res
    445 }
    446 declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
    447 
    448 
    449 define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
    450 ; CHECK-LABEL: test_x86_sse2_padds_w:
    451 ; CHECK:       ## BB#0:
    452 ; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0
    453 ; CHECK-NEXT:    retl
    454   %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpaddsw before retl
    455   ret <8 x i16> %res
    456 }
    457 declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
    458 
    459 
    460 define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
    461 ; CHECK-LABEL: test_x86_sse2_paddus_b:
    462 ; CHECK:       ## BB#0:
    463 ; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
    464 ; CHECK-NEXT:    retl
    465   %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpaddusb before retl
    466   ret <16 x i8> %res
    467 }
    468 declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
    469 
    470 
    471 define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
    472 ; CHECK-LABEL: test_x86_sse2_paddus_w:
    473 ; CHECK:       ## BB#0:
    474 ; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
    475 ; CHECK-NEXT:    retl
    476   %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpaddusw before retl
    477   ret <8 x i16> %res
    478 }
    479 declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
    480 
    481 
    482 define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
    483 ; CHECK-LABEL: test_x86_sse2_pavg_b:
    484 ; CHECK:       ## BB#0:
    485 ; CHECK-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
    486 ; CHECK-NEXT:    retl
    487   %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpavgb before retl
    488   ret <16 x i8> %res
    489 }
    490 declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
    491 
    492 
    493 define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
    494 ; CHECK-LABEL: test_x86_sse2_pavg_w:
    495 ; CHECK:       ## BB#0:
    496 ; CHECK-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
    497 ; CHECK-NEXT:    retl
    498   %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpavgw before retl
    499   ret <8 x i16> %res
    500 }
    501 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
    502 
    503 
    504 define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
    505 ; CHECK-LABEL: test_x86_sse2_pmadd_wd:
    506 ; CHECK:       ## BB#0:
    507 ; CHECK-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0
    508 ; CHECK-NEXT:    retl
    509   %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpmaddwd before retl
    510   ret <4 x i32> %res
    511 }
    512 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
    513 
    514 
    515 define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
    516 ; CHECK-LABEL: test_x86_sse2_pmaxs_w:
    517 ; CHECK:       ## BB#0:
    518 ; CHECK-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
    519 ; CHECK-NEXT:    retl
    520   %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpmaxsw before retl
    521   ret <8 x i16> %res
    522 }
    523 declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
    524 
    525 
    526 define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
    527 ; CHECK-LABEL: test_x86_sse2_pmaxu_b:
    528 ; CHECK:       ## BB#0:
    529 ; CHECK-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
    530 ; CHECK-NEXT:    retl
    531   %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpmaxub before retl
    532   ret <16 x i8> %res
    533 }
    534 declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
    535 
    536 
    537 define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
    538 ; CHECK-LABEL: test_x86_sse2_pmins_w:
    539 ; CHECK:       ## BB#0:
    540 ; CHECK-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
    541 ; CHECK-NEXT:    retl
    542   %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpminsw before retl
    543   ret <8 x i16> %res
    544 }
    545 declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
    546 
    547 
    548 define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
    549 ; CHECK-LABEL: test_x86_sse2_pminu_b:
    550 ; CHECK:       ## BB#0:
    551 ; CHECK-NEXT:    vpminub %xmm1, %xmm0, %xmm0
    552 ; CHECK-NEXT:    retl
    553   %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpminub before retl
    554   ret <16 x i8> %res
    555 }
    556 declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
    557 
    558 
    559 define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
    560 ; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
    561 ; CHECK:       ## BB#0:
    562 ; CHECK-NEXT:    vpmovmskb %xmm0, %eax
    563 ; CHECK-NEXT:    retl
    564   %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; intrinsic under test; CHECK-NEXT lines allow only one vpmovmskb (%xmm0 -> %eax) before retl
    565   ret i32 %res
    566 }
    567 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
    568 
    569 
    570 define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
    571 ; CHECK-LABEL: test_x86_sse2_pmulh_w:
    572 ; CHECK:       ## BB#0:
    573 ; CHECK-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0
    574 ; CHECK-NEXT:    retl
    575   %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpmulhw before retl
    576   ret <8 x i16> %res
    577 }
    578 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
    579 
    580 
    581 define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
    582 ; CHECK-LABEL: test_x86_sse2_pmulhu_w:
    583 ; CHECK:       ## BB#0:
    584 ; CHECK-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0
    585 ; CHECK-NEXT:    retl
    586   %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpmulhuw before retl
    587   ret <8 x i16> %res
    588 }
    589 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
    590 
    591 
    592 define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
    593 ; CHECK-LABEL: test_x86_sse2_pmulu_dq:
    594 ; CHECK:       ## BB#0:
    595 ; CHECK-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
    596 ; CHECK-NEXT:    retl
    597   %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpmuludq before retl
    598   ret <2 x i64> %res
    599 }
    600 declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
    601 
    602 
    603 define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
    604 ; CHECK-LABEL: test_x86_sse2_psad_bw:
    605 ; CHECK:       ## BB#0:
    606 ; CHECK-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
    607 ; CHECK-NEXT:    retl
    608   %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one vpsadbw before retl
    609   ret <2 x i64> %res
    610 }
    611 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
    612 
    613 
    614 define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
    615 ; CHECK-LABEL: test_x86_sse2_psll_d:
    616 ; CHECK:       ## BB#0:
    617 ; CHECK-NEXT:    vpslld %xmm1, %xmm0, %xmm0
    618 ; CHECK-NEXT:    retl
    619   %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one register-form vpslld (%xmm1, %xmm0, %xmm0) before retl
    620   ret <4 x i32> %res
    621 }
    622 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
    623 
    624 
    625 define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
    626 ; CHECK-LABEL: test_x86_sse2_psll_q:
    627 ; CHECK:       ## BB#0:
    628 ; CHECK-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
    629 ; CHECK-NEXT:    retl
    630   %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one register-form vpsllq (%xmm1, %xmm0, %xmm0) before retl
    631   ret <2 x i64> %res
    632 }
    633 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
    634 
    635 
    636 define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
    637 ; CHECK-LABEL: test_x86_sse2_psll_w:
    638 ; CHECK:       ## BB#0:
    639 ; CHECK-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
    640 ; CHECK-NEXT:    retl
    641   %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one register-form vpsllw (%xmm1, %xmm0, %xmm0) before retl
    642   ret <8 x i16> %res
    643 }
    644 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
    645 
    646 
    647 define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
    648 ; CHECK-LABEL: test_x86_sse2_pslli_d:
    649 ; CHECK:       ## BB#0:
    650 ; CHECK-NEXT:    vpslld $7, %xmm0, %xmm0
    651 ; CHECK-NEXT:    retl
    652   %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; intrinsic under test; the i32 7 argument must appear as the $7 immediate of the single vpslld
    653   ret <4 x i32> %res
    654 }
    655 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
    656 
    657 
    658 define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
    659 ; CHECK-LABEL: test_x86_sse2_pslli_q:
    660 ; CHECK:       ## BB#0:
    661 ; CHECK-NEXT:    vpsllq $7, %xmm0, %xmm0
    662 ; CHECK-NEXT:    retl
    663   %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; intrinsic under test; the i32 7 argument must appear as the $7 immediate of the single vpsllq
    664   ret <2 x i64> %res
    665 }
    666 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
    667 
    668 
    669 define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
    670 ; CHECK-LABEL: test_x86_sse2_pslli_w:
    671 ; CHECK:       ## BB#0:
    672 ; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
    673 ; CHECK-NEXT:    retl
    674   %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; intrinsic under test; the i32 7 argument must appear as the $7 immediate of the single vpsllw
    675   ret <8 x i16> %res
    676 }
    677 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
    678 
    679 
    680 define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
    681 ; CHECK-LABEL: test_x86_sse2_psra_d:
    682 ; CHECK:       ## BB#0:
    683 ; CHECK-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
    684 ; CHECK-NEXT:    retl
    685   %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one register-form vpsrad (%xmm1, %xmm0, %xmm0) before retl
    686   ret <4 x i32> %res
    687 }
    688 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
    689 
    690 
    691 define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
    692 ; CHECK-LABEL: test_x86_sse2_psra_w:
    693 ; CHECK:       ## BB#0:
    694 ; CHECK-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
    695 ; CHECK-NEXT:    retl
    696   %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; intrinsic under test; CHECK-NEXT lines allow only one register-form vpsraw (%xmm1, %xmm0, %xmm0) before retl
    697   ret <8 x i16> %res
    698 }
    699 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
    700 
    701 
    702 define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
    703 ; CHECK-LABEL: test_x86_sse2_psrai_d:
    704 ; CHECK:       ## BB#0:
    705 ; CHECK-NEXT:    vpsrad $7, %xmm0, %xmm0
    706 ; CHECK-NEXT:    retl
    707   %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; intrinsic under test; the i32 7 argument must appear as the $7 immediate of the single vpsrad
    708   ret <4 x i32> %res
    709 }
    710 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
    711 
    712 
    713 define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
    714 ; CHECK-LABEL: test_x86_sse2_psrai_w:
    715 ; CHECK:       ## BB#0:
    716 ; CHECK-NEXT:    vpsraw $7, %xmm0, %xmm0
    717 ; CHECK-NEXT:    retl
    718   %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; intrinsic under test; the i32 7 argument must appear as the $7 immediate of the single vpsraw
    719   ret <8 x i16> %res
    720 }
    721 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
    722 
    723 
    724 define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
    725 ; CHECK-LABEL: test_x86_sse2_psrl_d:
    726 ; CHECK:       ## BB#0:
    727 ; CHECK-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
    728 ; CHECK-NEXT:    retl
    729   %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; shift count is the vector %a1; the checks expect the register form of vpsrld
    730   ret <4 x i32> %res
    731 }
    732 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
    733 
    734 
    735 define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
    736 ; CHECK-LABEL: test_x86_sse2_psrl_q:
    737 ; CHECK:       ## BB#0:
    738 ; CHECK-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
    739 ; CHECK-NEXT:    retl
    740   %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; shift count is the vector %a1; the checks expect the register form of vpsrlq
    741   ret <2 x i64> %res
    742 }
    743 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
    744 
    745 
    746 define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
    747 ; CHECK-LABEL: test_x86_sse2_psrl_w:
    748 ; CHECK:       ## BB#0:
    749 ; CHECK-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
    750 ; CHECK-NEXT:    retl
    751   %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; shift count is the vector %a1; the checks expect the register form of vpsrlw
    752   ret <8 x i16> %res
    753 }
    754 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
    755 
    756 
    757 define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
    758 ; CHECK-LABEL: test_x86_sse2_psrli_d:
    759 ; CHECK:       ## BB#0:
    760 ; CHECK-NEXT:    vpsrld $7, %xmm0, %xmm0
    761 ; CHECK-NEXT:    retl
    762   %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; count is the constant i32 7; the checks expect it as vpsrld's $7 immediate
    763   ret <4 x i32> %res
    764 }
    765 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
    766 
    767 
    768 define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
    769 ; CHECK-LABEL: test_x86_sse2_psrli_q:
    770 ; CHECK:       ## BB#0:
    771 ; CHECK-NEXT:    vpsrlq $7, %xmm0, %xmm0
    772 ; CHECK-NEXT:    retl
    773   %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; count is the constant i32 7; the checks expect it as vpsrlq's $7 immediate
    774   ret <2 x i64> %res
    775 }
    776 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
    777 
    778 
    779 define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
    780 ; CHECK-LABEL: test_x86_sse2_psrli_w:
    781 ; CHECK:       ## BB#0:
    782 ; CHECK-NEXT:    vpsrlw $7, %xmm0, %xmm0
    783 ; CHECK-NEXT:    retl
    784   %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; count is the constant i32 7; the checks expect it as vpsrlw's $7 immediate
    785   ret <8 x i16> %res
    786 }
    787 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
    788 
    789 
    790 define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
    791 ; CHECK-LABEL: test_x86_sse2_psubs_b:
    792 ; CHECK:       ## BB#0:
    793 ; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0
    794 ; CHECK-NEXT:    retl
    795   %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; the checks expect this call to select a single vpsubsb
    796   ret <16 x i8> %res
    797 }
    798 declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
    799 
    800 
    801 define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
    802 ; CHECK-LABEL: test_x86_sse2_psubs_w:
    803 ; CHECK:       ## BB#0:
    804 ; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0
    805 ; CHECK-NEXT:    retl
    806   %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; the checks expect this call to select a single vpsubsw
    807   ret <8 x i16> %res
    808 }
    809 declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
    810 
    811 
    812 define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
    813 ; CHECK-LABEL: test_x86_sse2_psubus_b:
    814 ; CHECK:       ## BB#0:
    815 ; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0
    816 ; CHECK-NEXT:    retl
    817   %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; the checks expect this call to select a single vpsubusb
    818   ret <16 x i8> %res
    819 }
    820 declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
    821 
    822 
    823 define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
    824 ; CHECK-LABEL: test_x86_sse2_psubus_w:
    825 ; CHECK:       ## BB#0:
    826 ; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0
    827 ; CHECK-NEXT:    retl
    828   %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; the checks expect this call to select a single vpsubusw
    829   ret <8 x i16> %res
    830 }
    831 declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
    832 
    833 
    834 define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
    835 ; CHECK-LABEL: test_x86_sse2_sqrt_pd:
    836 ; CHECK:       ## BB#0:
    837 ; CHECK-NEXT:    vsqrtpd %xmm0, %xmm0
    838 ; CHECK-NEXT:    retl
    839   %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; the checks expect a single vsqrtpd operating in place on %xmm0
    840   ret <2 x double> %res
    841 }
    842 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
    843 
    844 
    845 define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
    846 ; CHECK-LABEL: test_x86_sse2_sqrt_sd:
    847 ; CHECK:       ## BB#0:
    848 ; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
    849 ; CHECK-NEXT:    retl
    850   %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; one-argument intrinsic; the checks expect vsqrtsd with %xmm0 used for both source operands
    851   ret <2 x double> %res
    852 }
    853 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
    854 
    855 
    856 define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
    857 ; CHECK-LABEL: test_x86_sse2_storel_dq:
    858 ; CHECK:       ## BB#0:
    859 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
    860 ; CHECK-NEXT:    vmovlps %xmm0, (%eax)
    861 ; CHECK-NEXT:    retl
    862   call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1) ; the checks expect the pointer to be read from the stack and the store emitted as vmovlps
    863   ret void
    864 }
    865 declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
    866 
    867 
    868 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
    869   ; add operation forces the execution domain, so the checks expect the store as vmovdqu.
    870 ; CHECK-LABEL: test_x86_sse2_storeu_dq:
    871 ; CHECK:       ## BB#0:
    872 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
    873 ; CHECK-NEXT:    vpaddb {{LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
    874 ; CHECK-NEXT:    vmovdqu %xmm0, (%eax)
    875 ; CHECK-NEXT:    retl
    876   %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> ; splat of 1 comes from a constant pool; its label is matched by pattern because the number depends on this function's position in the file
    877   call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2) ; unaligned store of the add result through %a0
    878   ret void
    879 }
    880 declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
    881 
    882 
    883 define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
    884   ; fadd operation forces the execution domain, so the checks expect the store as vmovupd.
    885 ; CHECK-LABEL: test_x86_sse2_storeu_pd:
    886 ; CHECK:       ## BB#0:
    887 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
    888 ; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    889 ; CHECK-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
    890 ; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
    891 ; CHECK-NEXT:    retl
    892   %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000> ; per the checks, the constant operand is built with vmovsd + vpslldq before the vaddpd
    893   call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2) ; unaligned store of the fadd result through %a0
    894   ret void
    895 }
    896 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
    898 
    899 
    900 define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
    901 ; CHECK-LABEL: test_x86_sse2_sub_sd:
    902 ; CHECK:       ## BB#0:
    903 ; CHECK-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
    904 ; CHECK-NEXT:    retl
    905   %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; the checks expect this call to select a single vsubsd
    906   ret <2 x double> %res
    907 }
    908 declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
    909 
    910 
    911 define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
    912 ; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
    913 ; CHECK:       ## BB#0:
    914 ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
    915 ; CHECK-NEXT:    sete %al
    916 ; CHECK-NEXT:    movzbl %al, %eax
    917 ; CHECK-NEXT:    retl
    918   %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; i32 result; the checks expect vucomisd followed by sete + movzbl to produce it
    919   ret i32 %res
    920 }
    921 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
    922 
    923 
    924 define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
    925 ; CHECK-LABEL: test_x86_sse2_ucomige_sd:
    926 ; CHECK:       ## BB#0:
    927 ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
    928 ; CHECK-NEXT:    setae %al
    929 ; CHECK-NEXT:    movzbl %al, %eax
    930 ; CHECK-NEXT:    retl
    931   %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; i32 result; the checks expect vucomisd followed by setae + movzbl to produce it
    932   ret i32 %res
    933 }
    934 declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
    935 
    936 
    937 define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
    938 ; CHECK-LABEL: test_x86_sse2_ucomigt_sd:
    939 ; CHECK:       ## BB#0:
    940 ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
    941 ; CHECK-NEXT:    seta %al
    942 ; CHECK-NEXT:    movzbl %al, %eax
    943 ; CHECK-NEXT:    retl
    944   %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; i32 result; the checks expect vucomisd followed by seta + movzbl to produce it
    945   ret i32 %res
    946 }
    947 declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
    948 
    949 
    950 define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
    951 ; CHECK-LABEL: test_x86_sse2_ucomile_sd:
    952 ; CHECK:       ## BB#0:
    953 ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
    954 ; CHECK-NEXT:    setbe %al
    955 ; CHECK-NEXT:    movzbl %al, %eax
    956 ; CHECK-NEXT:    retl
    957   %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; i32 result; the checks expect vucomisd followed by setbe + movzbl to produce it
    958   ret i32 %res
    959 }
    960 declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
    961 
    962 
    963 define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
    964 ; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
    965 ; CHECK:       ## BB#0:
    966 ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
    967 ; CHECK-NEXT:    sbbl %eax, %eax
    968 ; CHECK-NEXT:    andl $1, %eax
    969 ; CHECK-NEXT:    retl
    970   %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; i32 result; unlike the setcc-based siblings, the checks expect vucomisd followed by sbbl + andl $1
    971   ret i32 %res
    972 }
    973 declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
    974 
    975 
    976 define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
    977 ; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
    978 ; CHECK:       ## BB#0:
    979 ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
    980 ; CHECK-NEXT:    setne %al
    981 ; CHECK-NEXT:    movzbl %al, %eax
    982 ; CHECK-NEXT:    retl
    983   %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; i32 result; the checks expect vucomisd followed by setne + movzbl to produce it
    984   ret i32 %res
    985 }
    986 declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
    987 
    988 
    989 define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
    990 ; CHECK-LABEL: test_x86_sse3_addsub_pd:
    991 ; CHECK:       ## BB#0:
    992 ; CHECK-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0
    993 ; CHECK-NEXT:    retl
    994   %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; the checks expect this call to select a single vaddsubpd
    995   ret <2 x double> %res
    996 }
    997 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
    998 
    999 
   1000 define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
   1001 ; CHECK-LABEL: test_x86_sse3_addsub_ps:
   1002 ; CHECK:       ## BB#0:
   1003 ; CHECK-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0
   1004 ; CHECK-NEXT:    retl
   1005   %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; the checks expect this call to select a single vaddsubps
   1006   ret <4 x float> %res
   1007 }
   1008 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
   1009 
   1010 
   1011 define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
   1012 ; CHECK-LABEL: test_x86_sse3_hadd_pd:
   1013 ; CHECK:       ## BB#0:
   1014 ; CHECK-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
   1015 ; CHECK-NEXT:    retl
   1016   %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; the checks expect this call to select a single vhaddpd
   1017   ret <2 x double> %res
   1018 }
   1019 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
   1020 
   1021 
   1022 define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
   1023 ; CHECK-LABEL: test_x86_sse3_hadd_ps:
   1024 ; CHECK:       ## BB#0:
   1025 ; CHECK-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
   1026 ; CHECK-NEXT:    retl
   1027   %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; the checks expect this call to select a single vhaddps
   1028   ret <4 x float> %res
   1029 }
   1030 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
   1031 
   1032 
   1033 define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
   1034 ; CHECK-LABEL: test_x86_sse3_hsub_pd:
   1035 ; CHECK:       ## BB#0:
   1036 ; CHECK-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
   1037 ; CHECK-NEXT:    retl
   1038   %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; the checks expect this call to select a single vhsubpd
   1039   ret <2 x double> %res
   1040 }
   1041 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
   1042 
   1043 
   1044 define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
   1045 ; CHECK-LABEL: test_x86_sse3_hsub_ps:
   1046 ; CHECK:       ## BB#0:
   1047 ; CHECK-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
   1048 ; CHECK-NEXT:    retl
   1049   %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; the checks expect this call to select a single vhsubps
   1050   ret <4 x float> %res
   1051 }
   1052 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
   1053 
   1054 
   1055 define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
   1056 ; CHECK-LABEL: test_x86_sse3_ldu_dq:
   1057 ; CHECK:       ## BB#0:
   1058 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1059 ; CHECK-NEXT:    vlddqu (%eax), %xmm0
   1060 ; CHECK-NEXT:    retl
   1061   %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; the checks expect the pointer to be read from the stack and the load emitted as vlddqu
   1062   ret <16 x i8> %res
   1063 }
   1064 declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
   1065 
   1066 
   1067 define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
   1068 ; CHECK-LABEL: test_x86_sse41_blendvpd:
   1069 ; CHECK:       ## BB#0:
   1070 ; CHECK-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
   1071 ; CHECK-NEXT:    retl
   1072   %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; three vector operands; the checks expect a single four-operand vblendvpd
   1073   ret <2 x double> %res
   1074 }
   1075 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
   1076 
   1077 
   1078 define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
   1079 ; CHECK-LABEL: test_x86_sse41_blendvps:
   1080 ; CHECK:       ## BB#0:
   1081 ; CHECK-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
   1082 ; CHECK-NEXT:    retl
   1083   %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; three vector operands; the checks expect a single four-operand vblendvps
   1084   ret <4 x float> %res
   1085 }
   1086 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
   1087 
   1088 
   1089 define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
   1090 ; CHECK-LABEL: test_x86_sse41_dppd:
   1091 ; CHECK:       ## BB#0:
   1092 ; CHECK-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0
   1093 ; CHECK-NEXT:    retl
   1094   %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; the i8 constant 7 is expected to appear as vdppd's $7 immediate
   1095   ret <2 x double> %res
   1096 }
   1097 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
   1098 
   1099 
   1100 define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
   1101 ; CHECK-LABEL: test_x86_sse41_dpps:
   1102 ; CHECK:       ## BB#0:
   1103 ; CHECK-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0
   1104 ; CHECK-NEXT:    retl
   1105   %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; the i8 constant 7 is expected to appear as vdpps's $7 immediate
   1106   ret <4 x float> %res
   1107 }
   1108 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
   1109 
   1110 
   1111 define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
   1112 ; CHECK-LABEL: test_x86_sse41_insertps:
   1113 ; CHECK:       ## BB#0:
   1114 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[3]
   1115 ; CHECK-NEXT:    retl
   1116   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; with immediate 7 the checks match vinsertps by its decoded shuffle comment (zero,zero,zero,xmm0[3]), not by operands
   1117   ret <4 x float> %res
   1118 }
   1119 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
   1120 
   1121 
   1122 
   1123 define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
   1124 ; CHECK-LABEL: test_x86_sse41_mpsadbw:
   1125 ; CHECK:       ## BB#0:
   1126 ; CHECK-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0
   1127 ; CHECK-NEXT:    retl
   1128   %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <16 x i8> inputs, <8 x i16> result; the i8 constant 7 is expected as vmpsadbw's $7 immediate
   1129   ret <8 x i16> %res
   1130 }
   1131 declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1132 
   1133 
   1134 define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
   1135 ; CHECK-LABEL: test_x86_sse41_packusdw:
   1136 ; CHECK:       ## BB#0:
   1137 ; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
   1138 ; CHECK-NEXT:    retl
   1139   %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; two <4 x i32> inputs, one <8 x i16> result; the checks expect a single vpackusdw
   1140   ret <8 x i16> %res
   1141 }
   1142 declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
   1143 
   1144 
   1145 define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
   1146 ; CHECK-LABEL: test_x86_sse41_pblendvb:
   1147 ; CHECK:       ## BB#0:
   1148 ; CHECK-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
   1149 ; CHECK-NEXT:    retl
   1150   %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; three vector operands; the checks expect a single four-operand vpblendvb
   1151   ret <16 x i8> %res
   1152 }
   1153 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
   1154 
   1155 
   1156 define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
   1157 ; CHECK-LABEL: test_x86_sse41_phminposuw:
   1158 ; CHECK:       ## BB#0:
   1159 ; CHECK-NEXT:    vphminposuw %xmm0, %xmm0
   1160 ; CHECK-NEXT:    retl
   1161   %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; the checks expect a single vphminposuw operating in place on %xmm0
   1162   ret <8 x i16> %res
   1163 }
   1164 declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
   1165 
   1166 
   1167 define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
   1168 ; CHECK-LABEL: test_x86_sse41_pmaxsb:
   1169 ; CHECK:       ## BB#0:
   1170 ; CHECK-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
   1171 ; CHECK-NEXT:    retl
   1172   %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; the checks expect this call to select a single vpmaxsb
   1173   ret <16 x i8> %res
   1174 }
   1175 declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
   1176 
   1177 
   1178 define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
   1179 ; CHECK-LABEL: test_x86_sse41_pmaxsd:
   1180 ; CHECK:       ## BB#0:
   1181 ; CHECK-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
   1182 ; CHECK-NEXT:    retl
   1183   %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; the checks expect this call to select a single vpmaxsd
   1184   ret <4 x i32> %res
   1185 }
   1186 declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
   1187 
   1188 
   1189 define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
   1190 ; CHECK-LABEL: test_x86_sse41_pmaxud:
   1191 ; CHECK:       ## BB#0:
   1192 ; CHECK-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
   1193 ; CHECK-NEXT:    retl
   1194   %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; the checks expect this call to select a single vpmaxud
   1195   ret <4 x i32> %res
   1196 }
   1197 declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
   1198 
   1199 
   1200 define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
   1201 ; CHECK-LABEL: test_x86_sse41_pmaxuw:
   1202 ; CHECK:       ## BB#0:
   1203 ; CHECK-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
   1204 ; CHECK-NEXT:    retl
   1205   %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; the checks expect this call to select a single vpmaxuw
   1206   ret <8 x i16> %res
   1207 }
   1208 declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
   1209 
   1210 
   1211 define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
   1212 ; CHECK-LABEL: test_x86_sse41_pminsb:
   1213 ; CHECK:       ## BB#0:
   1214 ; CHECK-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
   1215 ; CHECK-NEXT:    retl
   1216   %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; the checks expect this call to select a single vpminsb
   1217   ret <16 x i8> %res
   1218 }
   1219 declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
   1220 
   1221 
   1222 define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
   1223 ; CHECK-LABEL: test_x86_sse41_pminsd:
   1224 ; CHECK:       ## BB#0:
   1225 ; CHECK-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
   1226 ; CHECK-NEXT:    retl
   1227   %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; the checks expect this call to select a single vpminsd
   1228   ret <4 x i32> %res
   1229 }
   1230 declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
   1231 
   1232 
   1233 define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
   1234 ; CHECK-LABEL: test_x86_sse41_pminud:
   1235 ; CHECK:       ## BB#0:
   1236 ; CHECK-NEXT:    vpminud %xmm1, %xmm0, %xmm0
   1237 ; CHECK-NEXT:    retl
   1238   %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; the checks expect this call to select a single vpminud
   1239   ret <4 x i32> %res
   1240 }
   1241 declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
   1242 
   1243 
   1244 define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
   1245 ; CHECK-LABEL: test_x86_sse41_pminuw:
   1246 ; CHECK:       ## BB#0:
   1247 ; CHECK-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
   1248 ; CHECK-NEXT:    retl
   1249   %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; the checks expect this call to select a single vpminuw
   1250   ret <8 x i16> %res
   1251 }
   1252 declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
   1253 
   1254 
   1255 define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
   1256 ; CHECK-LABEL: test_x86_sse41_pmovzxbd:
   1257 ; CHECK:       ## BB#0:
   1258 ; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1259 ; CHECK-NEXT:    retl
   1260   %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; the checks match vpmovzxbd by its decoded shuffle comment: bytes 0-3 of xmm0, each followed by three zero bytes
   1261   ret <4 x i32> %res
   1262 }
   1263 declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
   1264 
   1265 
   1266 define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
   1267 ; CHECK-LABEL: test_x86_sse41_pmovzxbq:
   1268 ; CHECK:       ## BB#0:
   1269 ; CHECK-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1270 ; CHECK-NEXT:    retl
   1271   %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; the checks match vpmovzxbq by its decoded shuffle comment: bytes 0-1 of xmm0, each followed by seven zero bytes
   1272   ret <2 x i64> %res
   1273 }
   1274 declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
   1275 
   1276 
   1277 define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
   1278 ; CHECK-LABEL: test_x86_sse41_pmovzxbw:
   1279 ; CHECK:       ## BB#0:
   1280 ; CHECK-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1281 ; CHECK-NEXT:    retl
   1282   %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; the checks match vpmovzxbw by its decoded shuffle comment: bytes 0-7 of xmm0, each followed by one zero byte
   1283   ret <8 x i16> %res
   1284 }
   1285 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
   1286 
   1287 
   1288 define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
   1289 ; CHECK-LABEL: test_x86_sse41_pmovzxdq:
   1290 ; CHECK:       ## BB#0:
   1291 ; CHECK-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
   1292 ; CHECK-NEXT:    retl
   1293   %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; the checks match vpmovzxdq by its decoded shuffle comment: elements 0-1 of xmm0, each followed by one zero element
   1294   ret <2 x i64> %res
   1295 }
   1296 declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
   1297 
   1298 
   1299 define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
   1300 ; CHECK-LABEL: test_x86_sse41_pmovzxwd:
   1301 ; CHECK:       ## BB#0:
   1302 ; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1303 ; CHECK-NEXT:    retl
   1304   %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; the checks match vpmovzxwd by its decoded shuffle comment: elements 0-3 of xmm0, each followed by one zero element
   1305   ret <4 x i32> %res
   1306 }
   1307 declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
   1308 
   1309 
   1310 define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
   1311 ; CHECK-LABEL: test_x86_sse41_pmovzxwq:
   1312 ; CHECK:       ## BB#0:
   1313 ; CHECK-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1314 ; CHECK-NEXT:    retl
   1315   %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; the checks match vpmovzxwq by its decoded shuffle comment: elements 0-1 of xmm0, each followed by three zero elements
   1316   ret <2 x i64> %res
   1317 }
   1318 declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
   1319 
   1320 
   1321 define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
   1322 ; CHECK-LABEL: test_x86_sse41_pmuldq:
   1323 ; CHECK:       ## BB#0:
   1324 ; CHECK-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0
   1325 ; CHECK-NEXT:    retl
   1326   %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; two <4 x i32> inputs, one <2 x i64> result; the checks expect a single vpmuldq
   1327   ret <2 x i64> %res
   1328 }
   1329 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
   1330 
   1331 
   1332 define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
   1333 ; CHECK-LABEL: test_x86_sse41_ptestc:
   1334 ; CHECK:       ## BB#0:
   1335 ; CHECK-NEXT:    vptest %xmm1, %xmm0
   1336 ; CHECK-NEXT:    sbbl %eax, %eax
   1337 ; CHECK-NEXT:    andl $1, %eax
   1338 ; CHECK-NEXT:    retl
   1339   %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; i32 result; the checks expect vptest followed by sbbl + andl $1 to produce it
   1340   ret i32 %res
   1341 }
   1342 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
   1343 
   1344 
   1345 define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
   1346 ; CHECK-LABEL: test_x86_sse41_ptestnzc:
   1347 ; CHECK:       ## BB#0:
   1348 ; CHECK-NEXT:    vptest %xmm1, %xmm0
   1349 ; CHECK-NEXT:    seta %al
   1350 ; CHECK-NEXT:    movzbl %al, %eax
   1351 ; CHECK-NEXT:    retl
   1352   %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; i32 result; the checks expect vptest followed by seta + movzbl to produce it
   1353   ret i32 %res
   1354 }
   1355 declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
   1356 
   1357 
   1358 define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
   1359 ; CHECK-LABEL: test_x86_sse41_ptestz:
   1360 ; CHECK:       ## BB#0:
   1361 ; CHECK-NEXT:    vptest %xmm1, %xmm0
   1362 ; CHECK-NEXT:    sete %al
   1363 ; CHECK-NEXT:    movzbl %al, %eax
   1364 ; CHECK-NEXT:    retl
   1365   %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; i32 result; the checks expect vptest followed by sete + movzbl to produce it
   1366   ret i32 %res
   1367 }
   1368 declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
   1369 
   1370 
   1371 define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
   1372 ; CHECK-LABEL: test_x86_sse41_round_pd:
   1373 ; CHECK:       ## BB#0:
   1374 ; CHECK-NEXT:    vroundpd $7, %xmm0, %xmm0
   1375 ; CHECK-NEXT:    retl
   1376   %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; the i32 constant 7 is expected to appear as vroundpd's $7 immediate
   1377   ret <2 x double> %res
   1378 }
   1379 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
   1380 
   1381 
   1382 define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
   1383 ; CHECK-LABEL: test_x86_sse41_round_ps:
   1384 ; CHECK:       ## BB#0:
   1385 ; CHECK-NEXT:    vroundps $7, %xmm0, %xmm0
   1386 ; CHECK-NEXT:    retl
   1387   %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; the i32 constant 7 is expected to appear as vroundps's $7 immediate
   1388   ret <4 x float> %res
   1389 }
   1390 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
   1391 
   1392 
   1393 define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
   1394 ; CHECK-LABEL: test_x86_sse41_round_sd:
   1395 ; CHECK:       ## BB#0:
   1396 ; CHECK-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm0
   1397 ; CHECK-NEXT:    retl
   1398   %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; two-source form; the i32 constant 7 is expected to appear as vroundsd's $7 immediate
   1399   ret <2 x double> %res
   1400 }
   1401 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
   1402 
   1403 
   1404 define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
   1405 ; CHECK-LABEL: test_x86_sse41_round_ss:
   1406 ; CHECK:       ## BB#0:
   1407 ; CHECK-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm0
   1408 ; CHECK-NEXT:    retl
   1409   %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; two-source form; the i32 constant 7 is expected to appear as vroundss's $7 immediate
   1410   ret <4 x float> %res
   1411 }
   1412 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
   1413 
   1414 
   1415 define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
   1416 ; CHECK-LABEL: test_x86_sse42_pcmpestri128:
   1417 ; CHECK:       ## BB#0:
   1418 ; CHECK-NEXT:    movl $7, %eax
   1419 ; CHECK-NEXT:    movl $7, %edx
   1420 ; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
   1421 ; CHECK-NEXT:    movl %ecx, %eax
   1422 ; CHECK-NEXT:    retl
   1423   %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; per the checks: the two i32 7 arguments are set up in %eax/%edx, the i8 7 is the $7 immediate, and the result is copied out of %ecx
   1424   ret i32 %res
   1425 }
   1426 declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1427 
   1428 
   ; Same intrinsic call as test_x86_sse42_pcmpestri128, but both vector
   ; operands are loaded from pointer arguments. The expected code loads one
   ; vector into %xmm0 with `vmovdqa` and uses the other directly as the memory
   ; operand of `vpcmpestri $7, (%ecx), %xmm0`.
   1429 define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
   1430 ; CHECK-LABEL: test_x86_sse42_pcmpestri128_load:
   1431 ; CHECK:       ## BB#0:
   1432 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
   1433 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1434 ; CHECK-NEXT:    vmovdqa (%eax), %xmm0
   1435 ; CHECK-NEXT:    movl $7, %eax
   1436 ; CHECK-NEXT:    movl $7, %edx
   1437 ; CHECK-NEXT:    vpcmpestri $7, (%ecx), %xmm0
   1438 ; CHECK-NEXT:    movl %ecx, %eax
   1439 ; CHECK-NEXT:    retl
   1440   %1 = load <16 x i8>, <16 x i8>* %a0
   1441   %2 = load <16 x i8>, <16 x i8>* %a2
   1442   %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1]
   1443   ret i32 %res
   1444 }
   1445 
   1446 
   ; Calls @llvm.x86.sse42.pcmpestria128 with both i32 operands and the i8
   ; immediate set to 7. The expected code issues `vpcmpestri $7` and then
   ; builds the i32 result with `seta %al` / `movzbl %al, %eax`.
   1447 define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
   1448 ; CHECK-LABEL: test_x86_sse42_pcmpestria128:
   1449 ; CHECK:       ## BB#0:
   1450 ; CHECK-NEXT:    movl $7, %eax
   1451 ; CHECK-NEXT:    movl $7, %edx
   1452 ; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
   1453 ; CHECK-NEXT:    seta %al
   1454 ; CHECK-NEXT:    movzbl %al, %eax
   1455 ; CHECK-NEXT:    retl
   1456   %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
   1457   ret i32 %res
   1458 }
   1459 declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1460 
   1461 
   ; Calls @llvm.x86.sse42.pcmpestric128 with both i32 operands and the i8
   ; immediate set to 7. The expected code issues `vpcmpestri $7` and then
   ; builds the i32 result with `sbbl %eax, %eax` / `andl $1, %eax`.
   1462 define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
   1463 ; CHECK-LABEL: test_x86_sse42_pcmpestric128:
   1464 ; CHECK:       ## BB#0:
   1465 ; CHECK-NEXT:    movl $7, %eax
   1466 ; CHECK-NEXT:    movl $7, %edx
   1467 ; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
   1468 ; CHECK-NEXT:    sbbl %eax, %eax
   1469 ; CHECK-NEXT:    andl $1, %eax
   1470 ; CHECK-NEXT:    retl
   1471   %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
   1472   ret i32 %res
   1473 }
   1474 declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1475 
   1476 
   ; Calls @llvm.x86.sse42.pcmpestrio128 with both i32 operands and the i8
   ; immediate set to 7. The expected code issues `vpcmpestri $7` and then
   ; builds the i32 result with `seto %al` / `movzbl %al, %eax`.
   1477 define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
   1478 ; CHECK-LABEL: test_x86_sse42_pcmpestrio128:
   1479 ; CHECK:       ## BB#0:
   1480 ; CHECK-NEXT:    movl $7, %eax
   1481 ; CHECK-NEXT:    movl $7, %edx
   1482 ; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
   1483 ; CHECK-NEXT:    seto %al
   1484 ; CHECK-NEXT:    movzbl %al, %eax
   1485 ; CHECK-NEXT:    retl
   1486   %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
   1487   ret i32 %res
   1488 }
   1489 declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1490 
   1491 
   ; Calls @llvm.x86.sse42.pcmpestris128 with both i32 operands and the i8
   ; immediate set to 7. The expected code issues `vpcmpestri $7` and then
   ; builds the i32 result with `sets %al` / `movzbl %al, %eax`.
   1492 define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
   1493 ; CHECK-LABEL: test_x86_sse42_pcmpestris128:
   1494 ; CHECK:       ## BB#0:
   1495 ; CHECK-NEXT:    movl $7, %eax
   1496 ; CHECK-NEXT:    movl $7, %edx
   1497 ; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
   1498 ; CHECK-NEXT:    sets %al
   1499 ; CHECK-NEXT:    movzbl %al, %eax
   1500 ; CHECK-NEXT:    retl
   1501   %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
   1502   ret i32 %res
   1503 }
   1504 declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1505 
   1506 
   ; Calls @llvm.x86.sse42.pcmpestriz128 with both i32 operands and the i8
   ; immediate set to 7. The expected code issues `vpcmpestri $7` and then
   ; builds the i32 result with `sete %al` / `movzbl %al, %eax`.
   1507 define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
   1508 ; CHECK-LABEL: test_x86_sse42_pcmpestriz128:
   1509 ; CHECK:       ## BB#0:
   1510 ; CHECK-NEXT:    movl $7, %eax
   1511 ; CHECK-NEXT:    movl $7, %edx
   1512 ; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
   1513 ; CHECK-NEXT:    sete %al
   1514 ; CHECK-NEXT:    movzbl %al, %eax
   1515 ; CHECK-NEXT:    retl
   1516   %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
   1517   ret i32 %res
   1518 }
   1519 declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1520 
   1521 
   ; Calls @llvm.x86.sse42.pcmpestrm128 with both i32 operands and the i8
   ; immediate set to 7. The expected code puts 7 in %eax and %edx and issues
   ; `vpcmpestrm $7, %xmm1, %xmm0` with no further instructions before `retl`.
   1522 define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
   1523 ; CHECK-LABEL: test_x86_sse42_pcmpestrm128:
   1524 ; CHECK:       ## BB#0:
   1525 ; CHECK-NEXT:    movl $7, %eax
   1526 ; CHECK-NEXT:    movl $7, %edx
   1527 ; CHECK-NEXT:    vpcmpestrm $7, %xmm1, %xmm0
   1528 ; CHECK-NEXT:    retl
   1529   %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
   1530   ret <16 x i8> %res
   1531 }
   1532 declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1533 
   1534 
   ; Same intrinsic call as test_x86_sse42_pcmpestrm128, but the second vector
   ; is loaded from the pointer %a2. The expected code uses that load directly
   ; as the memory operand of `vpcmpestrm $7, (%ecx), %xmm0`.
   1535 define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) {
   1536 ; CHECK-LABEL: test_x86_sse42_pcmpestrm128_load:
   1537 ; CHECK:       ## BB#0:
   1538 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
   1539 ; CHECK-NEXT:    movl $7, %eax
   1540 ; CHECK-NEXT:    movl $7, %edx
   1541 ; CHECK-NEXT:    vpcmpestrm $7, (%ecx), %xmm0
   1542 ; CHECK-NEXT:    retl
   1543   %1 = load <16 x i8>, <16 x i8>* %a2
   1544   %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
   1545   ret <16 x i8> %res
   1546 }
   1547 
   1548 
   ; Calls @llvm.x86.sse42.pcmpistri128 on (%a0, %a1) with the i8 immediate 7.
   ; The expected code issues `vpcmpistri $7, %xmm1, %xmm0` and copies %ecx
   ; into %eax for the return.
   1549 define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
   1550 ; CHECK-LABEL: test_x86_sse42_pcmpistri128:
   1551 ; CHECK:       ## BB#0:
   1552 ; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
   1553 ; CHECK-NEXT:    movl %ecx, %eax
   1554 ; CHECK-NEXT:    retl
   1555   %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   1556   ret i32 %res
   1557 }
   1558 declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1559 
   1560 
   ; Same intrinsic call as test_x86_sse42_pcmpistri128, but both vector
   ; operands are loaded from pointer arguments. The expected code loads one
   ; vector into %xmm0 with `vmovdqa` and uses the other directly as the memory
   ; operand of `vpcmpistri $7, (%eax), %xmm0`.
   1561 define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
   1562 ; CHECK-LABEL: test_x86_sse42_pcmpistri128_load:
   1563 ; CHECK:       ## BB#0:
   1564 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1565 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
   1566 ; CHECK-NEXT:    vmovdqa (%ecx), %xmm0
   1567 ; CHECK-NEXT:    vpcmpistri $7, (%eax), %xmm0
   1568 ; CHECK-NEXT:    movl %ecx, %eax
   1569 ; CHECK-NEXT:    retl
   1570   %1 = load <16 x i8>, <16 x i8>* %a0
   1571   %2 = load <16 x i8>, <16 x i8>* %a1
   1572   %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1]
   1573   ret i32 %res
   1574 }
   1575 
   1576 
   ; Calls @llvm.x86.sse42.pcmpistria128 on (%a0, %a1) with the i8 immediate 7.
   ; The expected code issues `vpcmpistri $7` and then builds the i32 result
   ; with `seta %al` / `movzbl %al, %eax`.
   1577 define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
   1578 ; CHECK-LABEL: test_x86_sse42_pcmpistria128:
   1579 ; CHECK:       ## BB#0:
   1580 ; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
   1581 ; CHECK-NEXT:    seta %al
   1582 ; CHECK-NEXT:    movzbl %al, %eax
   1583 ; CHECK-NEXT:    retl
   1584   %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   1585   ret i32 %res
   1586 }
   1587 declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1588 
   1589 
   ; Calls @llvm.x86.sse42.pcmpistric128 on (%a0, %a1) with the i8 immediate 7.
   ; The expected code issues `vpcmpistri $7` and then builds the i32 result
   ; with `sbbl %eax, %eax` / `andl $1, %eax`.
   1590 define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
   1591 ; CHECK-LABEL: test_x86_sse42_pcmpistric128:
   1592 ; CHECK:       ## BB#0:
   1593 ; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
   1594 ; CHECK-NEXT:    sbbl %eax, %eax
   1595 ; CHECK-NEXT:    andl $1, %eax
   1596 ; CHECK-NEXT:    retl
   1597   %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   1598   ret i32 %res
   1599 }
   1600 declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1601 
   1602 
   ; Calls @llvm.x86.sse42.pcmpistrio128 on (%a0, %a1) with the i8 immediate 7.
   ; The expected code issues `vpcmpistri $7` and then builds the i32 result
   ; with `seto %al` / `movzbl %al, %eax`.
   1603 define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
   1604 ; CHECK-LABEL: test_x86_sse42_pcmpistrio128:
   1605 ; CHECK:       ## BB#0:
   1606 ; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
   1607 ; CHECK-NEXT:    seto %al
   1608 ; CHECK-NEXT:    movzbl %al, %eax
   1609 ; CHECK-NEXT:    retl
   1610   %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   1611   ret i32 %res
   1612 }
   1613 declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1614 
   1615 
   ; Calls @llvm.x86.sse42.pcmpistris128 on (%a0, %a1) with the i8 immediate 7.
   ; The expected code issues `vpcmpistri $7` and then builds the i32 result
   ; with `sets %al` / `movzbl %al, %eax`.
   1616 define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
   1617 ; CHECK-LABEL: test_x86_sse42_pcmpistris128:
   1618 ; CHECK:       ## BB#0:
   1619 ; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
   1620 ; CHECK-NEXT:    sets %al
   1621 ; CHECK-NEXT:    movzbl %al, %eax
   1622 ; CHECK-NEXT:    retl
   1623   %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   1624   ret i32 %res
   1625 }
   1626 declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1627 
   1628 
   ; Calls @llvm.x86.sse42.pcmpistriz128 on (%a0, %a1) with the i8 immediate 7.
   ; The expected code issues `vpcmpistri $7` and then builds the i32 result
   ; with `sete %al` / `movzbl %al, %eax`.
   1629 define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
   1630 ; CHECK-LABEL: test_x86_sse42_pcmpistriz128:
   1631 ; CHECK:       ## BB#0:
   1632 ; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
   1633 ; CHECK-NEXT:    sete %al
   1634 ; CHECK-NEXT:    movzbl %al, %eax
   1635 ; CHECK-NEXT:    retl
   1636   %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   1637   ret i32 %res
   1638 }
   1639 declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1640 
   1641 
   ; Calls @llvm.x86.sse42.pcmpistrm128 on (%a0, %a1) with the i8 immediate 7
   ; and expects `vpcmpistrm $7, %xmm1, %xmm0` followed directly by `retl`.
   1642 define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
   1643 ; CHECK-LABEL: test_x86_sse42_pcmpistrm128:
   1644 ; CHECK:       ## BB#0:
   1645 ; CHECK-NEXT:    vpcmpistrm $7, %xmm1, %xmm0
   1646 ; CHECK-NEXT:    retl
   1647   %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
   1648   ret <16 x i8> %res
   1649 }
   1650 declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1651 
   1652 
   ; Same intrinsic call as test_x86_sse42_pcmpistrm128, but the second vector
   ; is loaded from the pointer %a1. The expected code uses that load directly
   ; as the memory operand of `vpcmpistrm $7, (%eax), %xmm0`.
   1653 define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
   1654 ; CHECK-LABEL: test_x86_sse42_pcmpistrm128_load:
   1655 ; CHECK:       ## BB#0:
   1656 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1657 ; CHECK-NEXT:    vpcmpistrm $7, (%eax), %xmm0
   1658 ; CHECK-NEXT:    retl
   1659   %1 = load <16 x i8>, <16 x i8>* %a1
   1660   %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
   1661   ret <16 x i8> %res
   1662 }
   1663 
   1664 
   ; Calls @llvm.x86.sse.add.ss on (%a0, %a1) and expects
   ; `vaddss %xmm1, %xmm0, %xmm0` followed by `retl`.
   1665 define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
   1666 ; CHECK-LABEL: test_x86_sse_add_ss:
   1667 ; CHECK:       ## BB#0:
   1668 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
   1669 ; CHECK-NEXT:    retl
   1670   %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1671   ret <4 x float> %res
   1672 }
   1673 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
   1674 
   1675 
   ; Calls @llvm.x86.sse.cmp.ps on (%a0, %a1) with the i8 immediate 7; the
   ; expected instruction for that immediate is `vcmpordps %xmm1, %xmm0, %xmm0`.
   1676 define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
   1677 ; CHECK-LABEL: test_x86_sse_cmp_ps:
   1678 ; CHECK:       ## BB#0:
   1679 ; CHECK-NEXT:    vcmpordps %xmm1, %xmm0, %xmm0
   1680 ; CHECK-NEXT:    retl
   1681   %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
   1682   ret <4 x float> %res
   1683 }
   1684 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
   1685 
   1686 
   ; Calls @llvm.x86.sse.cmp.ss on (%a0, %a1) with the i8 immediate 7; the
   ; expected instruction for that immediate is `vcmpordss %xmm1, %xmm0, %xmm0`.
   1687 define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
   1688 ; CHECK-LABEL: test_x86_sse_cmp_ss:
   1689 ; CHECK:       ## BB#0:
   1690 ; CHECK-NEXT:    vcmpordss %xmm1, %xmm0, %xmm0
   1691 ; CHECK-NEXT:    retl
   1692   %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
   1693   ret <4 x float> %res
   1694 }
   1695 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
   1696 
   1697 
   ; Calls @llvm.x86.sse.comieq.ss on (%a0, %a1). The expected code is
   ; `vcomiss %xmm1, %xmm0`, then `sete %al` / `movzbl %al, %eax` to form the
   ; i32 result.
   1698 define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
   1699 ; CHECK-LABEL: test_x86_sse_comieq_ss:
   1700 ; CHECK:       ## BB#0:
   1701 ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
   1702 ; CHECK-NEXT:    sete %al
   1703 ; CHECK-NEXT:    movzbl %al, %eax
   1704 ; CHECK-NEXT:    retl
   1705   %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1706   ret i32 %res
   1707 }
   1708 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
   1709 
   1710 
   ; Calls @llvm.x86.sse.comige.ss on (%a0, %a1). The expected code is
   ; `vcomiss %xmm1, %xmm0`, then `setae %al` / `movzbl %al, %eax` to form the
   ; i32 result.
   1711 define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
   1712 ; CHECK-LABEL: test_x86_sse_comige_ss:
   1713 ; CHECK:       ## BB#0:
   1714 ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
   1715 ; CHECK-NEXT:    setae %al
   1716 ; CHECK-NEXT:    movzbl %al, %eax
   1717 ; CHECK-NEXT:    retl
   1718   %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1719   ret i32 %res
   1720 }
   1721 declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
   1722 
   1723 
   ; Calls @llvm.x86.sse.comigt.ss on (%a0, %a1). The expected code is
   ; `vcomiss %xmm1, %xmm0`, then `seta %al` / `movzbl %al, %eax` to form the
   ; i32 result.
   1724 define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
   1725 ; CHECK-LABEL: test_x86_sse_comigt_ss:
   1726 ; CHECK:       ## BB#0:
   1727 ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
   1728 ; CHECK-NEXT:    seta %al
   1729 ; CHECK-NEXT:    movzbl %al, %eax
   1730 ; CHECK-NEXT:    retl
   1731   %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1732   ret i32 %res
   1733 }
   1734 declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
   1735 
   1736 
   ; Calls @llvm.x86.sse.comile.ss on (%a0, %a1). The expected code is
   ; `vcomiss %xmm1, %xmm0`, then `setbe %al` / `movzbl %al, %eax` to form the
   ; i32 result.
   1737 define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
   1738 ; CHECK-LABEL: test_x86_sse_comile_ss:
   1739 ; CHECK:       ## BB#0:
   1740 ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
   1741 ; CHECK-NEXT:    setbe %al
   1742 ; CHECK-NEXT:    movzbl %al, %eax
   1743 ; CHECK-NEXT:    retl
   1744   %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1745   ret i32 %res
   1746 }
   1747 declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
   1748 
   1749 
   ; Calls @llvm.x86.sse.comilt.ss on (%a0, %a1). The expected code is
   ; `vcomiss %xmm1, %xmm0`, then `sbbl %eax, %eax` / `andl $1, %eax` to form
   ; the i32 result.
   1750 define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
   1751 ; CHECK-LABEL: test_x86_sse_comilt_ss:
   1752 ; CHECK:       ## BB#0:
   1753 ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
   1754 ; CHECK-NEXT:    sbbl %eax, %eax
   1755 ; CHECK-NEXT:    andl $1, %eax
   1756 ; CHECK-NEXT:    retl
   1757   %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1758   ret i32 %res
   1759 }
   1760 declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
   1761 
   1762 
   ; Calls @llvm.x86.sse.comineq.ss on (%a0, %a1). The expected code is
   ; `vcomiss %xmm1, %xmm0`, then `setne %al` / `movzbl %al, %eax` to form the
   ; i32 result.
   1763 define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
   1764 ; CHECK-LABEL: test_x86_sse_comineq_ss:
   1765 ; CHECK:       ## BB#0:
   1766 ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
   1767 ; CHECK-NEXT:    setne %al
   1768 ; CHECK-NEXT:    movzbl %al, %eax
   1769 ; CHECK-NEXT:    retl
   1770   %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1771   ret i32 %res
   1772 }
   1773 declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
   1774 
   1775 
   ; Calls @llvm.x86.sse.cvtsi2ss on %a0 with the constant i32 7. The expected
   ; code moves 7 into %eax and then issues `vcvtsi2ssl %eax, %xmm0, %xmm0`.
   1776 define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
   1777 ; CHECK-LABEL: test_x86_sse_cvtsi2ss:
   1778 ; CHECK:       ## BB#0:
   1779 ; CHECK-NEXT:    movl $7, %eax
   1780 ; CHECK-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
   1781 ; CHECK-NEXT:    retl
   1782   %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
   1783   ret <4 x float> %res
   1784 }
   1785 declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
   1786 
   1787 
   ; Calls @llvm.x86.sse.cvtss2si on %a0 and expects `vcvtss2si %xmm0, %eax`
   ; followed by `retl`.
   1788 define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
   1789 ; CHECK-LABEL: test_x86_sse_cvtss2si:
   1790 ; CHECK:       ## BB#0:
   1791 ; CHECK-NEXT:    vcvtss2si %xmm0, %eax
   1792 ; CHECK-NEXT:    retl
   1793   %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
   1794   ret i32 %res
   1795 }
   1796 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
   1797 
   1798 
   ; Calls @llvm.x86.sse.cvttss2si on %a0 and expects `vcvttss2si %xmm0, %eax`
   ; followed by `retl`.
   1799 define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
   1800 ; CHECK-LABEL: test_x86_sse_cvttss2si:
   1801 ; CHECK:       ## BB#0:
   1802 ; CHECK-NEXT:    vcvttss2si %xmm0, %eax
   1803 ; CHECK-NEXT:    retl
   1804   %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
   1805   ret i32 %res
   1806 }
   1807 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
   1808 
   1809 
   ; Calls @llvm.x86.sse.div.ss on (%a0, %a1) and expects
   ; `vdivss %xmm1, %xmm0, %xmm0` followed by `retl`.
   1810 define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
   1811 ; CHECK-LABEL: test_x86_sse_div_ss:
   1812 ; CHECK:       ## BB#0:
   1813 ; CHECK-NEXT:    vdivss %xmm1, %xmm0, %xmm0
   1814 ; CHECK-NEXT:    retl
   1815   %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1816   ret <4 x float> %res
   1817 }
   1818 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
   1819 
   1820 
   ; Calls @llvm.x86.sse.ldmxcsr with the pointer %a0. The expected code loads
   ; an address from the stack into %eax and issues `vldmxcsr (%eax)`.
   1821 define void @test_x86_sse_ldmxcsr(i8* %a0) {
   1822 ; CHECK-LABEL: test_x86_sse_ldmxcsr:
   1823 ; CHECK:       ## BB#0:
   1824 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1825 ; CHECK-NEXT:    vldmxcsr (%eax)
   1826 ; CHECK-NEXT:    retl
   1827   call void @llvm.x86.sse.ldmxcsr(i8* %a0)
   1828   ret void
   1829 }
   1830 declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
   1831 
   1832 
   1833 
   ; Calls @llvm.x86.sse.max.ps on (%a0, %a1) and expects
   ; `vmaxps %xmm1, %xmm0, %xmm0` followed by `retl`.
   1834 define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
   1835 ; CHECK-LABEL: test_x86_sse_max_ps:
   1836 ; CHECK:       ## BB#0:
   1837 ; CHECK-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
   1838 ; CHECK-NEXT:    retl
   1839   %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1840   ret <4 x float> %res
   1841 }
   1842 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
   1843 
   1844 
   ; Calls @llvm.x86.sse.max.ss on (%a0, %a1) and expects
   ; `vmaxss %xmm1, %xmm0, %xmm0` followed by `retl`.
   1845 define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
   1846 ; CHECK-LABEL: test_x86_sse_max_ss:
   1847 ; CHECK:       ## BB#0:
   1848 ; CHECK-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
   1849 ; CHECK-NEXT:    retl
   1850   %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1851   ret <4 x float> %res
   1852 }
   1853 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
   1854 
   1855 
   ; Calls @llvm.x86.sse.min.ps on (%a0, %a1) and expects
   ; `vminps %xmm1, %xmm0, %xmm0` followed by `retl`.
   1856 define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
   1857 ; CHECK-LABEL: test_x86_sse_min_ps:
   1858 ; CHECK:       ## BB#0:
   1859 ; CHECK-NEXT:    vminps %xmm1, %xmm0, %xmm0
   1860 ; CHECK-NEXT:    retl
   1861   %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1862   ret <4 x float> %res
   1863 }
   1864 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
   1865 
   1866 
   ; Calls @llvm.x86.sse.min.ss on (%a0, %a1) and expects
   ; `vminss %xmm1, %xmm0, %xmm0` followed by `retl`.
   1867 define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
   1868 ; CHECK-LABEL: test_x86_sse_min_ss:
   1869 ; CHECK:       ## BB#0:
   1870 ; CHECK-NEXT:    vminss %xmm1, %xmm0, %xmm0
   1871 ; CHECK-NEXT:    retl
   1872   %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1873   ret <4 x float> %res
   1874 }
   1875 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
   1876 
   1877 
   ; Calls @llvm.x86.sse.movmsk.ps on %a0 and expects `vmovmskps %xmm0, %eax`
   ; followed by `retl`.
   1878 define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
   1879 ; CHECK-LABEL: test_x86_sse_movmsk_ps:
   1880 ; CHECK:       ## BB#0:
   1881 ; CHECK-NEXT:    vmovmskps %xmm0, %eax
   1882 ; CHECK-NEXT:    retl
   1883   %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
   1884   ret i32 %res
   1885 }
   1886 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
   1887 
   1888 
   1889 
   ; Calls @llvm.x86.sse.mul.ss on (%a0, %a1) and expects
   ; `vmulss %xmm1, %xmm0, %xmm0` followed by `retl`.
   1890 define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
   1891 ; CHECK-LABEL: test_x86_sse_mul_ss:
   1892 ; CHECK:       ## BB#0:
   1893 ; CHECK-NEXT:    vmulss %xmm1, %xmm0, %xmm0
   1894 ; CHECK-NEXT:    retl
   1895   %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1896   ret <4 x float> %res
   1897 }
   1898 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
   1899 
   1900 
   ; Calls @llvm.x86.sse.rcp.ps on %a0 and expects `vrcpps %xmm0, %xmm0`
   ; followed by `retl`.
   1901 define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
   1902 ; CHECK-LABEL: test_x86_sse_rcp_ps:
   1903 ; CHECK:       ## BB#0:
   1904 ; CHECK-NEXT:    vrcpps %xmm0, %xmm0
   1905 ; CHECK-NEXT:    retl
   1906   %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1907   ret <4 x float> %res
   1908 }
   1909 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
   1910 
   1911 
   ; Calls @llvm.x86.sse.rcp.ss on %a0 and expects the three-operand form
   ; `vrcpss %xmm0, %xmm0, %xmm0`, with %xmm0 used for both sources.
   1912 define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
   1913 ; CHECK-LABEL: test_x86_sse_rcp_ss:
   1914 ; CHECK:       ## BB#0:
   1915 ; CHECK-NEXT:    vrcpss %xmm0, %xmm0, %xmm0
   1916 ; CHECK-NEXT:    retl
   1917   %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1918   ret <4 x float> %res
   1919 }
   1920 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
   1921 
   1922 
   ; Calls @llvm.x86.sse.rsqrt.ps on %a0 and expects `vrsqrtps %xmm0, %xmm0`
   ; followed by `retl`.
   1923 define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
   1924 ; CHECK-LABEL: test_x86_sse_rsqrt_ps:
   1925 ; CHECK:       ## BB#0:
   1926 ; CHECK-NEXT:    vrsqrtps %xmm0, %xmm0
   1927 ; CHECK-NEXT:    retl
   1928   %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1929   ret <4 x float> %res
   1930 }
   1931 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
   1932 
   1933 
   ; Calls @llvm.x86.sse.rsqrt.ss on %a0 and expects the three-operand form
   ; `vrsqrtss %xmm0, %xmm0, %xmm0`, with %xmm0 used for both sources.
   1934 define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
   1935 ; CHECK-LABEL: test_x86_sse_rsqrt_ss:
   1936 ; CHECK:       ## BB#0:
   1937 ; CHECK-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0
   1938 ; CHECK-NEXT:    retl
   1939   %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1940   ret <4 x float> %res
   1941 }
   1942 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
   1943 
   1944 
   ; Calls @llvm.x86.sse.sqrt.ps on %a0 and expects `vsqrtps %xmm0, %xmm0`
   ; followed by `retl`.
   1945 define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
   1946 ; CHECK-LABEL: test_x86_sse_sqrt_ps:
   1947 ; CHECK:       ## BB#0:
   1948 ; CHECK-NEXT:    vsqrtps %xmm0, %xmm0
   1949 ; CHECK-NEXT:    retl
   1950   %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1951   ret <4 x float> %res
   1952 }
   1953 declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
   1954 
   1955 
   ; Calls @llvm.x86.sse.sqrt.ss on %a0 and expects the three-operand form
   ; `vsqrtss %xmm0, %xmm0, %xmm0`, with %xmm0 used for both sources.
   1956 define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
   1957 ; CHECK-LABEL: test_x86_sse_sqrt_ss:
   1958 ; CHECK:       ## BB#0:
   1959 ; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
   1960 ; CHECK-NEXT:    retl
   1961   %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1962   ret <4 x float> %res
   1963 }
   1964 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
   1965 
   1966 
   ; Calls @llvm.x86.sse.stmxcsr with the pointer %a0. The expected code loads
   ; an address from the stack into %eax and issues `vstmxcsr (%eax)`.
   1967 define void @test_x86_sse_stmxcsr(i8* %a0) {
   1968 ; CHECK-LABEL: test_x86_sse_stmxcsr:
   1969 ; CHECK:       ## BB#0:
   1970 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1971 ; CHECK-NEXT:    vstmxcsr (%eax)
   1972 ; CHECK-NEXT:    retl
   1973   call void @llvm.x86.sse.stmxcsr(i8* %a0)
   1974   ret void
   1975 }
   1976 declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
   1977 
   1978 
   ; Calls @llvm.x86.sse.storeu.ps with the pointer %a0 and the vector %a1. The
   ; expected code loads an address from the stack into %eax and issues
   ; `vmovups %xmm0, (%eax)`.
   1979 define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
   1980 ; CHECK-LABEL: test_x86_sse_storeu_ps:
   1981 ; CHECK:       ## BB#0:
   1982 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1983 ; CHECK-NEXT:    vmovups %xmm0, (%eax)
   1984 ; CHECK-NEXT:    retl
   1985   call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
   1986   ret void
   1987 }
   1988 declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
   1989 
   1990 
   ; Calls @llvm.x86.sse.sub.ss on (%a0, %a1) and expects
   ; `vsubss %xmm1, %xmm0, %xmm0` followed by `retl`.
   1991 define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
   1992 ; CHECK-LABEL: test_x86_sse_sub_ss:
   1993 ; CHECK:       ## BB#0:
   1994 ; CHECK-NEXT:    vsubss %xmm1, %xmm0, %xmm0
   1995 ; CHECK-NEXT:    retl
   1996   %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1997   ret <4 x float> %res
   1998 }
   1999 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
   2000 
   2001 
   ; Calls @llvm.x86.sse.ucomieq.ss on (%a0, %a1). The expected code is
   ; `vucomiss %xmm1, %xmm0`, then `sete %al` / `movzbl %al, %eax` to form the
   ; i32 result.
   2002 define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
   2003 ; CHECK-LABEL: test_x86_sse_ucomieq_ss:
   2004 ; CHECK:       ## BB#0:
   2005 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
   2006 ; CHECK-NEXT:    sete %al
   2007 ; CHECK-NEXT:    movzbl %al, %eax
   2008 ; CHECK-NEXT:    retl
   2009   %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2010   ret i32 %res
   2011 }
   2012 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
   2013 
   2014 
   ; Calls @llvm.x86.sse.ucomige.ss on (%a0, %a1). The expected code is
   ; `vucomiss %xmm1, %xmm0`, then `setae %al` / `movzbl %al, %eax` to form the
   ; i32 result.
   2015 define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
   2016 ; CHECK-LABEL: test_x86_sse_ucomige_ss:
   2017 ; CHECK:       ## BB#0:
   2018 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
   2019 ; CHECK-NEXT:    setae %al
   2020 ; CHECK-NEXT:    movzbl %al, %eax
   2021 ; CHECK-NEXT:    retl
   2022   %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2023   ret i32 %res
   2024 }
   2025 declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
   2026 
   2027 
   ; Calls @llvm.x86.sse.ucomigt.ss on (%a0, %a1). The expected code is
   ; `vucomiss %xmm1, %xmm0`, then `seta %al` / `movzbl %al, %eax` to form the
   ; i32 result.
   2028 define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
   2029 ; CHECK-LABEL: test_x86_sse_ucomigt_ss:
   2030 ; CHECK:       ## BB#0:
   2031 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
   2032 ; CHECK-NEXT:    seta %al
   2033 ; CHECK-NEXT:    movzbl %al, %eax
   2034 ; CHECK-NEXT:    retl
   2035   %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2036   ret i32 %res
   2037 }
   2038 declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
   2039 
   2040 
   ; Calls @llvm.x86.sse.ucomile.ss on (%a0, %a1). The expected code is
   ; `vucomiss %xmm1, %xmm0`, then `setbe %al` / `movzbl %al, %eax` to form the
   ; i32 result.
   2041 define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
   2042 ; CHECK-LABEL: test_x86_sse_ucomile_ss:
   2043 ; CHECK:       ## BB#0:
   2044 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
   2045 ; CHECK-NEXT:    setbe %al
   2046 ; CHECK-NEXT:    movzbl %al, %eax
   2047 ; CHECK-NEXT:    retl
   2048   %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2049   ret i32 %res
   2050 }
   2051 declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
   2052 
   2053 
   ; Calls @llvm.x86.sse.ucomilt.ss on (%a0, %a1). The expected code is
   ; `vucomiss %xmm1, %xmm0`, then `sbbl %eax, %eax` / `andl $1, %eax` to form
   ; the i32 result.
   2054 define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
   2055 ; CHECK-LABEL: test_x86_sse_ucomilt_ss:
   2056 ; CHECK:       ## BB#0:
   2057 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
   2058 ; CHECK-NEXT:    sbbl %eax, %eax
   2059 ; CHECK-NEXT:    andl $1, %eax
   2060 ; CHECK-NEXT:    retl
   2061   %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2062   ret i32 %res
   2063 }
   2064 declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
   2065 
   2066 
   ; Calls @llvm.x86.sse.ucomineq.ss on (%a0, %a1). The expected code is
   ; `vucomiss %xmm1, %xmm0`, then `setne %al` / `movzbl %al, %eax` to form the
   ; i32 result.
   2067 define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
   2068 ; CHECK-LABEL: test_x86_sse_ucomineq_ss:
   2069 ; CHECK:       ## BB#0:
   2070 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
   2071 ; CHECK-NEXT:    setne %al
   2072 ; CHECK-NEXT:    movzbl %al, %eax
   2073 ; CHECK-NEXT:    retl
   2074   %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2075   ret i32 %res
   2076 }
   2077 declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
   2078 
   2079 
   ; Calls @llvm.x86.ssse3.pabs.b.128 on the <16 x i8> argument and expects
   ; `vpabsb %xmm0, %xmm0` followed by `retl`.
   2080 define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
   2081 ; CHECK-LABEL: test_x86_ssse3_pabs_b_128:
   2082 ; CHECK:       ## BB#0:
   2083 ; CHECK-NEXT:    vpabsb %xmm0, %xmm0
   2084 ; CHECK-NEXT:    retl
   2085   %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
   2086   ret <16 x i8> %res
   2087 }
   2088 declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
   2089 
   2090 
   ; Calls @llvm.x86.ssse3.pabs.d.128 on the <4 x i32> argument and expects
   ; `vpabsd %xmm0, %xmm0` followed by `retl`.
   2091 define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
   2092 ; CHECK-LABEL: test_x86_ssse3_pabs_d_128:
   2093 ; CHECK:       ## BB#0:
   2094 ; CHECK-NEXT:    vpabsd %xmm0, %xmm0
   2095 ; CHECK-NEXT:    retl
   2096   %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
   2097   ret <4 x i32> %res
   2098 }
   2099 declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
   2100 
   2101 
   ; Calls @llvm.x86.ssse3.pabs.w.128 on the <8 x i16> argument and expects
   ; `vpabsw %xmm0, %xmm0` followed by `retl`.
   2102 define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
   2103 ; CHECK-LABEL: test_x86_ssse3_pabs_w_128:
   2104 ; CHECK:       ## BB#0:
   2105 ; CHECK-NEXT:    vpabsw %xmm0, %xmm0
   2106 ; CHECK-NEXT:    retl
   2107   %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
   2108   ret <8 x i16> %res
   2109 }
   2110 declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
   2111 
   2112 
   ; Calls @llvm.x86.ssse3.phadd.d.128 on (%a0, %a1) and expects
   ; `vphaddd %xmm1, %xmm0, %xmm0` followed by `retl`.
   2113 define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
   2114 ; CHECK-LABEL: test_x86_ssse3_phadd_d_128:
   2115 ; CHECK:       ## BB#0:
   2116 ; CHECK-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
   2117 ; CHECK-NEXT:    retl
   2118   %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   2119   ret <4 x i32> %res
   2120 }
   2121 declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
   2122 
   2123 
   ; Calls @llvm.x86.ssse3.phadd.sw.128 on (%a0, %a1) and expects
   ; `vphaddsw %xmm1, %xmm0, %xmm0` followed by `retl`.
   2124 define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
   2125 ; CHECK-LABEL: test_x86_ssse3_phadd_sw_128:
   2126 ; CHECK:       ## BB#0:
   2127 ; CHECK-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0
   2128 ; CHECK-NEXT:    retl
   2129   %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   2130   ret <8 x i16> %res
   2131 }
   2132 declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
   2133 
   2134 
   ; Calls @llvm.x86.ssse3.phadd.w.128 on (%a0, %a1) and expects
   ; `vphaddw %xmm1, %xmm0, %xmm0` followed by `retl`.
   2135 define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
   2136 ; CHECK-LABEL: test_x86_ssse3_phadd_w_128:
   2137 ; CHECK:       ## BB#0:
   2138 ; CHECK-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
   2139 ; CHECK-NEXT:    retl
   2140   %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   2141   ret <8 x i16> %res
   2142 }
   2143 declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
   2144 
   2145 
   ; Calls @llvm.x86.ssse3.phsub.d.128 on (%a0, %a1) and expects
   ; `vphsubd %xmm1, %xmm0, %xmm0` followed by `retl`.
   2146 define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
   2147 ; CHECK-LABEL: test_x86_ssse3_phsub_d_128:
   2148 ; CHECK:       ## BB#0:
   2149 ; CHECK-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
   2150 ; CHECK-NEXT:    retl
   2151   %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   2152   ret <4 x i32> %res
   2153 }
   2154 declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
   2155 
   2156 
   2157 define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expected codegen: the llvm.x86.ssse3.phsub.sw.128 call on %a0/%a1 becomes a single vphsubsw on xmm registers.
   2158 ; CHECK-LABEL: test_x86_ssse3_phsub_sw_128:
   2159 ; CHECK:       ## BB#0:
   2160 ; CHECK-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0
   2161 ; CHECK-NEXT:    retl
   2162   %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   2163   ret <8 x i16> %res
   2164 }
   2165 declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
   2166 
   2167 
   2168 define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expected codegen: the llvm.x86.ssse3.phsub.w.128 call on %a0/%a1 becomes a single vphsubw on xmm registers.
   2169 ; CHECK-LABEL: test_x86_ssse3_phsub_w_128:
   2170 ; CHECK:       ## BB#0:
   2171 ; CHECK-NEXT:    vphsubw %xmm1, %xmm0, %xmm0
   2172 ; CHECK-NEXT:    retl
   2173   %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   2174   ret <8 x i16> %res
   2175 }
   2176 declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
   2177 
   2178 
   2179 define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
          ; Expected codegen: the llvm.x86.ssse3.pmadd.ub.sw.128 call (two <16 x i8> inputs, <8 x i16> result) becomes a single vpmaddubsw on xmm registers.
   2180 ; CHECK-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
   2181 ; CHECK:       ## BB#0:
   2182 ; CHECK-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0
   2183 ; CHECK-NEXT:    retl
   2184   %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
   2185   ret <8 x i16> %res
   2186 }
   2187 declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
   2188 
   2189 
   2190 define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expected codegen: the llvm.x86.ssse3.pmul.hr.sw.128 call on %a0/%a1 becomes a single vpmulhrsw on xmm registers.
   2191 ; CHECK-LABEL: test_x86_ssse3_pmul_hr_sw_128:
   2192 ; CHECK:       ## BB#0:
   2193 ; CHECK-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0
   2194 ; CHECK-NEXT:    retl
   2195   %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   2196   ret <8 x i16> %res
   2197 }
   2198 declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
   2199 
   2200 
   2201 define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
          ; Expected codegen: the llvm.x86.ssse3.pshuf.b.128 call on %a0/%a1 becomes a single vpshufb on xmm registers.
   2202 ; CHECK-LABEL: test_x86_ssse3_pshuf_b_128:
   2203 ; CHECK:       ## BB#0:
   2204 ; CHECK-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
   2205 ; CHECK-NEXT:    retl
   2206   %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
   2207   ret <16 x i8> %res
   2208 }
   2209 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
   2210 
   2211 
   2212 define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
          ; Expected codegen: the llvm.x86.ssse3.psign.b.128 call on %a0/%a1 becomes a single vpsignb on xmm registers.
   2213 ; CHECK-LABEL: test_x86_ssse3_psign_b_128:
   2214 ; CHECK:       ## BB#0:
   2215 ; CHECK-NEXT:    vpsignb %xmm1, %xmm0, %xmm0
   2216 ; CHECK-NEXT:    retl
   2217   %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
   2218   ret <16 x i8> %res
   2219 }
   2220 declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
   2221 
   2222 
   2223 define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
          ; Expected codegen: the llvm.x86.ssse3.psign.d.128 call on %a0/%a1 becomes a single vpsignd on xmm registers.
   2224 ; CHECK-LABEL: test_x86_ssse3_psign_d_128:
   2225 ; CHECK:       ## BB#0:
   2226 ; CHECK-NEXT:    vpsignd %xmm1, %xmm0, %xmm0
   2227 ; CHECK-NEXT:    retl
   2228   %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   2229   ret <4 x i32> %res
   2230 }
   2231 declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
   2232 
   2233 
   2234 define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
          ; Expected codegen: the llvm.x86.ssse3.psign.w.128 call on %a0/%a1 becomes a single vpsignw on xmm registers.
   2235 ; CHECK-LABEL: test_x86_ssse3_psign_w_128:
   2236 ; CHECK:       ## BB#0:
   2237 ; CHECK-NEXT:    vpsignw %xmm1, %xmm0, %xmm0
   2238 ; CHECK-NEXT:    retl
   2239   %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   2240   ret <8 x i16> %res
   2241 }
   2242 declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
   2243 
   2244 
   2245 define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Expected codegen: the llvm.x86.avx.addsub.pd.256 call on %a0/%a1 becomes a single vaddsubpd on ymm registers.
   2246 ; CHECK-LABEL: test_x86_avx_addsub_pd_256:
   2247 ; CHECK:       ## BB#0:
   2248 ; CHECK-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
   2249 ; CHECK-NEXT:    retl
   2250   %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2251   ret <4 x double> %res
   2252 }
   2253 declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2254 
   2255 
   2256 define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Expected codegen: the llvm.x86.avx.addsub.ps.256 call on %a0/%a1 becomes a single vaddsubps on ymm registers.
   2257 ; CHECK-LABEL: test_x86_avx_addsub_ps_256:
   2258 ; CHECK:       ## BB#0:
   2259 ; CHECK-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
   2260 ; CHECK-NEXT:    retl
   2261   %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2262   ret <8 x float> %res
   2263 }
   2264 declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2265 
   2266 
   2267 define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
          ; Expected codegen: the three-operand llvm.x86.avx.blendv.pd.256 call becomes a single vblendvpd, with %a2 (ymm2) printed as the first operand.
   2268 ; CHECK-LABEL: test_x86_avx_blendv_pd_256:
   2269 ; CHECK:       ## BB#0:
   2270 ; CHECK-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
   2271 ; CHECK-NEXT:    retl
   2272   %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
   2273   ret <4 x double> %res
   2274 }
   2275 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
   2276 
   2277 
   2278 define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
          ; Expected codegen: the three-operand llvm.x86.avx.blendv.ps.256 call becomes a single vblendvps, with %a2 (ymm2) printed as the first operand.
   2279 ; CHECK-LABEL: test_x86_avx_blendv_ps_256:
   2280 ; CHECK:       ## BB#0:
   2281 ; CHECK-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0
   2282 ; CHECK-NEXT:    retl
   2283   %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
   2284   ret <8 x float> %res
   2285 }
   2286 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
   2287 
   2288 
   2289 define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Expected codegen: llvm.x86.avx.cmp.pd.256 with predicate immediate 7 is printed with the "ord" mnemonic, vcmpordpd, on ymm registers.
   2290 ; CHECK-LABEL: test_x86_avx_cmp_pd_256:
   2291 ; CHECK:       ## BB#0:
   2292 ; CHECK-NEXT:    vcmpordpd %ymm1, %ymm0, %ymm0
   2293 ; CHECK-NEXT:    retl
   2294   %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
   2295   ret <4 x double> %res
   2296 }
   2297 declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
   2298 
   2299 
   2300 define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Expected codegen: llvm.x86.avx.cmp.ps.256 with predicate immediate 7 is printed with the "ord" mnemonic, vcmpordps, on ymm registers.
          ; The intrinsic's declaration appears after the following pseudo-op test, which uses the same intrinsic.
   2301 ; CHECK-LABEL: test_x86_avx_cmp_ps_256:
   2302 ; CHECK:       ## BB#0:
   2303 ; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %ymm0
   2304 ; CHECK-NEXT:    retl
   2305   %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   2306   ret <8 x float> %res
   2307 }
   2308 
   2309 define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
          ; Exercises every predicate immediate of llvm.x86.avx.cmp.ps.256 from 0 through 31.
          ; Each call compares %a0 against the previous call's result, so the 32 compares form a
          ; dependency chain and are expected to appear in the output in immediate order, each
          ; printed with its own predicate mnemonic (vcmpeqps for 0 ... vcmptrue_usps for 31).
          ; Intermediate results are expected in ymm1; only the final compare writes ymm0.
   2310 ; CHECK-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
   2311 ; CHECK:       ## BB#0:
   2312 ; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1
   2313 ; CHECK-NEXT:    vcmpltps %ymm1, %ymm0, %ymm1
   2314 ; CHECK-NEXT:    vcmpleps %ymm1, %ymm0, %ymm1
   2315 ; CHECK-NEXT:    vcmpunordps %ymm1, %ymm0, %ymm1
   2316 ; CHECK-NEXT:    vcmpneqps %ymm1, %ymm0, %ymm1
   2317 ; CHECK-NEXT:    vcmpnltps %ymm1, %ymm0, %ymm1
   2318 ; CHECK-NEXT:    vcmpnleps %ymm1, %ymm0, %ymm1
   2319 ; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %ymm1
   2320 ; CHECK-NEXT:    vcmpeq_uqps %ymm1, %ymm0, %ymm1
   2321 ; CHECK-NEXT:    vcmpngeps %ymm1, %ymm0, %ymm1
   2322 ; CHECK-NEXT:    vcmpngtps %ymm1, %ymm0, %ymm1
   2323 ; CHECK-NEXT:    vcmpfalseps %ymm1, %ymm0, %ymm1
   2324 ; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %ymm1
   2325 ; CHECK-NEXT:    vcmpgeps %ymm1, %ymm0, %ymm1
   2326 ; CHECK-NEXT:    vcmpgtps %ymm1, %ymm0, %ymm1
   2327 ; CHECK-NEXT:    vcmptrueps %ymm1, %ymm0, %ymm1
   2328 ; CHECK-NEXT:    vcmpeq_osps %ymm1, %ymm0, %ymm1
   2329 ; CHECK-NEXT:    vcmplt_oqps %ymm1, %ymm0, %ymm1
   2330 ; CHECK-NEXT:    vcmple_oqps %ymm1, %ymm0, %ymm1
   2331 ; CHECK-NEXT:    vcmpunord_sps %ymm1, %ymm0, %ymm1
   2332 ; CHECK-NEXT:    vcmpneq_usps %ymm1, %ymm0, %ymm1
   2333 ; CHECK-NEXT:    vcmpnlt_uqps %ymm1, %ymm0, %ymm1
   2334 ; CHECK-NEXT:    vcmpnle_uqps %ymm1, %ymm0, %ymm1
   2335 ; CHECK-NEXT:    vcmpord_sps %ymm1, %ymm0, %ymm1
   2336 ; CHECK-NEXT:    vcmpeq_usps %ymm1, %ymm0, %ymm1
   2337 ; CHECK-NEXT:    vcmpnge_uqps %ymm1, %ymm0, %ymm1
   2338 ; CHECK-NEXT:    vcmpngt_uqps %ymm1, %ymm0, %ymm1
   2339 ; CHECK-NEXT:    vcmpfalse_osps %ymm1, %ymm0, %ymm1
   2340 ; CHECK-NEXT:    vcmpneq_osps %ymm1, %ymm0, %ymm1
   2341 ; CHECK-NEXT:    vcmpge_oqps %ymm1, %ymm0, %ymm1
   2342 ; CHECK-NEXT:    vcmpgt_oqps %ymm1, %ymm0, %ymm1
   2343 ; CHECK-NEXT:    vcmptrue_usps %ymm1, %ymm0, %ymm0
   2344 ; CHECK-NEXT:    retl
   2345   %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
   2346   %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
   2347   %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
   2348   %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
   2349   %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
   2350   %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
   2351   %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
   2352   %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
   2353   %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
   2354   %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
   2355   %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
   2356   %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
   2357   %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
   2358   %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
   2359   %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
   2360   %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
   2361   %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
   2362   %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
   2363   %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
   2364   %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
   2365   %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
   2366   %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
   2367   %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
   2368   %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
   2369   %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
   2370   %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
   2371   %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
   2372   %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
   2373   %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
   2374   %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
   2375   %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
   2376   %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
   2377   ret <8 x float> %res
   2378 }
   2379 declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
   2380 
   2381 
   2382 define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
          ; Expected codegen: llvm.x86.avx.cvt.pd2.ps.256 becomes vcvtpd2psy from ymm0 into xmm0;
          ; the expected output also contains a vzeroupper before the return.
   2383 ; CHECK-LABEL: test_x86_avx_cvt_pd2_ps_256:
   2384 ; CHECK:       ## BB#0:
   2385 ; CHECK-NEXT:    vcvtpd2psy %ymm0, %xmm0
   2386 ; CHECK-NEXT:    vzeroupper
   2387 ; CHECK-NEXT:    retl
   2388   %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
   2389   ret <4 x float> %res
   2390 }
   2391 declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone
   2392 
   2393 
   2394 define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
          ; Expected codegen: llvm.x86.avx.cvt.pd2dq.256 becomes vcvtpd2dqy from ymm0 into xmm0;
          ; the expected output also contains a vzeroupper before the return.
   2395 ; CHECK-LABEL: test_x86_avx_cvt_pd2dq_256:
   2396 ; CHECK:       ## BB#0:
   2397 ; CHECK-NEXT:    vcvtpd2dqy %ymm0, %xmm0
   2398 ; CHECK-NEXT:    vzeroupper
   2399 ; CHECK-NEXT:    retl
   2400   %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
   2401   ret <4 x i32> %res
   2402 }
   2403 declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
   2404 
   2405 
   2406 define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
          ; Expected codegen: llvm.x86.avx.cvt.ps2.pd.256 becomes a single vcvtps2pd from xmm0 into ymm0.
   2407 ; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
   2408 ; CHECK:       ## BB#0:
   2409 ; CHECK-NEXT:    vcvtps2pd %xmm0, %ymm0
   2410 ; CHECK-NEXT:    retl
   2411   %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
   2412   ret <4 x double> %res
   2413 }
   2414 declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
   2415 
   2416 
   2417 define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
          ; Expected codegen: llvm.x86.avx.cvt.ps2dq.256 becomes a single vcvtps2dq on ymm0.
   2418 ; CHECK-LABEL: test_x86_avx_cvt_ps2dq_256:
   2419 ; CHECK:       ## BB#0:
   2420 ; CHECK-NEXT:    vcvtps2dq %ymm0, %ymm0
   2421 ; CHECK-NEXT:    retl
   2422   %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
   2423   ret <8 x i32> %res
   2424 }
   2425 declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
   2426 
   2427 
   2428 define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
          ; Expected codegen: llvm.x86.avx.cvtdq2.pd.256 becomes a single vcvtdq2pd from xmm0 into ymm0.
   2429 ; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
   2430 ; CHECK:       ## BB#0:
   2431 ; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
   2432 ; CHECK-NEXT:    retl
   2433   %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
   2434   ret <4 x double> %res
   2435 }
   2436 declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
   2437 
   2438 
   2439 define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
          ; Expected codegen: llvm.x86.avx.cvtdq2.ps.256 becomes a single vcvtdq2ps on ymm0.
   2440 ; CHECK-LABEL: test_x86_avx_cvtdq2_ps_256:
   2441 ; CHECK:       ## BB#0:
   2442 ; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0
   2443 ; CHECK-NEXT:    retl
   2444   %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
   2445   ret <8 x float> %res
   2446 }
   2447 declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
   2448 
   2449 
   2450 define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
          ; Expected codegen: llvm.x86.avx.cvtt.pd2dq.256 becomes vcvttpd2dqy from ymm0 into xmm0;
          ; the expected output also contains a vzeroupper before the return.
   2451 ; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256:
   2452 ; CHECK:       ## BB#0:
   2453 ; CHECK-NEXT:    vcvttpd2dqy %ymm0, %xmm0
   2454 ; CHECK-NEXT:    vzeroupper
   2455 ; CHECK-NEXT:    retl
   2456   %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
   2457   ret <4 x i32> %res
   2458 }
   2459 declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
   2460 
   2461 
   2462 define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
          ; Expected codegen: llvm.x86.avx.cvtt.ps2dq.256 becomes a single vcvttps2dq on ymm0.
   2463 ; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
   2464 ; CHECK:       ## BB#0:
   2465 ; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
   2466 ; CHECK-NEXT:    retl
   2467   %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
   2468   ret <8 x i32> %res
   2469 }
   2470 declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
   2471 
   2472 
   2473 define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Expected codegen: llvm.x86.avx.dp.ps.256 with i8 immediate 7 becomes a single vdpps carrying the same immediate ($7) on ymm registers.
   2474 ; CHECK-LABEL: test_x86_avx_dp_ps_256:
   2475 ; CHECK:       ## BB#0:
   2476 ; CHECK-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0
   2477 ; CHECK-NEXT:    retl
   2478   %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   2479   ret <8 x float> %res
   2480 }
   2481 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
   2482 
   2483 
   2484 define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Expected codegen: the llvm.x86.avx.hadd.pd.256 call on %a0/%a1 becomes a single vhaddpd on ymm registers.
   2485 ; CHECK-LABEL: test_x86_avx_hadd_pd_256:
   2486 ; CHECK:       ## BB#0:
   2487 ; CHECK-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
   2488 ; CHECK-NEXT:    retl
   2489   %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2490   ret <4 x double> %res
   2491 }
   2492 declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2493 
   2494 
   2495 define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Expected codegen: the llvm.x86.avx.hadd.ps.256 call on %a0/%a1 becomes a single vhaddps on ymm registers.
   2496 ; CHECK-LABEL: test_x86_avx_hadd_ps_256:
   2497 ; CHECK:       ## BB#0:
   2498 ; CHECK-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
   2499 ; CHECK-NEXT:    retl
   2500   %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2501   ret <8 x float> %res
   2502 }
   2503 declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2504 
   2505 
   2506 define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Expected codegen: the llvm.x86.avx.hsub.pd.256 call on %a0/%a1 becomes a single vhsubpd on ymm registers.
   2507 ; CHECK-LABEL: test_x86_avx_hsub_pd_256:
   2508 ; CHECK:       ## BB#0:
   2509 ; CHECK-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0
   2510 ; CHECK-NEXT:    retl
   2511   %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2512   ret <4 x double> %res
   2513 }
   2514 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2515 
   2516 
   2517 define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Expected codegen: the llvm.x86.avx.hsub.ps.256 call on %a0/%a1 becomes a single vhsubps on ymm registers.
   2518 ; CHECK-LABEL: test_x86_avx_hsub_ps_256:
   2519 ; CHECK:       ## BB#0:
   2520 ; CHECK-NEXT:    vhsubps %ymm1, %ymm0, %ymm0
   2521 ; CHECK-NEXT:    retl
   2522   %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2523   ret <8 x float> %res
   2524 }
   2525 declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2526 
   2527 
   2528 define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
          ; Expected codegen: the pointer argument is read from the stack into %eax, then
          ; llvm.x86.avx.ldu.dq.256 becomes a single vlddqu from (%eax) into ymm0.
   2529 ; CHECK-LABEL: test_x86_avx_ldu_dq_256:
   2530 ; CHECK:       ## BB#0:
   2531 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2532 ; CHECK-NEXT:    vlddqu (%eax), %ymm0
   2533 ; CHECK-NEXT:    retl
   2534   %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
   2535   ret <32 x i8> %res
   2536 }
   2537 declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
   2538 
   2539 
   2540 define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x i64> %mask) {
          ; Expected codegen: the pointer argument is read from the stack into %eax, then
          ; llvm.x86.avx.maskload.pd becomes a vmaskmovpd load from (%eax) with the mask in xmm0.
   2541 ; CHECK-LABEL: test_x86_avx_maskload_pd:
   2542 ; CHECK:       ## BB#0:
   2543 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2544 ; CHECK-NEXT:    vmaskmovpd (%eax), %xmm0, %xmm0
   2545 ; CHECK-NEXT:    retl
   2546   %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1]
   2547   ret <2 x double> %res
   2548 }
   2549 declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly
   2550 
   2551 
   2552 define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x i64> %mask) {
          ; Expected codegen: the pointer argument is read from the stack into %eax, then
          ; llvm.x86.avx.maskload.pd.256 becomes a vmaskmovpd load from (%eax) with the mask in ymm0.
   2553 ; CHECK-LABEL: test_x86_avx_maskload_pd_256:
   2554 ; CHECK:       ## BB#0:
   2555 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2556 ; CHECK-NEXT:    vmaskmovpd (%eax), %ymm0, %ymm0
   2557 ; CHECK-NEXT:    retl
   2558   %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1]
   2559   ret <4 x double> %res
   2560 }
   2561 declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly
   2562 
   2563 
   2564 define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x i32> %mask) {
          ; Expected codegen: the pointer argument is read from the stack into %eax, then
          ; llvm.x86.avx.maskload.ps becomes a vmaskmovps load from (%eax) with the mask in xmm0.
   2565 ; CHECK-LABEL: test_x86_avx_maskload_ps:
   2566 ; CHECK:       ## BB#0:
   2567 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2568 ; CHECK-NEXT:    vmaskmovps (%eax), %xmm0, %xmm0
   2569 ; CHECK-NEXT:    retl
   2570   %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1]
   2571   ret <4 x float> %res
   2572 }
   2573 declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly
   2574 
   2575 
   2576 define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x i32> %mask) {
          ; Expected codegen: the pointer argument is read from the stack into %eax, then
          ; llvm.x86.avx.maskload.ps.256 becomes a vmaskmovps load from (%eax) with the mask in ymm0.
   2577 ; CHECK-LABEL: test_x86_avx_maskload_ps_256:
   2578 ; CHECK:       ## BB#0:
   2579 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2580 ; CHECK-NEXT:    vmaskmovps (%eax), %ymm0, %ymm0
   2581 ; CHECK-NEXT:    retl
   2582   %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1]
   2583   ret <8 x float> %res
   2584 }
   2585 declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly
   2586 
   2587 
   2588 define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2) {
          ; Expected codegen: the pointer argument is read from the stack into %eax, then
          ; llvm.x86.avx.maskstore.pd becomes a vmaskmovpd store of xmm1 to (%eax) with the mask in xmm0.
   2589 ; CHECK-LABEL: test_x86_avx_maskstore_pd:
   2590 ; CHECK:       ## BB#0:
   2591 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2592 ; CHECK-NEXT:    vmaskmovpd %xmm1, %xmm0, (%eax)
   2593 ; CHECK-NEXT:    retl
   2594   call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %mask, <2 x double> %a2)
   2595   ret void
   2596 }
   2597 declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind
   2598 
   2599 
   2600 define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x i64> %mask, <4 x double> %a2) {
          ; Expected codegen: the pointer argument is read from the stack into %eax, then
          ; llvm.x86.avx.maskstore.pd.256 becomes a vmaskmovpd store of ymm1 to (%eax) with the mask in ymm0;
          ; the expected output also contains a vzeroupper before the return.
   2601 ; CHECK-LABEL: test_x86_avx_maskstore_pd_256:
   2602 ; CHECK:       ## BB#0:
   2603 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2604 ; CHECK-NEXT:    vmaskmovpd %ymm1, %ymm0, (%eax)
   2605 ; CHECK-NEXT:    vzeroupper
   2606 ; CHECK-NEXT:    retl
   2607   call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %mask, <4 x double> %a2)
   2608   ret void
   2609 }
   2610 declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind
   2611 
   2612 
   2613 define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2) {
          ; Expected codegen: the pointer argument is read from the stack into %eax, then
          ; llvm.x86.avx.maskstore.ps becomes a vmaskmovps store of xmm1 to (%eax) with the mask in xmm0.
   2614 ; CHECK-LABEL: test_x86_avx_maskstore_ps:
   2615 ; CHECK:       ## BB#0:
   2616 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2617 ; CHECK-NEXT:    vmaskmovps %xmm1, %xmm0, (%eax)
   2618 ; CHECK-NEXT:    retl
   2619   call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %mask, <4 x float> %a2)
   2620   ret void
   2621 }
   2622 declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind
   2623 
   2624 
   2625 define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x i32> %mask, <8 x float> %a2) {
          ; Expected codegen: the pointer argument is read from the stack into %eax, then
          ; llvm.x86.avx.maskstore.ps.256 becomes a vmaskmovps store of ymm1 to (%eax) with the mask in ymm0;
          ; the expected output also contains a vzeroupper before the return.
   2626 ; CHECK-LABEL: test_x86_avx_maskstore_ps_256:
   2627 ; CHECK:       ## BB#0:
   2628 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2629 ; CHECK-NEXT:    vmaskmovps %ymm1, %ymm0, (%eax)
   2630 ; CHECK-NEXT:    vzeroupper
   2631 ; CHECK-NEXT:    retl
   2632   call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %mask, <8 x float> %a2)
   2633   ret void
   2634 }
   2635 declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind
   2636 
   2637 
   2638 define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Expected codegen: the llvm.x86.avx.max.pd.256 call on %a0/%a1 becomes a single vmaxpd on ymm registers.
   2639 ; CHECK-LABEL: test_x86_avx_max_pd_256:
   2640 ; CHECK:       ## BB#0:
   2641 ; CHECK-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
   2642 ; CHECK-NEXT:    retl
   2643   %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2644   ret <4 x double> %res
   2645 }
   2646 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2647 
   2648 
   2649 define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Expected codegen: the llvm.x86.avx.max.ps.256 call on %a0/%a1 becomes a single vmaxps on ymm registers.
   2650 ; CHECK-LABEL: test_x86_avx_max_ps_256:
   2651 ; CHECK:       ## BB#0:
   2652 ; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
   2653 ; CHECK-NEXT:    retl
   2654   %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2655   ret <8 x float> %res
   2656 }
   2657 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2658 
   2659 
   2660 define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Expected codegen: the llvm.x86.avx.min.pd.256 call on %a0/%a1 becomes a single vminpd on ymm registers.
   2661 ; CHECK-LABEL: test_x86_avx_min_pd_256:
   2662 ; CHECK:       ## BB#0:
   2663 ; CHECK-NEXT:    vminpd %ymm1, %ymm0, %ymm0
   2664 ; CHECK-NEXT:    retl
   2665   %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2666   ret <4 x double> %res
   2667 }
   2668 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2669 
   2670 
   2671 define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Expected codegen: the llvm.x86.avx.min.ps.256 call on %a0/%a1 becomes a single vminps on ymm registers.
   2672 ; CHECK-LABEL: test_x86_avx_min_ps_256:
   2673 ; CHECK:       ## BB#0:
   2674 ; CHECK-NEXT:    vminps %ymm1, %ymm0, %ymm0
   2675 ; CHECK-NEXT:    retl
   2676   %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2677   ret <8 x float> %res
   2678 }
   2679 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2680 
   2681 
   2682 define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
          ; Expected codegen: llvm.x86.avx.movmsk.pd.256 becomes vmovmskpd from ymm0 into %eax;
          ; the expected output also contains a vzeroupper before the return.
   2683 ; CHECK-LABEL: test_x86_avx_movmsk_pd_256:
   2684 ; CHECK:       ## BB#0:
   2685 ; CHECK-NEXT:    vmovmskpd %ymm0, %eax
   2686 ; CHECK-NEXT:    vzeroupper
   2687 ; CHECK-NEXT:    retl
   2688   %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
   2689   ret i32 %res
   2690 }
   2691 declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
   2692 
   2693 
   2694 define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
          ; Expected codegen: llvm.x86.avx.movmsk.ps.256 becomes vmovmskps from ymm0 into %eax;
          ; the expected output also contains a vzeroupper before the return.
   2695 ; CHECK-LABEL: test_x86_avx_movmsk_ps_256:
   2696 ; CHECK:       ## BB#0:
   2697 ; CHECK-NEXT:    vmovmskps %ymm0, %eax
   2698 ; CHECK-NEXT:    vzeroupper
   2699 ; CHECK-NEXT:    retl
   2700   %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
   2701   ret i32 %res
   2702 }
   2703 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
   2704 
   2705 
   2706 
   2707 
   2708 
   2709 
   2710 
   2711 define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
          ; Expected codegen: llvm.x86.avx.ptestc.256 becomes vptest on ymm registers, and the i32
          ; result is built in %eax with sbbl %eax, %eax followed by andl $1; the expected output
          ; also contains a vzeroupper before the return.
   2712 ; CHECK-LABEL: test_x86_avx_ptestc_256:
   2713 ; CHECK:       ## BB#0:
   2714 ; CHECK-NEXT:    vptest %ymm1, %ymm0
   2715 ; CHECK-NEXT:    sbbl %eax, %eax
   2716 ; CHECK-NEXT:    andl $1, %eax
   2717 ; CHECK-NEXT:    vzeroupper
   2718 ; CHECK-NEXT:    retl
   2719   %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
   2720   ret i32 %res
   2721 }
   2722 declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
   2723 
   2724 
   2725 define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
          ; Expected codegen: llvm.x86.avx.ptestnzc.256 becomes vptest on ymm registers, and the i32
          ; result is built with seta %al followed by movzbl into %eax; the expected output also
          ; contains a vzeroupper before the return.
   2726 ; CHECK-LABEL: test_x86_avx_ptestnzc_256:
   2727 ; CHECK:       ## BB#0:
   2728 ; CHECK-NEXT:    vptest %ymm1, %ymm0
   2729 ; CHECK-NEXT:    seta %al
   2730 ; CHECK-NEXT:    movzbl %al, %eax
   2731 ; CHECK-NEXT:    vzeroupper
   2732 ; CHECK-NEXT:    retl
   2733   %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
   2734   ret i32 %res
   2735 }
   2736 declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone
   2737 
   2738 
   2739 define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
          ; Expected codegen: llvm.x86.avx.ptestz.256 becomes vptest on ymm registers, and the i32
          ; result is built with sete %al followed by movzbl into %eax; the expected output also
          ; contains a vzeroupper before the return.
   2740 ; CHECK-LABEL: test_x86_avx_ptestz_256:
   2741 ; CHECK:       ## BB#0:
   2742 ; CHECK-NEXT:    vptest %ymm1, %ymm0
   2743 ; CHECK-NEXT:    sete %al
   2744 ; CHECK-NEXT:    movzbl %al, %eax
   2745 ; CHECK-NEXT:    vzeroupper
   2746 ; CHECK-NEXT:    retl
   2747   %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
   2748   ret i32 %res
   2749 }
   2750 declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
   2751 
   2752 
   2753 define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
          ; Expected codegen: llvm.x86.avx.rcp.ps.256 becomes a single vrcpps on ymm0.
   2754 ; CHECK-LABEL: test_x86_avx_rcp_ps_256:
   2755 ; CHECK:       ## BB#0:
   2756 ; CHECK-NEXT:    vrcpps %ymm0, %ymm0
   2757 ; CHECK-NEXT:    retl
   2758   %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
   2759   ret <8 x float> %res
   2760 }
   2761 declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
   2762 
   2763 
   2764 define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
          ; Expected codegen: llvm.x86.avx.round.pd.256 with i32 immediate 7 becomes a single vroundpd carrying the same immediate ($7) on ymm0.
   2765 ; CHECK-LABEL: test_x86_avx_round_pd_256:
   2766 ; CHECK:       ## BB#0:
   2767 ; CHECK-NEXT:    vroundpd $7, %ymm0, %ymm0
   2768 ; CHECK-NEXT:    retl
   2769   %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
   2770   ret <4 x double> %res
   2771 }
   2772 declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
   2773 
   2774 
   2775 define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
          ; Expected codegen: llvm.x86.avx.round.ps.256 with i32 immediate 7 becomes a single vroundps carrying the same immediate ($7) on ymm0.
   2776 ; CHECK-LABEL: test_x86_avx_round_ps_256:
   2777 ; CHECK:       ## BB#0:
   2778 ; CHECK-NEXT:    vroundps $7, %ymm0, %ymm0
   2779 ; CHECK-NEXT:    retl
   2780   %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
   2781   ret <8 x float> %res
   2782 }
   2783 declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
   2784 
   2785 
   2786 define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
          ; Expected codegen: llvm.x86.avx.rsqrt.ps.256 becomes a single vrsqrtps on ymm0.
   2787 ; CHECK-LABEL: test_x86_avx_rsqrt_ps_256:
   2788 ; CHECK:       ## BB#0:
   2789 ; CHECK-NEXT:    vrsqrtps %ymm0, %ymm0
   2790 ; CHECK-NEXT:    retl
   2791   %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
   2792   ret <8 x float> %res
   2793 }
   2794 declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
   2795 
   2796 
   2797 define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
          ; Expects the 256-bit sqrt.pd intrinsic to be selected as a single vsqrtpd on %ymm0.
   2798 ; CHECK-LABEL: test_x86_avx_sqrt_pd_256:
   2799 ; CHECK:       ## BB#0:
   2800 ; CHECK-NEXT:    vsqrtpd %ymm0, %ymm0
   2801 ; CHECK-NEXT:    retl
   2802   %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
   2803   ret <4 x double> %res
   2804 }
   2805 declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
   2806 
   2807 
   2808 define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
          ; Expects the 256-bit sqrt.ps intrinsic to be selected as a single vsqrtps on %ymm0.
   2809 ; CHECK-LABEL: test_x86_avx_sqrt_ps_256:
   2810 ; CHECK:       ## BB#0:
   2811 ; CHECK-NEXT:    vsqrtps %ymm0, %ymm0
   2812 ; CHECK-NEXT:    retl
   2813   %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
   2814   ret <8 x float> %res
   2815 }
   2816 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
   2817 
   2818 
   2819 define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
   2820   ; FIXME: unfortunately the execution domain fix pass changes this store to vmovups, and it's hard to force the integer domain with no 256-bit integer instructions.
   2821   ; The add operation is intended to force the execution domain; the store expected below is still vmovups.
   2822 ; CHECK-LABEL: test_x86_avx_storeu_dq_256:
   2823 ; CHECK:       ## BB#0:
   2824 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2825 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2826 ; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
   2827 ; CHECK-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
   2828 ; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
   2829 ; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2830 ; CHECK-NEXT:    vmovups %ymm0, (%eax)
   2831 ; CHECK-NEXT:    vzeroupper
   2832 ; CHECK-NEXT:    retl
   2833   %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   2834   call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
   2835   ret void
   2836 }
   2837 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
   2838 
   2839 
   2840 define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
   2841   ; The fadd of zero forces the execution domain, so the unaligned store is expected as vmovupd.
   2842 ; CHECK-LABEL: test_x86_avx_storeu_pd_256:
   2843 ; CHECK:       ## BB#0:
   2844 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2845 ; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
   2846 ; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
   2847 ; CHECK-NEXT:    vmovupd %ymm0, (%eax)
   2848 ; CHECK-NEXT:    vzeroupper
   2849 ; CHECK-NEXT:    retl
   2850   %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
   2851   call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
   2852   ret void
   2853 }
   2854 declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
   2855 
   2856 
   2857 define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
          ; Expects the pointer to be loaded from the stack into %eax and the store emitted as vmovups, with vzeroupper before the return.
   2858 ; CHECK-LABEL: test_x86_avx_storeu_ps_256:
   2859 ; CHECK:       ## BB#0:
   2860 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2861 ; CHECK-NEXT:    vmovups %ymm0, (%eax)
   2862 ; CHECK-NEXT:    vzeroupper
   2863 ; CHECK-NEXT:    retl
   2864   call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
   2865   ret void
   2866 }
   2867 declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
   2868 
   2869 
   2870 define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
          ; Expects the pointer argument to be loaded from the stack and used as the memory operand of vbroadcastf128.
   2871 ; CHECK-LABEL: test_x86_avx_vbroadcastf128_pd_256:
   2872 ; CHECK:       ## BB#0:
   2873 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2874 ; CHECK-NEXT:    vbroadcastf128 (%eax), %ymm0
   2875 ; CHECK-NEXT:    retl
   2876   %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
   2877   ret <4 x double> %res
   2878 }
   2879 declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
   2880 
   2881 
   2882 define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
          ; Expects the pointer argument to be loaded from the stack and used as the memory operand of vbroadcastf128.
   2883 ; CHECK-LABEL: test_x86_avx_vbroadcastf128_ps_256:
   2884 ; CHECK:       ## BB#0:
   2885 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2886 ; CHECK-NEXT:    vbroadcastf128 (%eax), %ymm0
   2887 ; CHECK-NEXT:    retl
   2888   %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
   2889   ret <8 x float> %res
   2890 }
   2891 declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
   2892 
   2893 
   2894 define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; With immediate 7, the expected vperm2f128 shuffle comment is ymm1[2,3],ymm0[0,1].
   2895 ; CHECK-LABEL: test_x86_avx_vperm2f128_pd_256:
   2896 ; CHECK:       ## BB#0:
   2897 ; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
   2898 ; CHECK-NEXT:    retl
   2899   %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
   2900   ret <4 x double> %res
   2901 }
   2902 declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
   2903 
   2904 
   2905 define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; With immediate 7, the expected vperm2f128 shuffle comment is ymm1[2,3],ymm0[0,1].
   2906 ; CHECK-LABEL: test_x86_avx_vperm2f128_ps_256:
   2907 ; CHECK:       ## BB#0:
   2908 ; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
   2909 ; CHECK-NEXT:    retl
   2910   %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   2911   ret <8 x float> %res
   2912 }
   2913 declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
   2914 
   2915 
   2916 define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
          ; With immediate 7, the expected vperm2f128 shuffle comment is ymm1[2,3],ymm0[0,1] (integer-vector variant of the intrinsic).
   2917 ; CHECK-LABEL: test_x86_avx_vperm2f128_si_256:
   2918 ; CHECK:       ## BB#0:
   2919 ; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
   2920 ; CHECK-NEXT:    retl
   2921   %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
   2922   ret <8 x i32> %res
   2923 }
   2924 declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
   2925 
   2926 
   2927 define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
          ; With immediate 1, the expected vpermilpd shuffle comment is xmm0[1,0].
   2928 ; CHECK-LABEL: test_x86_avx_vpermil_pd:
   2929 ; CHECK:       ## BB#0:
   2930 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
   2931 ; CHECK-NEXT:    retl
   2932   %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
   2933   ret <2 x double> %res
   2934 }
   2935 declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
   2936 
   2937 
   2938 define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
          ; With immediate 7, the expected vpermilpd shuffle comment is ymm0[1,1,3,2].
   2939 ; CHECK-LABEL: test_x86_avx_vpermil_pd_256:
   2940 ; CHECK:       ## BB#0:
   2941 ; CHECK-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
   2942 ; CHECK-NEXT:    retl
   2943   %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
   2944   ret <4 x double> %res
   2945 }
   2946 declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
   2947 
   2948 
   2949 define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
          ; With immediate 7, the expected vpermilps shuffle comment is xmm0[3,1,0,0].
   2950 ; CHECK-LABEL: test_x86_avx_vpermil_ps:
   2951 ; CHECK:       ## BB#0:
   2952 ; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,0]
   2953 ; CHECK-NEXT:    retl
   2954   %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
   2955   ret <4 x float> %res
   2956 }
   2957 declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
   2958 
   2959 
   2960 define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
          ; With immediate 7, the expected vpermilps shuffle comment is ymm0[3,1,0,0,7,5,4,4].
   2961 ; CHECK-LABEL: test_x86_avx_vpermil_ps_256:
   2962 ; CHECK:       ## BB#0:
   2963 ; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,1,0,0,7,5,4,4]
   2964 ; CHECK-NEXT:    retl
   2965   %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
   2966   ret <8 x float> %res
   2967 }
   2968 declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
   2969 
   2970 
   2971 define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
          ; Expects the register-register form of vpermilpd, with the selector vector %a1 in %xmm1.
   2972 ; CHECK-LABEL: test_x86_avx_vpermilvar_pd:
   2973 ; CHECK:       ## BB#0:
   2974 ; CHECK-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
   2975 ; CHECK-NEXT:    retl
   2976   %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
   2977   ret <2 x double> %res
   2978 }
   2979 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
   2980 
   2981 
   2982 define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
          ; Expects the register-register form of vpermilpd, with the selector vector %a1 in %ymm1.
   2983 ; CHECK-LABEL: test_x86_avx_vpermilvar_pd_256:
   2984 ; CHECK:       ## BB#0:
   2985 ; CHECK-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0
   2986 ; CHECK-NEXT:    retl
   2987   %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
   2988   ret <4 x double> %res
   2989 }
   2990 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
   2991 
   2992 
   2993 define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
          ; Expects the register-register form of vpermilps, with the selector vector %a1 in %xmm1.
   2994 ; CHECK-LABEL: test_x86_avx_vpermilvar_ps:
   2995 ; CHECK:       ## BB#0:
   2996 ; CHECK-NEXT:    vpermilps %xmm1, %xmm0, %xmm0
   2997 ; CHECK-NEXT:    retl
   2998   %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
   2999   ret <4 x float> %res
   3000 }
   3001 define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
          ; The selector vector is loaded from %a1; expects that load to be folded into the memory operand of vpermilps.
   3002 ; CHECK-LABEL: test_x86_avx_vpermilvar_ps_load:
   3003 ; CHECK:       ## BB#0:
   3004 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3005 ; CHECK-NEXT:    vpermilps (%eax), %xmm0, %xmm0
   3006 ; CHECK-NEXT:    retl
   3007   %a2 = load <4 x i32>, <4 x i32>* %a1
   3008   %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
   3009   ret <4 x float> %res
   3010 }
   3011 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
   3012 
   3013 
   3014 define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
          ; Expects the register-register form of vpermilps, with the selector vector %a1 in %ymm1.
   3015 ; CHECK-LABEL: test_x86_avx_vpermilvar_ps_256:
   3016 ; CHECK:       ## BB#0:
   3017 ; CHECK-NEXT:    vpermilps %ymm1, %ymm0, %ymm0
   3018 ; CHECK-NEXT:    retl
   3019   %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
   3020   ret <8 x float> %res
   3021 }
   3022 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
   3023 
   3024 
   3025 define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
          ; Expects vtestpd on the xmm operands, with the i32 result produced by an sbbl/andl $1 sequence.
   3026 ; CHECK-LABEL: test_x86_avx_vtestc_pd:
   3027 ; CHECK:       ## BB#0:
   3028 ; CHECK-NEXT:    vtestpd %xmm1, %xmm0
   3029 ; CHECK-NEXT:    sbbl %eax, %eax
   3030 ; CHECK-NEXT:    andl $1, %eax
   3031 ; CHECK-NEXT:    retl
   3032   %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   3033   ret i32 %res
   3034 }
   3035 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
   3036 
   3037 
   3038 define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Expects vtestpd on the ymm operands, an sbbl/andl $1 sequence for the i32 result, and vzeroupper before the return.
   3039 ; CHECK-LABEL: test_x86_avx_vtestc_pd_256:
   3040 ; CHECK:       ## BB#0:
   3041 ; CHECK-NEXT:    vtestpd %ymm1, %ymm0
   3042 ; CHECK-NEXT:    sbbl %eax, %eax
   3043 ; CHECK-NEXT:    andl $1, %eax
   3044 ; CHECK-NEXT:    vzeroupper
   3045 ; CHECK-NEXT:    retl
   3046   %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
   3047   ret i32 %res
   3048 }
   3049 declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
   3050 
   3051 
   3052 define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
          ; Expects vtestps on the xmm operands, with the i32 result produced by an sbbl/andl $1 sequence.
   3053 ; CHECK-LABEL: test_x86_avx_vtestc_ps:
   3054 ; CHECK:       ## BB#0:
   3055 ; CHECK-NEXT:    vtestps %xmm1, %xmm0
   3056 ; CHECK-NEXT:    sbbl %eax, %eax
   3057 ; CHECK-NEXT:    andl $1, %eax
   3058 ; CHECK-NEXT:    retl
   3059   %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   3060   ret i32 %res
   3061 }
   3062 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
   3063 
   3064 
   3065 define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Expects vtestps on the ymm operands, an sbbl/andl $1 sequence for the i32 result, and vzeroupper before the return.
   3066 ; CHECK-LABEL: test_x86_avx_vtestc_ps_256:
   3067 ; CHECK:       ## BB#0:
   3068 ; CHECK-NEXT:    vtestps %ymm1, %ymm0
   3069 ; CHECK-NEXT:    sbbl %eax, %eax
   3070 ; CHECK-NEXT:    andl $1, %eax
   3071 ; CHECK-NEXT:    vzeroupper
   3072 ; CHECK-NEXT:    retl
   3073   %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
   3074   ret i32 %res
   3075 }
   3076 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
   3077 
   3078 
   3079 define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
          ; Expects vtestpd on the xmm operands, with the i32 result produced by seta and a zero-extending movzbl.
   3080 ; CHECK-LABEL: test_x86_avx_vtestnzc_pd:
   3081 ; CHECK:       ## BB#0:
   3082 ; CHECK-NEXT:    vtestpd %xmm1, %xmm0
   3083 ; CHECK-NEXT:    seta %al
   3084 ; CHECK-NEXT:    movzbl %al, %eax
   3085 ; CHECK-NEXT:    retl
   3086   %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   3087   ret i32 %res
   3088 }
   3089 declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone
   3090 
   3091 
   3092 define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Expects vtestpd on the ymm operands, seta plus movzbl for the i32 result, and vzeroupper before the return.
   3093 ; CHECK-LABEL: test_x86_avx_vtestnzc_pd_256:
   3094 ; CHECK:       ## BB#0:
   3095 ; CHECK-NEXT:    vtestpd %ymm1, %ymm0
   3096 ; CHECK-NEXT:    seta %al
   3097 ; CHECK-NEXT:    movzbl %al, %eax
   3098 ; CHECK-NEXT:    vzeroupper
   3099 ; CHECK-NEXT:    retl
   3100   %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
   3101   ret i32 %res
   3102 }
   3103 declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone
   3104 
   3105 
   3106 define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
          ; Expects vtestps on the xmm operands, with the i32 result produced by seta and a zero-extending movzbl.
   3107 ; CHECK-LABEL: test_x86_avx_vtestnzc_ps:
   3108 ; CHECK:       ## BB#0:
   3109 ; CHECK-NEXT:    vtestps %xmm1, %xmm0
   3110 ; CHECK-NEXT:    seta %al
   3111 ; CHECK-NEXT:    movzbl %al, %eax
   3112 ; CHECK-NEXT:    retl
   3113   %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   3114   ret i32 %res
   3115 }
   3116 declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone
   3117 
   3118 
   3119 define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Expects vtestps on the ymm operands, seta plus movzbl for the i32 result, and vzeroupper before the return.
   3120 ; CHECK-LABEL: test_x86_avx_vtestnzc_ps_256:
   3121 ; CHECK:       ## BB#0:
   3122 ; CHECK-NEXT:    vtestps %ymm1, %ymm0
   3123 ; CHECK-NEXT:    seta %al
   3124 ; CHECK-NEXT:    movzbl %al, %eax
   3125 ; CHECK-NEXT:    vzeroupper
   3126 ; CHECK-NEXT:    retl
   3127   %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
   3128   ret i32 %res
   3129 }
   3130 declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone
   3131 
   3132 
   3133 define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
          ; Expects vtestpd on the xmm operands, with the i32 result produced by sete and a zero-extending movzbl.
   3134 ; CHECK-LABEL: test_x86_avx_vtestz_pd:
   3135 ; CHECK:       ## BB#0:
   3136 ; CHECK-NEXT:    vtestpd %xmm1, %xmm0
   3137 ; CHECK-NEXT:    sete %al
   3138 ; CHECK-NEXT:    movzbl %al, %eax
   3139 ; CHECK-NEXT:    retl
   3140   %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   3141   ret i32 %res
   3142 }
   3143 declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
   3144 
   3145 
   3146 define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Expects vtestpd on the ymm operands, sete plus movzbl for the i32 result, and vzeroupper before the return.
   3147 ; CHECK-LABEL: test_x86_avx_vtestz_pd_256:
   3148 ; CHECK:       ## BB#0:
   3149 ; CHECK-NEXT:    vtestpd %ymm1, %ymm0
   3150 ; CHECK-NEXT:    sete %al
   3151 ; CHECK-NEXT:    movzbl %al, %eax
   3152 ; CHECK-NEXT:    vzeroupper
   3153 ; CHECK-NEXT:    retl
   3154   %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
   3155   ret i32 %res
   3156 }
   3157 declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone
   3158 
   3159 
   3160 define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
          ; Expects vtestps on the xmm operands, with the i32 result produced by sete and a zero-extending movzbl.
   3161 ; CHECK-LABEL: test_x86_avx_vtestz_ps:
   3162 ; CHECK:       ## BB#0:
   3163 ; CHECK-NEXT:    vtestps %xmm1, %xmm0
   3164 ; CHECK-NEXT:    sete %al
   3165 ; CHECK-NEXT:    movzbl %al, %eax
   3166 ; CHECK-NEXT:    retl
   3167   %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   3168   ret i32 %res
   3169 }
   3170 declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
   3171 
   3172 
   3173 define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Expects vtestps on the ymm operands, sete plus movzbl for the i32 result, and vzeroupper before the return.
   3174 ; CHECK-LABEL: test_x86_avx_vtestz_ps_256:
   3175 ; CHECK:       ## BB#0:
   3176 ; CHECK-NEXT:    vtestps %ymm1, %ymm0
   3177 ; CHECK-NEXT:    sete %al
   3178 ; CHECK-NEXT:    movzbl %al, %eax
   3179 ; CHECK-NEXT:    vzeroupper
   3180 ; CHECK-NEXT:    retl
   3181   %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
   3182   ret i32 %res
   3183 }
   3184 declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
   3185 
   3186 
   3187 define void @test_x86_avx_vzeroall() {
          ; Expects vzeroall for the intrinsic call, followed by a vzeroupper before the return.
   3188 ; CHECK-LABEL: test_x86_avx_vzeroall:
   3189 ; CHECK:       ## BB#0:
   3190 ; CHECK-NEXT:    vzeroall
   3191 ; CHECK-NEXT:    vzeroupper
   3192 ; CHECK-NEXT:    retl
   3193   call void @llvm.x86.avx.vzeroall()
   3194   ret void
   3195 }
   3196 declare void @llvm.x86.avx.vzeroall() nounwind
   3197 
   3198 
   3199 define void @test_x86_avx_vzeroupper() {
          ; Two consecutive vzeroupper instructions are expected for the single intrinsic call.
          ; NOTE(review): presumably the second one is inserted by the compiler before the return, as in the other tests here - confirm.
   3200 ; CHECK-LABEL: test_x86_avx_vzeroupper:
   3201 ; CHECK:       ## BB#0:
   3202 ; CHECK-NEXT:    vzeroupper
   3203 ; CHECK-NEXT:    vzeroupper
   3204 ; CHECK-NEXT:    retl
   3205   call void @llvm.x86.avx.vzeroupper()
   3206   ret void
   3207 }
   3208 declare void @llvm.x86.avx.vzeroupper() nounwind
   3209 
   3210 ; Make sure instructions that have no AVX equivalents, but are associated with SSEX feature flags, still work.
   3211 
   3212 define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
          ; Expects three stack loads into %edx, %ecx and %eax, a leal on %eax, and then the monitor instruction.
   3213 ; CHECK-LABEL: monitor:
   3214 ; CHECK:       ## BB#0:
   3215 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
   3216 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
   3217 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3218 ; CHECK-NEXT:    leal (%eax), %eax
   3219 ; CHECK-NEXT:    monitor
   3220 ; CHECK-NEXT:    retl
   3221   tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
   3222   ret void
   3223 }
   3224 declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
   3225 
   3226 define void @mwait(i32 %E, i32 %H) nounwind {
          ; Expects two stack loads into %ecx and %eax, and then the mwait instruction.
   3227 ; CHECK-LABEL: mwait:
   3228 ; CHECK:       ## BB#0:
   3229 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
   3230 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3231 ; CHECK-NEXT:    mwait
   3232 ; CHECK-NEXT:    retl
   3233   tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
   3234   ret void
   3235 }
   3236 declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
   3237 
   3238 define void @sfence() nounwind {
          ; Expects the sse.sfence intrinsic to be emitted as a bare sfence instruction.
   3239 ; CHECK-LABEL: sfence:
   3240 ; CHECK:       ## BB#0:
   3241 ; CHECK-NEXT:    sfence
   3242 ; CHECK-NEXT:    retl
   3243   tail call void @llvm.x86.sse.sfence()
   3244   ret void
   3245 }
   3246 declare void @llvm.x86.sse.sfence() nounwind
   3247 
   3248 define void @lfence() nounwind {
          ; Expects the sse2.lfence intrinsic to be emitted as a bare lfence instruction.
   3249 ; CHECK-LABEL: lfence:
   3250 ; CHECK:       ## BB#0:
   3251 ; CHECK-NEXT:    lfence
   3252 ; CHECK-NEXT:    retl
   3253   tail call void @llvm.x86.sse2.lfence()
   3254   ret void
   3255 }
   3256 declare void @llvm.x86.sse2.lfence() nounwind
   3257 
   3258 define void @mfence() nounwind {
          ; Expects the sse2.mfence intrinsic to be emitted as a bare mfence instruction.
   3259 ; CHECK-LABEL: mfence:
   3260 ; CHECK:       ## BB#0:
   3261 ; CHECK-NEXT:    mfence
   3262 ; CHECK-NEXT:    retl
   3263   tail call void @llvm.x86.sse2.mfence()
   3264   ret void
   3265 }
   3266 declare void @llvm.x86.sse2.mfence() nounwind
   3267 
   3268 define void @clflush(i8* %p) nounwind {
          ; Expects the pointer to be loaded from the stack into %eax and used as the clflush memory operand.
   3269 ; CHECK-LABEL: clflush:
   3270 ; CHECK:       ## BB#0:
   3271 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3272 ; CHECK-NEXT:    clflush (%eax)
   3273 ; CHECK-NEXT:    retl
   3274   tail call void @llvm.x86.sse2.clflush(i8* %p)
   3275   ret void
   3276 }
   3277 declare void @llvm.x86.sse2.clflush(i8*) nounwind
   3278 
   3279 define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
          ; Expects one stack load into %eax, then crc32b taking its other operand directly from the stack.
   3280 ; CHECK-LABEL: crc32_32_8:
   3281 ; CHECK:       ## BB#0:
   3282 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3283 ; CHECK-NEXT:    crc32b {{[0-9]+}}(%esp), %eax
   3284 ; CHECK-NEXT:    retl
   3285   %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
   3286   ret i32 %tmp
   3287 }
   3288 declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
   3289 
   3290 define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
          ; Expects one stack load into %eax, then crc32w taking its other operand directly from the stack.
   3291 ; CHECK-LABEL: crc32_32_16:
   3292 ; CHECK:       ## BB#0:
   3293 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3294 ; CHECK-NEXT:    crc32w {{[0-9]+}}(%esp), %eax
   3295 ; CHECK-NEXT:    retl
   3296   %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
   3297   ret i32 %tmp
   3298 }
   3299 declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
   3300 
   3301 define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
          ; Expects one stack load into %eax, then crc32l taking its other operand directly from the stack.
   3302 ; CHECK-LABEL: crc32_32_32:
   3303 ; CHECK:       ## BB#0:
   3304 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3305 ; CHECK-NEXT:    crc32l {{[0-9]+}}(%esp), %eax
   3306 ; CHECK-NEXT:    retl
   3307   %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
   3308   ret i32 %tmp
   3309 }
   3310 declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
   3311 
   3312 define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
          ; Expects the 256-bit non-temporal integer store intrinsic to be emitted as vmovntdq.
          ; NOTE(review): the integer add presumably forces the integer execution domain, as the pd variant's comment states for its fadd - confirm.
          ; The constant-pool label is matched with a regex because its numeric index depends on this function's position in the file.
   3313 ; CHECK-LABEL: movnt_dq:
   3314 ; CHECK:       ## BB#0:
   3315 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3316 ; CHECK-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
   3317 ; CHECK-NEXT:    vmovntdq %ymm0, (%eax)
   3318 ; CHECK-NEXT:    vzeroupper
   3319 ; CHECK-NEXT:    retl
   3320   %a2 = add <2 x i64> %a1, <i64 1, i64 1>
   3321   %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   3322   tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
   3323   ret void
   3324 }
   3325 declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
   3326 
   3327 define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
          ; Expects the pointer to be loaded from the stack into %eax and the store emitted as vmovntps, with vzeroupper before the return.
   3328 ; CHECK-LABEL: movnt_ps:
   3329 ; CHECK:       ## BB#0:
   3330 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3331 ; CHECK-NEXT:    vmovntps %ymm0, (%eax)
   3332 ; CHECK-NEXT:    vzeroupper
   3333 ; CHECK-NEXT:    retl
   3334   tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
   3335   ret void
   3336 }
   3337 declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
   3338 
   3339 define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
   3340   ; The fadd of zero forces the execution domain, so the non-temporal store is expected as vmovntpd.
   3341 ; CHECK-LABEL: movnt_pd:
   3342 ; CHECK:       ## BB#0:
   3343 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3344 ; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
   3345 ; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
   3346 ; CHECK-NEXT:    vmovntpd %ymm0, (%eax)
   3347 ; CHECK-NEXT:    vzeroupper
   3348 ; CHECK-NEXT:    retl
   3349   %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
   3350   tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
   3351   ret void
   3352 }
   3353 declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
   3354 
   3355 
   3356 ; Check for pclmulqdq
   3357 define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
          ; Expects the i8 0 operand of the intrinsic to appear as the $0 immediate of vpclmulqdq.
   3358 ; CHECK-LABEL: test_x86_pclmulqdq:
   3359 ; CHECK:       ## BB#0:
   3360 ; CHECK-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0
   3361 ; CHECK-NEXT:    retl
   3362   %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
   3363   ret <2 x i64> %res
   3364 }
   3365 declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
   3366