Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx,aes,pclmul | FileCheck %s
      2 
      3 define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
      4 ; CHECK-LABEL: test_x86_aesni_aesdec:
      5 ; CHECK:       # BB#0:
      6 ; CHECK-NEXT:    vaesdec %xmm1, %xmm0, %xmm0
      7 ; CHECK-NEXT:    retl
      8   %dec = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; expected lowering: one vaesdec on xmm1/xmm0
      9   ret <2 x i64> %dec
     10 }
     11 declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
     12 
     13 
     14 define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
     15 ; CHECK-LABEL: test_x86_aesni_aesdeclast:
     16 ; CHECK:       # BB#0:
     17 ; CHECK-NEXT:    vaesdeclast %xmm1, %xmm0, %xmm0
     18 ; CHECK-NEXT:    retl
     19   %declast = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; expected lowering: one vaesdeclast on xmm1/xmm0
     20   ret <2 x i64> %declast
     21 }
     22 declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone
     23 
     24 
     25 define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
     26 ; CHECK-LABEL: test_x86_aesni_aesenc:
     27 ; CHECK:       # BB#0:
     28 ; CHECK-NEXT:    vaesenc %xmm1, %xmm0, %xmm0
     29 ; CHECK-NEXT:    retl
     30   %enc = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; expected lowering: one vaesenc on xmm1/xmm0
     31   ret <2 x i64> %enc
     32 }
     33 declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
     34 
     35 
     36 define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
     37 ; CHECK-LABEL: test_x86_aesni_aesenclast:
     38 ; CHECK:       # BB#0:
     39 ; CHECK-NEXT:    vaesenclast %xmm1, %xmm0, %xmm0
     40 ; CHECK-NEXT:    retl
     41   %enclast = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; expected lowering: one vaesenclast on xmm1/xmm0
     42   ret <2 x i64> %enclast
     43 }
     44 declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone
     45 
     46 
     47 define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
     48 ; CHECK-LABEL: test_x86_aesni_aesimc:
     49 ; CHECK:       # BB#0:
     50 ; CHECK-NEXT:    vaesimc %xmm0, %xmm0
     51 ; CHECK-NEXT:    retl
     52   %imc = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; single-operand intrinsic; expected lowering: one vaesimc in place on xmm0
     53   ret <2 x i64> %imc
     54 }
     55 declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
     56 
     57 
     58 define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
     59 ; CHECK-LABEL: test_x86_aesni_aeskeygenassist:
     60 ; CHECK:       # BB#0:
     61 ; CHECK-NEXT:    vaeskeygenassist $7, %xmm0, %xmm0
     62 ; CHECK-NEXT:    retl
     63   %keygen = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; the i8 7 argument must show up as the $7 immediate of vaeskeygenassist
     64   ret <2 x i64> %keygen
     65 }
     66 declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
     67 
     68 
     69 define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
     70 ; CHECK-LABEL: test_x86_sse2_add_sd:
     71 ; CHECK:       # BB#0:
     72 ; CHECK-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
     73 ; CHECK-NEXT:    retl
     74   %sum = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; expected lowering: one vaddsd on xmm1/xmm0
     75   ret <2 x double> %sum
     76 }
     77 declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
     78 
     79 
     80 define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
     81 ; CHECK-LABEL: test_x86_sse2_cmp_pd:
     82 ; CHECK:       # BB#0:
     83 ; CHECK-NEXT:    vcmpordpd %xmm1, %xmm0, %xmm0
     84 ; CHECK-NEXT:    retl
     85   %cmp = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; with predicate operand i8 7 the expected mnemonic is vcmpordpd
     86   ret <2 x double> %cmp
     87 }
     88 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
     89 
     90 
     91 define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
     92 ; CHECK-LABEL: test_x86_sse2_cmp_sd:
     93 ; CHECK:       # BB#0:
     94 ; CHECK-NEXT:    vcmpordsd %xmm1, %xmm0, %xmm0
     95 ; CHECK-NEXT:    retl
     96   %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; with predicate operand i8 7 the expected mnemonic is vcmpordsd
     97   ret <2 x double> %cmp
     98 }
     99 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
    100 
    101 
    102 define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
    103 ; CHECK-LABEL: test_x86_sse2_comieq_sd:
    104 ; CHECK:       # BB#0:
    105 ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
    106 ; CHECK-NEXT:    sete %al
    107 ; CHECK-NEXT:    movzbl %al, %eax
    108 ; CHECK-NEXT:    retl
    109   %is_eq = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; expected sequence: vcomisd, sete on %al, movzbl into %eax
    110   ret i32 %is_eq
    111 }
    112 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
    113 
    114 
    115 define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
    116 ; CHECK-LABEL: test_x86_sse2_comige_sd:
    117 ; CHECK:       # BB#0:
    118 ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
    119 ; CHECK-NEXT:    setae %al
    120 ; CHECK-NEXT:    movzbl %al, %eax
    121 ; CHECK-NEXT:    retl
    122   %is_ge = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; expected sequence: vcomisd, setae on %al, movzbl into %eax
    123   ret i32 %is_ge
    124 }
    125 declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
    126 
    127 
    128 define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
    129 ; CHECK-LABEL: test_x86_sse2_comigt_sd:
    130 ; CHECK:       # BB#0:
    131 ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
    132 ; CHECK-NEXT:    seta %al
    133 ; CHECK-NEXT:    movzbl %al, %eax
    134 ; CHECK-NEXT:    retl
    135   %is_gt = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; expected sequence: vcomisd, seta on %al, movzbl into %eax
    136   ret i32 %is_gt
    137 }
    138 declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
    139 
    140 
    141 define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
    142 ; CHECK-LABEL: test_x86_sse2_comile_sd:
    143 ; CHECK:       # BB#0:
    144 ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
    145 ; CHECK-NEXT:    setbe %al
    146 ; CHECK-NEXT:    movzbl %al, %eax
    147 ; CHECK-NEXT:    retl
    148   %is_le = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; expected sequence: vcomisd, setbe on %al, movzbl into %eax
    149   ret i32 %is_le
    150 }
    151 declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
    152 
    153 
    154 define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
    155 ; CHECK-LABEL: test_x86_sse2_comilt_sd:
    156 ; CHECK:       # BB#0:
    157 ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
    158 ; CHECK-NEXT:    sbbl %eax, %eax
    159 ; CHECK-NEXT:    andl $1, %eax
    160 ; CHECK-NEXT:    retl
    161   %is_lt = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; expected sequence: vcomisd, then sbbl %eax,%eax and andl $1 (no setcc here)
    162   ret i32 %is_lt
    163 }
    164 declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
    165 
    166 
    167 define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
    168 ; CHECK-LABEL: test_x86_sse2_comineq_sd:
    169 ; CHECK:       # BB#0:
    170 ; CHECK-NEXT:    vcomisd %xmm1, %xmm0
    171 ; CHECK-NEXT:    setne %al
    172 ; CHECK-NEXT:    movzbl %al, %eax
    173 ; CHECK-NEXT:    retl
    174   %is_ne = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; expected sequence: vcomisd, setne on %al, movzbl into %eax
    175   ret i32 %is_ne
    176 }
    177 declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
    178 
    179 
    180 define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
    181 ; CHECK-LABEL: test_x86_sse2_cvtdq2pd:
    182 ; CHECK:       # BB#0:
    183 ; CHECK-NEXT:    vcvtdq2pd %xmm0, %xmm0
    184 ; CHECK-NEXT:    retl
    185   %cvt = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <4 x i32> in, <2 x double> out; expected lowering: one vcvtdq2pd
    186   ret <2 x double> %cvt
    187 }
    188 declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
    189 
    190 
    191 define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
    192 ; CHECK-LABEL: test_x86_sse2_cvtdq2ps:
    193 ; CHECK:       # BB#0:
    194 ; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
    195 ; CHECK-NEXT:    retl
    196   %cvt = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <4 x i32> in, <4 x float> out; expected lowering: one vcvtdq2ps
    197   ret <4 x float> %cvt
    198 }
    199 declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
    200 
    201 
    202 define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
    203 ; CHECK-LABEL: test_x86_sse2_cvtpd2dq:
    204 ; CHECK:       # BB#0:
    205 ; CHECK-NEXT:    vcvtpd2dq %xmm0, %xmm0
    206 ; CHECK-NEXT:    retl
    207   %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <2 x double> in, <4 x i32> out; expected lowering: one vcvtpd2dq
    208   ret <4 x i32> %cvt
    209 }
    210 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
    211 
    212 
    213 define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
    214 ; CHECK-LABEL: test_x86_sse2_cvtpd2ps:
    215 ; CHECK:       # BB#0:
    216 ; CHECK-NEXT:    vcvtpd2ps %xmm0, %xmm0
    217 ; CHECK-NEXT:    retl
    218   %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <2 x double> in, <4 x float> out; expected lowering: one vcvtpd2ps
    219   ret <4 x float> %cvt
    220 }
    221 declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
    222 
    223 
    224 define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
    225 ; CHECK-LABEL: test_x86_sse2_cvtps2dq:
    226 ; CHECK:       # BB#0:
    227 ; CHECK-NEXT:    vcvtps2dq %xmm0, %xmm0
    228 ; CHECK-NEXT:    retl
    229   %cvt = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <4 x float> in, <4 x i32> out; expected lowering: one vcvtps2dq
    230   ret <4 x i32> %cvt
    231 }
    232 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
    233 
    234 
    235 define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
    236 ; CHECK-LABEL: test_x86_sse2_cvtps2pd:
    237 ; CHECK:       # BB#0:
    238 ; CHECK-NEXT:    vcvtps2pd %xmm0, %xmm0
    239 ; CHECK-NEXT:    retl
    240   %cvt = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <4 x float> in, <2 x double> out; expected lowering: one vcvtps2pd
    241   ret <2 x double> %cvt
    242 }
    243 declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
    244 
    245 
    246 define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
    247 ; CHECK-LABEL: test_x86_sse2_cvtsd2si:
    248 ; CHECK:       # BB#0:
    249 ; CHECK-NEXT:    vcvtsd2si %xmm0, %eax
    250 ; CHECK-NEXT:    retl
    251   %int = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; i32 result; expected lowering: vcvtsd2si from xmm0 straight into %eax
    252   ret i32 %int
    253 }
    254 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
    255 
    256 
    257 define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
    258 ; CHECK-LABEL: test_x86_sse2_cvtsd2ss:
    259 ; CHECK:       # BB#0:
    260 ; CHECK-NEXT:    vcvtsd2ss %xmm1, %xmm0, %xmm0
    261 ; CHECK-NEXT:    retl
    262   %cvt = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; mixed operand types (<4 x float>, <2 x double>); expected lowering: one vcvtsd2ss
    263   ret <4 x float> %cvt
    264 }
    265 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
    266 
    267 
    268 define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
    269 ; CHECK-LABEL: test_x86_sse2_cvtsi2sd:
    270 ; CHECK:       # BB#0:
    271 ; CHECK-NEXT:    movl $7, %eax
    272 ; CHECK-NEXT:    vcvtsi2sdl %eax, %xmm0, %xmm0
    273 ; CHECK-NEXT:    retl
    274   %cvt = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; the constant i32 7 is first moved into %eax, then consumed by vcvtsi2sdl
    275   ret <2 x double> %cvt
    276 }
    277 declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
    278 
    279 
    280 define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
    281 ; CHECK-LABEL: test_x86_sse2_cvtss2sd:
    282 ; CHECK:       # BB#0:
    283 ; CHECK-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0
    284 ; CHECK-NEXT:    retl
    285   %cvt = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; mixed operand types (<2 x double>, <4 x float>); expected lowering: one vcvtss2sd
    286   ret <2 x double> %cvt
    287 }
    288 declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
    289 
    290 
    291 define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
    292 ; CHECK-LABEL: test_x86_sse2_cvttpd2dq:
    293 ; CHECK:       # BB#0:
    294 ; CHECK-NEXT:    vcvttpd2dq %xmm0, %xmm0
    295 ; CHECK-NEXT:    retl
    296   %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <2 x double> in, <4 x i32> out; expected lowering: one vcvttpd2dq
    297   ret <4 x i32> %cvt
    298 }
    299 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
    300 
    301 
    302 define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
    303 ; CHECK-LABEL: test_x86_sse2_cvttps2dq:
    304 ; CHECK:       # BB#0:
    305 ; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
    306 ; CHECK-NEXT:    retl
    307   %cvt = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <4 x float> in, <4 x i32> out; expected lowering: one vcvttps2dq
    308   ret <4 x i32> %cvt
    309 }
    310 declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
    311 
    312 
    313 define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
    314 ; CHECK-LABEL: test_x86_sse2_cvttsd2si:
    315 ; CHECK:       # BB#0:
    316 ; CHECK-NEXT:    vcvttsd2si %xmm0, %eax
    317 ; CHECK-NEXT:    retl
    318   %int = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; i32 result; expected lowering: vcvttsd2si from xmm0 straight into %eax
    319   ret i32 %int
    320 }
    321 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
    322 
    323 
    324 define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
    325 ; CHECK-LABEL: test_x86_sse2_div_sd:
    326 ; CHECK:       # BB#0:
    327 ; CHECK-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
    328 ; CHECK-NEXT:    retl
    329   %quot = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; expected lowering: one vdivsd on xmm1/xmm0
    330   ret <2 x double> %quot
    331 }
    332 declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
    333 
    334 
    335 
    336 define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
    337 ; CHECK-LABEL: test_x86_sse2_max_pd:
    338 ; CHECK:       # BB#0:
    339 ; CHECK-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
    340 ; CHECK-NEXT:    retl
    341   %max = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; expected lowering: one vmaxpd on xmm1/xmm0
    342   ret <2 x double> %max
    343 }
    344 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
    345 
    346 
    347 define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
    348 ; CHECK-LABEL: test_x86_sse2_max_sd:
    349 ; CHECK:       # BB#0:
    350 ; CHECK-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
    351 ; CHECK-NEXT:    retl
    352   %max = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; expected lowering: one vmaxsd on xmm1/xmm0
    353   ret <2 x double> %max
    354 }
    355 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
    356 
    357 
    358 define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
    359 ; CHECK-LABEL: test_x86_sse2_min_pd:
    360 ; CHECK:       # BB#0:
    361 ; CHECK-NEXT:    vminpd %xmm1, %xmm0, %xmm0
    362 ; CHECK-NEXT:    retl
    363   %min = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; expected lowering: one vminpd on xmm1/xmm0
    364   ret <2 x double> %min
    365 }
    366 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
    367 
    368 
    369 define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
    370 ; CHECK-LABEL: test_x86_sse2_min_sd:
    371 ; CHECK:       # BB#0:
    372 ; CHECK-NEXT:    vminsd %xmm1, %xmm0, %xmm0
    373 ; CHECK-NEXT:    retl
    374   %min = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; expected lowering: one vminsd on xmm1/xmm0
    375   ret <2 x double> %min
    376 }
    377 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
    378 
    379 
    380 define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
    381 ; CHECK-LABEL: test_x86_sse2_movmsk_pd:
    382 ; CHECK:       # BB#0:
    383 ; CHECK-NEXT:    vmovmskpd %xmm0, %eax
    384 ; CHECK-NEXT:    retl
    385   %mask = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; i32 result; expected lowering: vmovmskpd from xmm0 straight into %eax
    386   ret i32 %mask
    387 }
    388 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
    389 
    390 
    391 
    392 
    393 define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
    394 ; CHECK-LABEL: test_x86_sse2_mul_sd:
    395 ; CHECK:       # BB#0:
    396 ; CHECK-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
    397 ; CHECK-NEXT:    retl
    398   %prod = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; expected lowering: one vmulsd on xmm1/xmm0
    399   ret <2 x double> %prod
    400 }
    401 declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
    402 
    403 
    404 define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
    405 ; CHECK-LABEL: test_x86_sse2_packssdw_128:
    406 ; CHECK:       # BB#0:
    407 ; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
    408 ; CHECK-NEXT:    retl
    409   %packed = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; two <4 x i32> inputs, one <8 x i16> result; expected lowering: one vpackssdw
    410   ret <8 x i16> %packed
    411 }
    412 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
    413 
    414 
    415 define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
    416 ; CHECK-LABEL: test_x86_sse2_packsswb_128:
    417 ; CHECK:       # BB#0:
    418 ; CHECK-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
    419 ; CHECK-NEXT:    retl
    420   %packed = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; two <8 x i16> inputs, one <16 x i8> result; expected lowering: one vpacksswb
    421   ret <16 x i8> %packed
    422 }
    423 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
    424 
    425 
    426 define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
    427 ; CHECK-LABEL: test_x86_sse2_packuswb_128:
    428 ; CHECK:       # BB#0:
    429 ; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
    430 ; CHECK-NEXT:    retl
    431   %packed = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; two <8 x i16> inputs, one <16 x i8> result; expected lowering: one vpackuswb
    432   ret <16 x i8> %packed
    433 }
    434 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
    435 
    436 
    437 define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
    438 ; CHECK-LABEL: test_x86_sse2_padds_b:
    439 ; CHECK:       # BB#0:
    440 ; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0
    441 ; CHECK-NEXT:    retl
    442   %sum = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; expected lowering: one vpaddsb on xmm1/xmm0
    443   ret <16 x i8> %sum
    444 }
    445 declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
    446 
    447 
    448 define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
    449 ; CHECK-LABEL: test_x86_sse2_padds_w:
    450 ; CHECK:       # BB#0:
    451 ; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0
    452 ; CHECK-NEXT:    retl
    453   %sum = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; expected lowering: one vpaddsw on xmm1/xmm0
    454   ret <8 x i16> %sum
    455 }
    456 declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
    457 
    458 
    459 define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
    460 ; CHECK-LABEL: test_x86_sse2_paddus_b:
    461 ; CHECK:       # BB#0:
    462 ; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0
    463 ; CHECK-NEXT:    retl
    464   %sum = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; expected lowering: one vpaddusb on xmm1/xmm0
    465   ret <16 x i8> %sum
    466 }
    467 declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
    468 
    469 
    470 define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
    471 ; CHECK-LABEL: test_x86_sse2_paddus_w:
    472 ; CHECK:       # BB#0:
    473 ; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0
    474 ; CHECK-NEXT:    retl
    475   %sum = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; expected lowering: one vpaddusw on xmm1/xmm0
    476   ret <8 x i16> %sum
    477 }
    478 declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
    479 
    480 
    481 define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
    482 ; CHECK-LABEL: test_x86_sse2_pavg_b:
    483 ; CHECK:       # BB#0:
    484 ; CHECK-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
    485 ; CHECK-NEXT:    retl
    486   %avg = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; expected lowering: one vpavgb on xmm1/xmm0
    487   ret <16 x i8> %avg
    488 }
    489 declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
    490 
    491 
    492 define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
    493 ; CHECK-LABEL: test_x86_sse2_pavg_w:
    494 ; CHECK:       # BB#0:
    495 ; CHECK-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
    496 ; CHECK-NEXT:    retl
    497   %avg = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; expected lowering: one vpavgw on xmm1/xmm0
    498   ret <8 x i16> %avg
    499 }
    500 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
    501 
    502 
    503 define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
    504 ; CHECK-LABEL: test_x86_sse2_pmadd_wd:
    505 ; CHECK:       # BB#0:
    506 ; CHECK-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0
    507 ; CHECK-NEXT:    retl
    508   %madd = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; two <8 x i16> inputs, one <4 x i32> result; expected lowering: one vpmaddwd
    509   ret <4 x i32> %madd
    510 }
    511 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
    512 
    513 
    514 define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
    515 ; CHECK-LABEL: test_x86_sse2_pmaxs_w:
    516 ; CHECK:       # BB#0:
    517 ; CHECK-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
    518 ; CHECK-NEXT:    retl
    519   %max = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; expected lowering: one vpmaxsw on xmm1/xmm0
    520   ret <8 x i16> %max
    521 }
    522 declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
    523 
    524 
    525 define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
    526 ; CHECK-LABEL: test_x86_sse2_pmaxu_b:
    527 ; CHECK:       # BB#0:
    528 ; CHECK-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
    529 ; CHECK-NEXT:    retl
    530   %max = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; expected lowering: one vpmaxub on xmm1/xmm0
    531   ret <16 x i8> %max
    532 }
    533 declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
    534 
    535 
    536 define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
    537 ; CHECK-LABEL: test_x86_sse2_pmins_w:
    538 ; CHECK:       # BB#0:
    539 ; CHECK-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
    540 ; CHECK-NEXT:    retl
    541   %min = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; expected lowering: one vpminsw on xmm1/xmm0
    542   ret <8 x i16> %min
    543 }
    544 declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
    545 
    546 
    547 define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
    548 ; CHECK-LABEL: test_x86_sse2_pminu_b:
    549 ; CHECK:       # BB#0:
    550 ; CHECK-NEXT:    vpminub %xmm1, %xmm0, %xmm0
    551 ; CHECK-NEXT:    retl
    552   %min = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; expected lowering: one vpminub on xmm1/xmm0
    553   ret <16 x i8> %min
    554 }
    555 declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
    556 
    557 
    558 define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
    559 ; CHECK-LABEL: test_x86_sse2_pmovmskb_128:
    560 ; CHECK:       # BB#0:
    561 ; CHECK-NEXT:    vpmovmskb %xmm0, %eax
    562 ; CHECK-NEXT:    retl
    563   %mask = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; i32 result; expected lowering: vpmovmskb from xmm0 straight into %eax
    564   ret i32 %mask
    565 }
    566 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
    567 
    568 
    569 define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
    570 ; CHECK-LABEL: test_x86_sse2_pmulh_w:
    571 ; CHECK:       # BB#0:
    572 ; CHECK-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0
    573 ; CHECK-NEXT:    retl
    574   %prod = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; expected lowering: one vpmulhw on xmm1/xmm0
    575   ret <8 x i16> %prod
    576 }
    577 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
    578 
    579 
    580 define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
    581 ; CHECK-LABEL: test_x86_sse2_pmulhu_w:
    582 ; CHECK:       # BB#0:
    583 ; CHECK-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0
    584 ; CHECK-NEXT:    retl
    585   %prod = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; expected lowering: one vpmulhuw on xmm1/xmm0
    586   ret <8 x i16> %prod
    587 }
    588 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
    589 
    590 
    591 define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
    592 ; CHECK-LABEL: test_x86_sse2_pmulu_dq:
    593 ; CHECK:       # BB#0:
    594 ; CHECK-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
    595 ; CHECK-NEXT:    retl
    596   %prod = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; two <4 x i32> inputs, one <2 x i64> result; expected lowering: one vpmuludq
    597   ret <2 x i64> %prod
    598 }
    599 declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
    600 
    601 
    602 define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
    603 ; CHECK-LABEL: test_x86_sse2_psad_bw:
    604 ; CHECK:       # BB#0:
    605 ; CHECK-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
    606 ; CHECK-NEXT:    retl
    607   %sad = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; two <16 x i8> inputs, one <2 x i64> result; expected lowering: one vpsadbw
    608   ret <2 x i64> %sad
    609 }
    610 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
    611 
    612 
    613 define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
    614 ; CHECK-LABEL: test_x86_sse2_psll_d:
    615 ; CHECK:       # BB#0:
    616 ; CHECK-NEXT:    vpslld %xmm1, %xmm0, %xmm0
    617 ; CHECK-NEXT:    retl
    618   %shifted = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; count passed as a vector in %a1; expected lowering: register-form vpslld
    619   ret <4 x i32> %shifted
    620 }
    621 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
    622 
    623 
    624 define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
    625 ; CHECK-LABEL: test_x86_sse2_psll_q:
    626 ; CHECK:       # BB#0:
    627 ; CHECK-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
    628 ; CHECK-NEXT:    retl
    629   %shifted = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; count passed as a vector in %a1; expected lowering: register-form vpsllq
    630   ret <2 x i64> %shifted
    631 }
    632 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
    633 
    634 
    635 define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
    636 ; CHECK-LABEL: test_x86_sse2_psll_w:
    637 ; CHECK:       # BB#0:
    638 ; CHECK-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
    639 ; CHECK-NEXT:    retl
    640   %shifted = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; count passed as a vector in %a1; expected lowering: register-form vpsllw
    641   ret <8 x i16> %shifted
    642 }
    643 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
    644 
    645 
    646 define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
    647 ; CHECK-LABEL: test_x86_sse2_pslli_d:
    648 ; CHECK:       # BB#0:
    649 ; CHECK-NEXT:    vpslld $7, %xmm0, %xmm0
    650 ; CHECK-NEXT:    retl
    651   %shifted = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; the i32 7 count must show up as the $7 immediate of vpslld
    652   ret <4 x i32> %shifted
    653 }
    654 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
    655 
    656 
    657 define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
    658 ; CHECK-LABEL: test_x86_sse2_pslli_q:
    659 ; CHECK:       # BB#0:
    660 ; CHECK-NEXT:    vpsllq $7, %xmm0, %xmm0
    661 ; CHECK-NEXT:    retl
    662   %shifted = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; the i32 7 count must show up as the $7 immediate of vpsllq
    663   ret <2 x i64> %shifted
    664 }
    665 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
    666 
    667 
    668 define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
    669 ; CHECK-LABEL: test_x86_sse2_pslli_w:
    670 ; CHECK:       # BB#0:
    671 ; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
    672 ; CHECK-NEXT:    retl
    673   %shifted = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; the i32 7 count must show up as the $7 immediate of vpsllw
    674   ret <8 x i16> %shifted
    675 }
    676 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
    677 
    678 
    679 define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
    680 ; CHECK-LABEL: test_x86_sse2_psra_d:
    681 ; CHECK:       # BB#0:
    682 ; CHECK-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
    683 ; CHECK-NEXT:    retl
    684   %shifted = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; count passed as a vector in %a1; expected lowering: register-form vpsrad
    685   ret <4 x i32> %shifted
    686 }
    687 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
    688 
    689 
    690 define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
    691 ; CHECK-LABEL: test_x86_sse2_psra_w:
    692 ; CHECK:       # BB#0:
    693 ; CHECK-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
    694 ; CHECK-NEXT:    retl
    695   %shifted = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; count passed as a vector in %a1; expected lowering: register-form vpsraw
    696   ret <8 x i16> %shifted
    697 }
    698 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
    699 
    700 
    701 define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
    702 ; CHECK-LABEL: test_x86_sse2_psrai_d:
    703 ; CHECK:       # BB#0:
    704 ; CHECK-NEXT:    vpsrad $7, %xmm0, %xmm0
    705 ; CHECK-NEXT:    retl
    706   %shifted = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; the i32 7 count must show up as the $7 immediate of vpsrad
    707   ret <4 x i32> %shifted
    708 }
    709 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
    710 
    711 
    712 define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
    713 ; CHECK-LABEL: test_x86_sse2_psrai_w:
    714 ; CHECK:       # BB#0:
    715 ; CHECK-NEXT:    vpsraw $7, %xmm0, %xmm0
    716 ; CHECK-NEXT:    retl
    717   %shifted = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; the i32 7 count must show up as the $7 immediate of vpsraw
    718   ret <8 x i16> %shifted
    719 }
    720 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
    721 
    722 
    723 define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
    724 ; CHECK-LABEL: test_x86_sse2_psrl_d:
    725 ; CHECK:       # BB#0:
    726 ; CHECK-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
    727 ; CHECK-NEXT:    retl
    728   %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
    729   ret <4 x i32> %res
    730 }
    731 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
    732 
    733 
    734 define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
    735 ; CHECK-LABEL: test_x86_sse2_psrl_q:
    736 ; CHECK:       # BB#0:
    737 ; CHECK-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
    738 ; CHECK-NEXT:    retl
    739   %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
    740   ret <2 x i64> %res
    741 }
    742 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
    743 
    744 
    745 define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
    746 ; CHECK-LABEL: test_x86_sse2_psrl_w:
    747 ; CHECK:       # BB#0:
    748 ; CHECK-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
    749 ; CHECK-NEXT:    retl
    750   %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    751   ret <8 x i16> %res
    752 }
    753 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
    754 
    755 
    756 define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
    757 ; CHECK-LABEL: test_x86_sse2_psrli_d:
    758 ; CHECK:       # BB#0:
    759 ; CHECK-NEXT:    vpsrld $7, %xmm0, %xmm0
    760 ; CHECK-NEXT:    retl
    761   %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
    762   ret <4 x i32> %res
    763 }
    764 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
    765 
    766 
    767 define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
    768 ; CHECK-LABEL: test_x86_sse2_psrli_q:
    769 ; CHECK:       # BB#0:
    770 ; CHECK-NEXT:    vpsrlq $7, %xmm0, %xmm0
    771 ; CHECK-NEXT:    retl
    772   %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
    773   ret <2 x i64> %res
    774 }
    775 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
    776 
    777 
    778 define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
    779 ; CHECK-LABEL: test_x86_sse2_psrli_w:
    780 ; CHECK:       # BB#0:
    781 ; CHECK-NEXT:    vpsrlw $7, %xmm0, %xmm0
    782 ; CHECK-NEXT:    retl
    783   %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
    784   ret <8 x i16> %res
    785 }
    786 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
    787 
    788 
    789 define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
    790 ; CHECK-LABEL: test_x86_sse2_psubs_b:
    791 ; CHECK:       # BB#0:
    792 ; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0
    793 ; CHECK-NEXT:    retl
    794   %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
    795   ret <16 x i8> %res
    796 }
    797 declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
    798 
    799 
    800 define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
    801 ; CHECK-LABEL: test_x86_sse2_psubs_w:
    802 ; CHECK:       # BB#0:
    803 ; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0
    804 ; CHECK-NEXT:    retl
    805   %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    806   ret <8 x i16> %res
    807 }
    808 declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
    809 
    810 
    811 define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
    812 ; CHECK-LABEL: test_x86_sse2_psubus_b:
    813 ; CHECK:       # BB#0:
    814 ; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0
    815 ; CHECK-NEXT:    retl
    816   %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
    817   ret <16 x i8> %res
    818 }
    819 declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
    820 
    821 
    822 define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
    823 ; CHECK-LABEL: test_x86_sse2_psubus_w:
    824 ; CHECK:       # BB#0:
    825 ; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0
    826 ; CHECK-NEXT:    retl
    827   %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
    828   ret <8 x i16> %res
    829 }
    830 declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
    831 
    832 
    833 define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
    834 ; CHECK-LABEL: test_x86_sse2_sqrt_pd:
    835 ; CHECK:       # BB#0:
    836 ; CHECK-NEXT:    vsqrtpd %xmm0, %xmm0
    837 ; CHECK-NEXT:    retl
    838   %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
    839   ret <2 x double> %res
    840 }
    841 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
    842 
    843 
    844 define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
    845 ; CHECK-LABEL: test_x86_sse2_sqrt_sd:
    846 ; CHECK:       # BB#0:
    847 ; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
    848 ; CHECK-NEXT:    retl
    849   %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
    850   ret <2 x double> %res
    851 }
    852 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
    853 
    854 
    855 define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
    856 ; CHECK-LABEL: test_x86_sse2_storel_dq:
    857 ; CHECK:       # BB#0:
    858 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
    859 ; CHECK-NEXT:    vmovq %xmm0, (%eax)
    860 ; CHECK-NEXT:    retl
    861   call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
    862   ret void
    863 }
    864 declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
    865 
    866 
    867 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
    868   ; add operation forces the execution domain.
    869 ; CHECK-LABEL: test_x86_sse2_storeu_dq:
    870 ; CHECK:       # BB#0:
    871 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
    872 ; CHECK-NEXT:    vpaddb LCPI77_0, %xmm0, %xmm0
    873 ; CHECK-NEXT:    vmovdqu %xmm0, (%eax)
    874 ; CHECK-NEXT:    retl
    875   %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
    876   call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
    877   ret void
    878 }
    879 declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
    880 
    881 
    882 define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
    883   ; fadd operation forces the execution domain.
    884 ; CHECK-LABEL: test_x86_sse2_storeu_pd:
    885 ; CHECK:       # BB#0:
    886 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
    887 ; CHECK-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    888 ; CHECK-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
    889 ; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
    890 ; CHECK-NEXT:    vmovupd %xmm0, (%eax)
    891 ; CHECK-NEXT:    retl
    892   %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
    893   call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
    894   ret void
    895 }
    896 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
    897 
    898 
    899 define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
    900 ; CHECK-LABEL: test_x86_sse2_sub_sd:
    901 ; CHECK:       # BB#0:
    902 ; CHECK-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
    903 ; CHECK-NEXT:    retl
    904   %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
    905   ret <2 x double> %res
    906 }
    907 declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
    908 
    909 
    910 define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
    911 ; CHECK-LABEL: test_x86_sse2_ucomieq_sd:
    912 ; CHECK:       # BB#0:
    913 ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
    914 ; CHECK-NEXT:    sete %al
    915 ; CHECK-NEXT:    movzbl %al, %eax
    916 ; CHECK-NEXT:    retl
    917   %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    918   ret i32 %res
    919 }
    920 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
    921 
    922 
    923 define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
    924 ; CHECK-LABEL: test_x86_sse2_ucomige_sd:
    925 ; CHECK:       # BB#0:
    926 ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
    927 ; CHECK-NEXT:    setae %al
    928 ; CHECK-NEXT:    movzbl %al, %eax
    929 ; CHECK-NEXT:    retl
    930   %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    931   ret i32 %res
    932 }
    933 declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
    934 
    935 
    936 define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
    937 ; CHECK-LABEL: test_x86_sse2_ucomigt_sd:
    938 ; CHECK:       # BB#0:
    939 ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
    940 ; CHECK-NEXT:    seta %al
    941 ; CHECK-NEXT:    movzbl %al, %eax
    942 ; CHECK-NEXT:    retl
    943   %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    944   ret i32 %res
    945 }
    946 declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
    947 
    948 
    949 define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
    950 ; CHECK-LABEL: test_x86_sse2_ucomile_sd:
    951 ; CHECK:       # BB#0:
    952 ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
    953 ; CHECK-NEXT:    setbe %al
    954 ; CHECK-NEXT:    movzbl %al, %eax
    955 ; CHECK-NEXT:    retl
    956   %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    957   ret i32 %res
    958 }
    959 declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
    960 
    961 
    962 define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
    963 ; CHECK-LABEL: test_x86_sse2_ucomilt_sd:
    964 ; CHECK:       # BB#0:
    965 ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
    966 ; CHECK-NEXT:    sbbl %eax, %eax
    967 ; CHECK-NEXT:    andl $1, %eax
    968 ; CHECK-NEXT:    retl
    969   %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    970   ret i32 %res
    971 }
    972 declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
    973 
    974 
    975 define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
    976 ; CHECK-LABEL: test_x86_sse2_ucomineq_sd:
    977 ; CHECK:       # BB#0:
    978 ; CHECK-NEXT:    vucomisd %xmm1, %xmm0
    979 ; CHECK-NEXT:    setne %al
    980 ; CHECK-NEXT:    movzbl %al, %eax
    981 ; CHECK-NEXT:    retl
    982   %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    983   ret i32 %res
    984 }
    985 declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
    986 
    987 
    988 define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
    989 ; CHECK-LABEL: test_x86_sse3_addsub_pd:
    990 ; CHECK:       # BB#0:
    991 ; CHECK-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0
    992 ; CHECK-NEXT:    retl
    993   %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
    994   ret <2 x double> %res
    995 }
    996 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
    997 
    998 
    999 define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
   1000 ; CHECK-LABEL: test_x86_sse3_addsub_ps:
   1001 ; CHECK:       # BB#0:
   1002 ; CHECK-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0
   1003 ; CHECK-NEXT:    retl
   1004   %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1005   ret <4 x float> %res
   1006 }
   1007 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
   1008 
   1009 
   1010 define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
   1011 ; CHECK-LABEL: test_x86_sse3_hadd_pd:
   1012 ; CHECK:       # BB#0:
   1013 ; CHECK-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0
   1014 ; CHECK-NEXT:    retl
   1015   %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   1016   ret <2 x double> %res
   1017 }
   1018 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
   1019 
   1020 
   1021 define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
   1022 ; CHECK-LABEL: test_x86_sse3_hadd_ps:
   1023 ; CHECK:       # BB#0:
   1024 ; CHECK-NEXT:    vhaddps %xmm1, %xmm0, %xmm0
   1025 ; CHECK-NEXT:    retl
   1026   %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1027   ret <4 x float> %res
   1028 }
   1029 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
   1030 
   1031 
   1032 define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
   1033 ; CHECK-LABEL: test_x86_sse3_hsub_pd:
   1034 ; CHECK:       # BB#0:
   1035 ; CHECK-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0
   1036 ; CHECK-NEXT:    retl
   1037   %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   1038   ret <2 x double> %res
   1039 }
   1040 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
   1041 
   1042 
   1043 define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
   1044 ; CHECK-LABEL: test_x86_sse3_hsub_ps:
   1045 ; CHECK:       # BB#0:
   1046 ; CHECK-NEXT:    vhsubps %xmm1, %xmm0, %xmm0
   1047 ; CHECK-NEXT:    retl
   1048   %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1049   ret <4 x float> %res
   1050 }
   1051 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
   1052 
   1053 
   1054 define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
   1055 ; CHECK-LABEL: test_x86_sse3_ldu_dq:
   1056 ; CHECK:       # BB#0:
   1057 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1058 ; CHECK-NEXT:    vlddqu (%eax), %xmm0
   1059 ; CHECK-NEXT:    retl
   1060   %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
   1061   ret <16 x i8> %res
   1062 }
   1063 declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
   1064 
   1065 
   1066 define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
   1067 ; CHECK-LABEL: test_x86_sse41_blendvpd:
   1068 ; CHECK:       # BB#0:
   1069 ; CHECK-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
   1070 ; CHECK-NEXT:    retl
   1071   %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
   1072   ret <2 x double> %res
   1073 }
   1074 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
   1075 
   1076 
   1077 define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
   1078 ; CHECK-LABEL: test_x86_sse41_blendvps:
   1079 ; CHECK:       # BB#0:
   1080 ; CHECK-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
   1081 ; CHECK-NEXT:    retl
   1082   %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
   1083   ret <4 x float> %res
   1084 }
   1085 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
   1086 
   1087 
   1088 define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
   1089 ; CHECK-LABEL: test_x86_sse41_dppd:
   1090 ; CHECK:       # BB#0:
   1091 ; CHECK-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0
   1092 ; CHECK-NEXT:    retl
   1093   %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
   1094   ret <2 x double> %res
   1095 }
   1096 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
   1097 
   1098 
   1099 define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
   1100 ; CHECK-LABEL: test_x86_sse41_dpps:
   1101 ; CHECK:       # BB#0:
   1102 ; CHECK-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0
   1103 ; CHECK-NEXT:    retl
   1104   %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
   1105   ret <4 x float> %res
   1106 }
   1107 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
   1108 
   1109 
   1110 define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
   1111 ; CHECK-LABEL: test_x86_sse41_insertps:
   1112 ; CHECK:       # BB#0:
   1113 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[3]
   1114 ; CHECK-NEXT:    retl
   1115   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
   1116   ret <4 x float> %res
   1117 }
   1118 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
   1119 
   1120 
   1121 
   1122 define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
   1123 ; CHECK-LABEL: test_x86_sse41_mpsadbw:
   1124 ; CHECK:       # BB#0:
   1125 ; CHECK-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0
   1126 ; CHECK-NEXT:    retl
   1127   %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
   1128   ret <8 x i16> %res
   1129 }
   1130 declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1131 
   1132 
   1133 define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
   1134 ; CHECK-LABEL: test_x86_sse41_packusdw:
   1135 ; CHECK:       # BB#0:
   1136 ; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
   1137 ; CHECK-NEXT:    retl
   1138   %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
   1139   ret <8 x i16> %res
   1140 }
   1141 declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
   1142 
   1143 
   1144 define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
   1145 ; CHECK-LABEL: test_x86_sse41_pblendvb:
   1146 ; CHECK:       # BB#0:
   1147 ; CHECK-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
   1148 ; CHECK-NEXT:    retl
   1149   %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
   1150   ret <16 x i8> %res
   1151 }
   1152 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
   1153 
   1154 
   1155 define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
   1156 ; CHECK-LABEL: test_x86_sse41_phminposuw:
   1157 ; CHECK:       # BB#0:
   1158 ; CHECK-NEXT:    vphminposuw %xmm0, %xmm0
   1159 ; CHECK-NEXT:    retl
   1160   %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
   1161   ret <8 x i16> %res
   1162 }
   1163 declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
   1164 
   1165 
   1166 define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
   1167 ; CHECK-LABEL: test_x86_sse41_pmaxsb:
   1168 ; CHECK:       # BB#0:
   1169 ; CHECK-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
   1170 ; CHECK-NEXT:    retl
   1171   %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
   1172   ret <16 x i8> %res
   1173 }
   1174 declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
   1175 
   1176 
   1177 define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
   1178 ; CHECK-LABEL: test_x86_sse41_pmaxsd:
   1179 ; CHECK:       # BB#0:
   1180 ; CHECK-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
   1181 ; CHECK-NEXT:    retl
   1182   %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   1183   ret <4 x i32> %res
   1184 }
   1185 declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
   1186 
   1187 
   1188 define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
   1189 ; CHECK-LABEL: test_x86_sse41_pmaxud:
   1190 ; CHECK:       # BB#0:
   1191 ; CHECK-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
   1192 ; CHECK-NEXT:    retl
   1193   %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   1194   ret <4 x i32> %res
   1195 }
   1196 declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
   1197 
   1198 
   1199 define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
   1200 ; CHECK-LABEL: test_x86_sse41_pmaxuw:
   1201 ; CHECK:       # BB#0:
   1202 ; CHECK-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
   1203 ; CHECK-NEXT:    retl
   1204   %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   1205   ret <8 x i16> %res
   1206 }
   1207 declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
   1208 
   1209 
   1210 define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
   1211 ; CHECK-LABEL: test_x86_sse41_pminsb:
   1212 ; CHECK:       # BB#0:
   1213 ; CHECK-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
   1214 ; CHECK-NEXT:    retl
   1215   %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
   1216   ret <16 x i8> %res
   1217 }
   1218 declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
   1219 
   1220 
   1221 define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
   1222 ; CHECK-LABEL: test_x86_sse41_pminsd:
   1223 ; CHECK:       # BB#0:
   1224 ; CHECK-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
   1225 ; CHECK-NEXT:    retl
   1226   %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   1227   ret <4 x i32> %res
   1228 }
   1229 declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
   1230 
   1231 
   1232 define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
   1233 ; CHECK-LABEL: test_x86_sse41_pminud:
   1234 ; CHECK:       # BB#0:
   1235 ; CHECK-NEXT:    vpminud %xmm1, %xmm0, %xmm0
   1236 ; CHECK-NEXT:    retl
   1237   %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   1238   ret <4 x i32> %res
   1239 }
   1240 declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
   1241 
   1242 
   1243 define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
   1244 ; CHECK-LABEL: test_x86_sse41_pminuw:
   1245 ; CHECK:       # BB#0:
   1246 ; CHECK-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
   1247 ; CHECK-NEXT:    retl
   1248   %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   1249   ret <8 x i16> %res
   1250 }
   1251 declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
   1252 
   1253 
   1254 define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
   1255 ; CHECK-LABEL: test_x86_sse41_pmovsxbd:
   1256 ; CHECK:       # BB#0:
   1257 ; CHECK-NEXT:    vpmovsxbd %xmm0, %xmm0
   1258 ; CHECK-NEXT:    retl
   1259   %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
   1260   ret <4 x i32> %res
   1261 }
   1262 declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
   1263 
   1264 
   1265 define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
   1266 ; CHECK-LABEL: test_x86_sse41_pmovsxbq:
   1267 ; CHECK:       # BB#0:
   1268 ; CHECK-NEXT:    vpmovsxbq %xmm0, %xmm0
   1269 ; CHECK-NEXT:    retl
   1270   %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
   1271   ret <2 x i64> %res
   1272 }
   1273 declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
   1274 
   1275 
   1276 define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
   1277 ; CHECK-LABEL: test_x86_sse41_pmovsxbw:
   1278 ; CHECK:       # BB#0:
   1279 ; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm0
   1280 ; CHECK-NEXT:    retl
   1281   %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
   1282   ret <8 x i16> %res
   1283 }
   1284 declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
   1285 
   1286 
   1287 define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
   1288 ; CHECK-LABEL: test_x86_sse41_pmovsxdq:
   1289 ; CHECK:       # BB#0:
   1290 ; CHECK-NEXT:    vpmovsxdq %xmm0, %xmm0
   1291 ; CHECK-NEXT:    retl
   1292   %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
   1293   ret <2 x i64> %res
   1294 }
   1295 declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
   1296 
   1297 
   1298 define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
   1299 ; CHECK-LABEL: test_x86_sse41_pmovsxwd:
   1300 ; CHECK:       # BB#0:
   1301 ; CHECK-NEXT:    vpmovsxwd %xmm0, %xmm0
   1302 ; CHECK-NEXT:    retl
   1303   %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
   1304   ret <4 x i32> %res
   1305 }
   1306 declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
   1307 
   1308 
   1309 define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
   1310 ; CHECK-LABEL: test_x86_sse41_pmovsxwq:
   1311 ; CHECK:       # BB#0:
   1312 ; CHECK-NEXT:    vpmovsxwq %xmm0, %xmm0
   1313 ; CHECK-NEXT:    retl
   1314   %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
   1315   ret <2 x i64> %res
   1316 }
   1317 declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
   1318 
   1319 
   1320 define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
   1321 ; CHECK-LABEL: test_x86_sse41_pmovzxbd:
   1322 ; CHECK:       # BB#0:
   1323 ; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   1324 ; CHECK-NEXT:    retl
   1325   %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
   1326   ret <4 x i32> %res
   1327 }
   1328 declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
   1329 
   1330 
   1331 define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
   1332 ; CHECK-LABEL: test_x86_sse41_pmovzxbq:
   1333 ; CHECK:       # BB#0:
   1334 ; CHECK-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   1335 ; CHECK-NEXT:    retl
   1336   %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
   1337   ret <2 x i64> %res
   1338 }
   1339 declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
   1340 
   1341 
   1342 define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
   1343 ; CHECK-LABEL: test_x86_sse41_pmovzxbw:
   1344 ; CHECK:       # BB#0:
   1345 ; CHECK-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   1346 ; CHECK-NEXT:    retl
   1347   %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
   1348   ret <8 x i16> %res
   1349 }
   1350 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
   1351 
   1352 
   1353 define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
   1354 ; CHECK-LABEL: test_x86_sse41_pmovzxdq:
   1355 ; CHECK:       # BB#0:
   1356 ; CHECK-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
   1357 ; CHECK-NEXT:    retl
   1358   %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
   1359   ret <2 x i64> %res
   1360 }
   1361 declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
   1362 
   1363 
   1364 define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
   1365 ; CHECK-LABEL: test_x86_sse41_pmovzxwd:
   1366 ; CHECK:       # BB#0:
   1367 ; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1368 ; CHECK-NEXT:    retl
   1369   %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
   1370   ret <4 x i32> %res
   1371 }
   1372 declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
   1373 
   1374 
   1375 define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
   1376 ; CHECK-LABEL: test_x86_sse41_pmovzxwq:
   1377 ; CHECK:       # BB#0:
   1378 ; CHECK-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
   1379 ; CHECK-NEXT:    retl
   1380   %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
   1381   ret <2 x i64> %res
   1382 }
   1383 declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
   1384 
   1385 
   1386 define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
   1387 ; CHECK-LABEL: test_x86_sse41_pmuldq:
   1388 ; CHECK:       # BB#0:
   1389 ; CHECK-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0
   1390 ; CHECK-NEXT:    retl
   1391   %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
   1392   ret <2 x i64> %res
   1393 }
   1394 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
   1395 
   1396 
   1397 define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
   1398 ; CHECK-LABEL: test_x86_sse41_ptestc:
   1399 ; CHECK:       # BB#0:
   1400 ; CHECK-NEXT:    vptest %xmm1, %xmm0
   1401 ; CHECK-NEXT:    sbbl %eax, %eax
   1402 ; CHECK-NEXT:    andl $1, %eax
   1403 ; CHECK-NEXT:    retl
   1404   %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
   1405   ret i32 %res
   1406 }
   1407 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
   1408 
   1409 
   1410 define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
   1411 ; CHECK-LABEL: test_x86_sse41_ptestnzc:
   1412 ; CHECK:       # BB#0:
   1413 ; CHECK-NEXT:    vptest %xmm1, %xmm0
   1414 ; CHECK-NEXT:    seta %al
   1415 ; CHECK-NEXT:    movzbl %al, %eax
   1416 ; CHECK-NEXT:    retl
   1417   %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
   1418   ret i32 %res
   1419 }
   1420 declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
   1421 
   1422 
   1423 define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
        ; Lowering test for @llvm.x86.sse41.ptestz: expects vptest on the two arguments,
        ; then sete (ZF set) widened to the i32 return value by movzbl.
   1424 ; CHECK-LABEL: test_x86_sse41_ptestz:
   1425 ; CHECK:       # BB#0:
   1426 ; CHECK-NEXT:    vptest %xmm1, %xmm0
   1427 ; CHECK-NEXT:    sete %al
   1428 ; CHECK-NEXT:    movzbl %al, %eax
   1429 ; CHECK-NEXT:    retl
   1430   %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
   1431   ret i32 %res
   1432 }
   1433 declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
   1434 
   1435 
   1436 define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
        ; Lowering test for @llvm.x86.sse41.round.pd: the constant i32 7 passed to the
        ; intrinsic is expected to show up unchanged as the $7 operand of vroundpd.
   1437 ; CHECK-LABEL: test_x86_sse41_round_pd:
   1438 ; CHECK:       # BB#0:
   1439 ; CHECK-NEXT:    vroundpd $7, %xmm0, %xmm0
   1440 ; CHECK-NEXT:    retl
   1441   %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
   1442   ret <2 x double> %res
   1443 }
   1444 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
   1445 
   1446 
   1447 define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
        ; Lowering test for @llvm.x86.sse41.round.ps: the constant i32 7 passed to the
        ; intrinsic is expected to show up unchanged as the $7 operand of vroundps.
   1448 ; CHECK-LABEL: test_x86_sse41_round_ps:
   1449 ; CHECK:       # BB#0:
   1450 ; CHECK-NEXT:    vroundps $7, %xmm0, %xmm0
   1451 ; CHECK-NEXT:    retl
   1452   %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
   1453   ret <4 x float> %res
   1454 }
   1455 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
   1456 
   1457 
   1458 define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
        ; Lowering test for @llvm.x86.sse41.round.sd: expects the three-operand
        ; vroundsd on %xmm1/%xmm0 carrying the intrinsic's i32 7 as its $7 immediate.
   1459 ; CHECK-LABEL: test_x86_sse41_round_sd:
   1460 ; CHECK:       # BB#0:
   1461 ; CHECK-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm0
   1462 ; CHECK-NEXT:    retl
   1463   %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
   1464   ret <2 x double> %res
   1465 }
   1466 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
   1467 
   1468 
   1469 define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse41.round.ss: expects the three-operand
        ; vroundss on %xmm1/%xmm0 carrying the intrinsic's i32 7 as its $7 immediate.
   1470 ; CHECK-LABEL: test_x86_sse41_round_ss:
   1471 ; CHECK:       # BB#0:
   1472 ; CHECK-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm0
   1473 ; CHECK-NEXT:    retl
   1474   %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
   1475   ret <4 x float> %res
   1476 }
   1477 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
   1478 
   1479 
   1480 define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
        ; Lowering test for @llvm.x86.sse42.pcmpestri128: the two i32 7 operands are
        ; expected in %eax and %edx ahead of vpcmpestri $7, and the result is expected
        ; to be copied from %ecx into %eax for the return.
   1481 ; CHECK-LABEL: test_x86_sse42_pcmpestri128:
   1482 ; CHECK:       # BB#0:
   1483 ; CHECK-NEXT:    movl $7, %eax
   1484 ; CHECK-NEXT:    movl $7, %edx
   1485 ; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
   1486 ; CHECK-NEXT:    movl %ecx, %eax
   1487 ; CHECK-NEXT:    retl
   1488   %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
   1489   ret i32 %res
   1490 }
   1491 declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1492 
   1493 
   1494 define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
        ; Variant of the pcmpestri128 test with both vectors loaded through pointer
        ; arguments: one load is expected as a separate vmovdqa, the other is expected
        ; to be folded into vpcmpestri as a (%ecx) memory operand.
   1495 ; CHECK-LABEL: test_x86_sse42_pcmpestri128_load:
   1496 ; CHECK:       # BB#0:
   1497 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
   1498 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1499 ; CHECK-NEXT:    vmovdqa (%eax), %xmm0
   1500 ; CHECK-NEXT:    movl $7, %eax
   1501 ; CHECK-NEXT:    movl $7, %edx
   1502 ; CHECK-NEXT:    vpcmpestri $7, (%ecx), %xmm0
   1503 ; CHECK-NEXT:    movl %ecx, %eax
   1504 ; CHECK-NEXT:    retl
   1505   %1 = load <16 x i8>, <16 x i8>* %a0
   1506   %2 = load <16 x i8>, <16 x i8>* %a2
   1507   %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1]
   1508   ret i32 %res
   1509 }
   1510 
   1511 
   1512 define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
        ; Lowering test for @llvm.x86.sse42.pcmpestria128: expects the movl $7 setup of
        ; %eax/%edx and vpcmpestri $7, with the i32 result taken from flags via
        ; seta / movzbl.
   1513 ; CHECK-LABEL: test_x86_sse42_pcmpestria128:
   1514 ; CHECK:       # BB#0:
   1515 ; CHECK-NEXT:    movl $7, %eax
   1516 ; CHECK-NEXT:    movl $7, %edx
   1517 ; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
   1518 ; CHECK-NEXT:    seta %al
   1519 ; CHECK-NEXT:    movzbl %al, %eax
   1520 ; CHECK-NEXT:    retl
   1521   %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
   1522   ret i32 %res
   1523 }
   1524 declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1525 
   1526 
   1527 define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
        ; Lowering test for @llvm.x86.sse42.pcmpestric128: expects the movl $7 setup of
        ; %eax/%edx and vpcmpestri $7, with the i32 result rebuilt from the carry flag
        ; by the sbbl / andl $1 pair.
   1528 ; CHECK-LABEL: test_x86_sse42_pcmpestric128:
   1529 ; CHECK:       # BB#0:
   1530 ; CHECK-NEXT:    movl $7, %eax
   1531 ; CHECK-NEXT:    movl $7, %edx
   1532 ; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
   1533 ; CHECK-NEXT:    sbbl %eax, %eax
   1534 ; CHECK-NEXT:    andl $1, %eax
   1535 ; CHECK-NEXT:    retl
   1536   %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
   1537   ret i32 %res
   1538 }
   1539 declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1540 
   1541 
   1542 define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
        ; Lowering test for @llvm.x86.sse42.pcmpestrio128: expects the movl $7 setup of
        ; %eax/%edx and vpcmpestri $7, with the i32 result taken from the overflow flag
        ; via seto / movzbl.
   1543 ; CHECK-LABEL: test_x86_sse42_pcmpestrio128:
   1544 ; CHECK:       # BB#0:
   1545 ; CHECK-NEXT:    movl $7, %eax
   1546 ; CHECK-NEXT:    movl $7, %edx
   1547 ; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
   1548 ; CHECK-NEXT:    seto %al
   1549 ; CHECK-NEXT:    movzbl %al, %eax
   1550 ; CHECK-NEXT:    retl
   1551   %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
   1552   ret i32 %res
   1553 }
   1554 declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1555 
   1556 
   1557 define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
        ; Lowering test for @llvm.x86.sse42.pcmpestris128: expects the movl $7 setup of
        ; %eax/%edx and vpcmpestri $7, with the i32 result taken from the sign flag
        ; via sets / movzbl.
   1558 ; CHECK-LABEL: test_x86_sse42_pcmpestris128:
   1559 ; CHECK:       # BB#0:
   1560 ; CHECK-NEXT:    movl $7, %eax
   1561 ; CHECK-NEXT:    movl $7, %edx
   1562 ; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
   1563 ; CHECK-NEXT:    sets %al
   1564 ; CHECK-NEXT:    movzbl %al, %eax
   1565 ; CHECK-NEXT:    retl
   1566   %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
   1567   ret i32 %res
   1568 }
   1569 declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1570 
   1571 
   1572 define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
        ; Lowering test for @llvm.x86.sse42.pcmpestriz128: expects the movl $7 setup of
        ; %eax/%edx and vpcmpestri $7, with the i32 result taken from the zero flag
        ; via sete / movzbl.
   1573 ; CHECK-LABEL: test_x86_sse42_pcmpestriz128:
   1574 ; CHECK:       # BB#0:
   1575 ; CHECK-NEXT:    movl $7, %eax
   1576 ; CHECK-NEXT:    movl $7, %edx
   1577 ; CHECK-NEXT:    vpcmpestri $7, %xmm1, %xmm0
   1578 ; CHECK-NEXT:    sete %al
   1579 ; CHECK-NEXT:    movzbl %al, %eax
   1580 ; CHECK-NEXT:    retl
   1581   %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
   1582   ret i32 %res
   1583 }
   1584 declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1585 
   1586 
   1587 define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
        ; Lowering test for @llvm.x86.sse42.pcmpestrm128: the two i32 7 operands are
        ; expected in %eax and %edx ahead of vpcmpestrm $7 on %xmm1/%xmm0, then retl.
   1588 ; CHECK-LABEL: test_x86_sse42_pcmpestrm128:
   1589 ; CHECK:       # BB#0:
   1590 ; CHECK-NEXT:    movl $7, %eax
   1591 ; CHECK-NEXT:    movl $7, %edx
   1592 ; CHECK-NEXT:    vpcmpestrm $7, %xmm1, %xmm0
   1593 ; CHECK-NEXT:    retl
   1594   %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
   1595   ret <16 x i8> %res
   1596 }
   1597 declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
   1598 
   1599 
   1600 define <16 x i8> @test_x86_sse42_pcmpestrm128_load(<16 x i8> %a0, <16 x i8>* %a2) {
        ; Variant of the pcmpestrm128 test with the second vector loaded through %a2:
        ; the load is expected to be folded into vpcmpestrm as a (%ecx) memory operand.
   1601 ; CHECK-LABEL: test_x86_sse42_pcmpestrm128_load:
   1602 ; CHECK:       # BB#0:
   1603 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
   1604 ; CHECK-NEXT:    movl $7, %eax
   1605 ; CHECK-NEXT:    movl $7, %edx
   1606 ; CHECK-NEXT:    vpcmpestrm $7, (%ecx), %xmm0
   1607 ; CHECK-NEXT:    retl
   1608   %1 = load <16 x i8>, <16 x i8>* %a2
   1609   %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
   1610   ret <16 x i8> %res
   1611 }
   1612 
   1613 
   1614 define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
        ; Lowering test for @llvm.x86.sse42.pcmpistri128: expects vpcmpistri $7 on
        ; %xmm1/%xmm0, with the result copied from %ecx into %eax for the return.
   1615 ; CHECK-LABEL: test_x86_sse42_pcmpistri128:
   1616 ; CHECK:       # BB#0:
   1617 ; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
   1618 ; CHECK-NEXT:    movl %ecx, %eax
   1619 ; CHECK-NEXT:    retl
   1620   %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   1621   ret i32 %res
   1622 }
   1623 declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1624 
   1625 
   1626 define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
        ; Variant of the pcmpistri128 test with both vectors loaded through pointer
        ; arguments: one load is expected as a separate vmovdqa, the other is expected
        ; to be folded into vpcmpistri as an (%eax) memory operand.
   1627 ; CHECK-LABEL: test_x86_sse42_pcmpistri128_load:
   1628 ; CHECK:       # BB#0:
   1629 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1630 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
   1631 ; CHECK-NEXT:    vmovdqa (%ecx), %xmm0
   1632 ; CHECK-NEXT:    vpcmpistri $7, (%eax), %xmm0
   1633 ; CHECK-NEXT:    movl %ecx, %eax
   1634 ; CHECK-NEXT:    retl
   1635   %1 = load <16 x i8>, <16 x i8>* %a0
   1636   %2 = load <16 x i8>, <16 x i8>* %a1
   1637   %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1]
   1638   ret i32 %res
   1639 }
   1640 
   1641 
   1642 define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
        ; Lowering test for @llvm.x86.sse42.pcmpistria128: expects vpcmpistri $7, with
        ; the i32 result taken from flags via seta / movzbl.
   1643 ; CHECK-LABEL: test_x86_sse42_pcmpistria128:
   1644 ; CHECK:       # BB#0:
   1645 ; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
   1646 ; CHECK-NEXT:    seta %al
   1647 ; CHECK-NEXT:    movzbl %al, %eax
   1648 ; CHECK-NEXT:    retl
   1649   %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   1650   ret i32 %res
   1651 }
   1652 declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1653 
   1654 
   1655 define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
        ; Lowering test for @llvm.x86.sse42.pcmpistric128: expects vpcmpistri $7, with
        ; the i32 result rebuilt from the carry flag by the sbbl / andl $1 pair.
   1656 ; CHECK-LABEL: test_x86_sse42_pcmpistric128:
   1657 ; CHECK:       # BB#0:
   1658 ; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
   1659 ; CHECK-NEXT:    sbbl %eax, %eax
   1660 ; CHECK-NEXT:    andl $1, %eax
   1661 ; CHECK-NEXT:    retl
   1662   %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   1663   ret i32 %res
   1664 }
   1665 declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1666 
   1667 
   1668 define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
        ; Lowering test for @llvm.x86.sse42.pcmpistrio128: expects vpcmpistri $7, with
        ; the i32 result taken from the overflow flag via seto / movzbl.
   1669 ; CHECK-LABEL: test_x86_sse42_pcmpistrio128:
   1670 ; CHECK:       # BB#0:
   1671 ; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
   1672 ; CHECK-NEXT:    seto %al
   1673 ; CHECK-NEXT:    movzbl %al, %eax
   1674 ; CHECK-NEXT:    retl
   1675   %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   1676   ret i32 %res
   1677 }
   1678 declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1679 
   1680 
   1681 define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
        ; Lowering test for @llvm.x86.sse42.pcmpistris128: expects vpcmpistri $7, with
        ; the i32 result taken from the sign flag via sets / movzbl.
   1682 ; CHECK-LABEL: test_x86_sse42_pcmpistris128:
   1683 ; CHECK:       # BB#0:
   1684 ; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
   1685 ; CHECK-NEXT:    sets %al
   1686 ; CHECK-NEXT:    movzbl %al, %eax
   1687 ; CHECK-NEXT:    retl
   1688   %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   1689   ret i32 %res
   1690 }
   1691 declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1692 
   1693 
   1694 define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
        ; Lowering test for @llvm.x86.sse42.pcmpistriz128: expects vpcmpistri $7, with
        ; the i32 result taken from the zero flag via sete / movzbl.
   1695 ; CHECK-LABEL: test_x86_sse42_pcmpistriz128:
   1696 ; CHECK:       # BB#0:
   1697 ; CHECK-NEXT:    vpcmpistri $7, %xmm1, %xmm0
   1698 ; CHECK-NEXT:    sete %al
   1699 ; CHECK-NEXT:    movzbl %al, %eax
   1700 ; CHECK-NEXT:    retl
   1701   %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
   1702   ret i32 %res
   1703 }
   1704 declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1705 
   1706 
   1707 define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
        ; Lowering test for @llvm.x86.sse42.pcmpistrm128: the call with i8 7 is expected
        ; to become a single vpcmpistrm $7 on %xmm1/%xmm0, then retl.
   1708 ; CHECK-LABEL: test_x86_sse42_pcmpistrm128:
   1709 ; CHECK:       # BB#0:
   1710 ; CHECK-NEXT:    vpcmpistrm $7, %xmm1, %xmm0
   1711 ; CHECK-NEXT:    retl
   1712   %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
   1713   ret <16 x i8> %res
   1714 }
   1715 declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1716 
   1717 
   1718 define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
        ; Variant of the pcmpistrm128 test with the second vector loaded through %a1:
        ; the load is expected to be folded into vpcmpistrm as an (%eax) memory operand.
   1719 ; CHECK-LABEL: test_x86_sse42_pcmpistrm128_load:
   1720 ; CHECK:       # BB#0:
   1721 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1722 ; CHECK-NEXT:    vpcmpistrm $7, (%eax), %xmm0
   1723 ; CHECK-NEXT:    retl
   1724   %1 = load <16 x i8>, <16 x i8>* %a1
   1725   %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
   1726   ret <16 x i8> %res
   1727 }
   1728 
   1729 
   1730 define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.add.ss: the call is expected to become a
        ; single vaddss on %xmm1/%xmm0, then retl.
   1731 ; CHECK-LABEL: test_x86_sse_add_ss:
   1732 ; CHECK:       # BB#0:
   1733 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
   1734 ; CHECK-NEXT:    retl
   1735   %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1736   ret <4 x float> %res
   1737 }
   1738 declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
   1739 
   1740 
   1741 define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.cmp.ps: the i8 7 predicate operand is expected
        ; to be printed as the "ord" mnemonic form, i.e. a single vcmpordps.
   1742 ; CHECK-LABEL: test_x86_sse_cmp_ps:
   1743 ; CHECK:       # BB#0:
   1744 ; CHECK-NEXT:    vcmpordps %xmm1, %xmm0, %xmm0
   1745 ; CHECK-NEXT:    retl
   1746   %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
   1747   ret <4 x float> %res
   1748 }
   1749 declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
   1750 
   1751 
   1752 define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.cmp.ss: the i8 7 predicate operand is expected
        ; to be printed as the "ord" mnemonic form, i.e. a single vcmpordss.
   1753 ; CHECK-LABEL: test_x86_sse_cmp_ss:
   1754 ; CHECK:       # BB#0:
   1755 ; CHECK-NEXT:    vcmpordss %xmm1, %xmm0, %xmm0
   1756 ; CHECK-NEXT:    retl
   1757   %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
   1758   ret <4 x float> %res
   1759 }
   1760 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
   1761 
   1762 
   1763 define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.comieq.ss: expects vcomiss on the two
        ; arguments, with the i32 result formed from flags by sete / movzbl.
   1764 ; CHECK-LABEL: test_x86_sse_comieq_ss:
   1765 ; CHECK:       # BB#0:
   1766 ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
   1767 ; CHECK-NEXT:    sete %al
   1768 ; CHECK-NEXT:    movzbl %al, %eax
   1769 ; CHECK-NEXT:    retl
   1770   %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1771   ret i32 %res
   1772 }
   1773 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
   1774 
   1775 
   1776 define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.comige.ss: expects vcomiss on the two
        ; arguments, with the i32 result formed from flags by setae / movzbl.
   1777 ; CHECK-LABEL: test_x86_sse_comige_ss:
   1778 ; CHECK:       # BB#0:
   1779 ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
   1780 ; CHECK-NEXT:    setae %al
   1781 ; CHECK-NEXT:    movzbl %al, %eax
   1782 ; CHECK-NEXT:    retl
   1783   %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1784   ret i32 %res
   1785 }
   1786 declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
   1787 
   1788 
   1789 define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.comigt.ss: expects vcomiss on the two
        ; arguments, with the i32 result formed from flags by seta / movzbl.
   1790 ; CHECK-LABEL: test_x86_sse_comigt_ss:
   1791 ; CHECK:       # BB#0:
   1792 ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
   1793 ; CHECK-NEXT:    seta %al
   1794 ; CHECK-NEXT:    movzbl %al, %eax
   1795 ; CHECK-NEXT:    retl
   1796   %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1797   ret i32 %res
   1798 }
   1799 declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
   1800 
   1801 
   1802 define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.comile.ss: expects vcomiss on the two
        ; arguments, with the i32 result formed from flags by setbe / movzbl.
   1803 ; CHECK-LABEL: test_x86_sse_comile_ss:
   1804 ; CHECK:       # BB#0:
   1805 ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
   1806 ; CHECK-NEXT:    setbe %al
   1807 ; CHECK-NEXT:    movzbl %al, %eax
   1808 ; CHECK-NEXT:    retl
   1809   %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1810   ret i32 %res
   1811 }
   1812 declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
   1813 
   1814 
   1815 define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.comilt.ss: expects vcomiss on the two
        ; arguments, with the i32 result rebuilt from the carry flag by sbbl / andl $1.
   1816 ; CHECK-LABEL: test_x86_sse_comilt_ss:
   1817 ; CHECK:       # BB#0:
   1818 ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
   1819 ; CHECK-NEXT:    sbbl %eax, %eax
   1820 ; CHECK-NEXT:    andl $1, %eax
   1821 ; CHECK-NEXT:    retl
   1822   %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1823   ret i32 %res
   1824 }
   1825 declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
   1826 
   1827 
   1828 define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.comineq.ss: expects vcomiss on the two
        ; arguments, with the i32 result formed from flags by setne / movzbl.
   1829 ; CHECK-LABEL: test_x86_sse_comineq_ss:
   1830 ; CHECK:       # BB#0:
   1831 ; CHECK-NEXT:    vcomiss %xmm1, %xmm0
   1832 ; CHECK-NEXT:    setne %al
   1833 ; CHECK-NEXT:    movzbl %al, %eax
   1834 ; CHECK-NEXT:    retl
   1835   %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   1836   ret i32 %res
   1837 }
   1838 declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
   1839 
   1840 
   1841 define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
        ; Lowering test for @llvm.x86.sse.cvtsi2ss: the constant i32 7 operand is
        ; expected to be placed in %eax and then converted into %xmm0 by vcvtsi2ssl.
   1842 ; CHECK-LABEL: test_x86_sse_cvtsi2ss:
   1843 ; CHECK:       # BB#0:
   1844 ; CHECK-NEXT:    movl $7, %eax
   1845 ; CHECK-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
   1846 ; CHECK-NEXT:    retl
   1847   %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
   1848   ret <4 x float> %res
   1849 }
   1850 declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
   1851 
   1852 
   1853 define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
        ; Lowering test for @llvm.x86.sse.cvtss2si: the call is expected to become a
        ; single vcvtss2si from %xmm0 into %eax, then retl.
   1854 ; CHECK-LABEL: test_x86_sse_cvtss2si:
   1855 ; CHECK:       # BB#0:
   1856 ; CHECK-NEXT:    vcvtss2si %xmm0, %eax
   1857 ; CHECK-NEXT:    retl
   1858   %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
   1859   ret i32 %res
   1860 }
   1861 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
   1862 
   1863 
   1864 define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
        ; Lowering test for @llvm.x86.sse.cvttss2si: the call is expected to become a
        ; single vcvttss2si from %xmm0 into %eax, then retl.
   1865 ; CHECK-LABEL: test_x86_sse_cvttss2si:
   1866 ; CHECK:       # BB#0:
   1867 ; CHECK-NEXT:    vcvttss2si %xmm0, %eax
   1868 ; CHECK-NEXT:    retl
   1869   %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
   1870   ret i32 %res
   1871 }
   1872 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
   1873 
   1874 
   1875 define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.div.ss: the call is expected to become a
        ; single vdivss on %xmm1/%xmm0, then retl.
   1876 ; CHECK-LABEL: test_x86_sse_div_ss:
   1877 ; CHECK:       # BB#0:
   1878 ; CHECK-NEXT:    vdivss %xmm1, %xmm0, %xmm0
   1879 ; CHECK-NEXT:    retl
   1880   %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1881   ret <4 x float> %res
   1882 }
   1883 declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
   1884 
   1885 
   1886 define void @test_x86_sse_ldmxcsr(i8* %a0) {
        ; Lowering test for @llvm.x86.sse.ldmxcsr: the pointer argument is expected to
        ; be read from the stack into %eax and used as the memory operand of vldmxcsr.
   1887 ; CHECK-LABEL: test_x86_sse_ldmxcsr:
   1888 ; CHECK:       # BB#0:
   1889 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   1890 ; CHECK-NEXT:    vldmxcsr (%eax)
   1891 ; CHECK-NEXT:    retl
   1892   call void @llvm.x86.sse.ldmxcsr(i8* %a0)
   1893   ret void
   1894 }
   1895 declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
   1896 
   1897 
   1898 
   1899 define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.max.ps: the call is expected to become a
        ; single vmaxps on %xmm1/%xmm0, then retl.
   1900 ; CHECK-LABEL: test_x86_sse_max_ps:
   1901 ; CHECK:       # BB#0:
   1902 ; CHECK-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
   1903 ; CHECK-NEXT:    retl
   1904   %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1905   ret <4 x float> %res
   1906 }
   1907 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
   1908 
   1909 
   1910 define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.max.ss: the call is expected to become a
        ; single vmaxss on %xmm1/%xmm0, then retl.
   1911 ; CHECK-LABEL: test_x86_sse_max_ss:
   1912 ; CHECK:       # BB#0:
   1913 ; CHECK-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
   1914 ; CHECK-NEXT:    retl
   1915   %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1916   ret <4 x float> %res
   1917 }
   1918 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
   1919 
   1920 
   1921 define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.min.ps: the call is expected to become a
        ; single vminps on %xmm1/%xmm0, then retl.
   1922 ; CHECK-LABEL: test_x86_sse_min_ps:
   1923 ; CHECK:       # BB#0:
   1924 ; CHECK-NEXT:    vminps %xmm1, %xmm0, %xmm0
   1925 ; CHECK-NEXT:    retl
   1926   %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1927   ret <4 x float> %res
   1928 }
   1929 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
   1930 
   1931 
   1932 define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.min.ss: the call is expected to become a
        ; single vminss on %xmm1/%xmm0, then retl.
   1933 ; CHECK-LABEL: test_x86_sse_min_ss:
   1934 ; CHECK:       # BB#0:
   1935 ; CHECK-NEXT:    vminss %xmm1, %xmm0, %xmm0
   1936 ; CHECK-NEXT:    retl
   1937   %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1938   ret <4 x float> %res
   1939 }
   1940 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
   1941 
   1942 
   1943 define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
        ; Lowering test for @llvm.x86.sse.movmsk.ps: the call is expected to become a
        ; single vmovmskps from %xmm0 into %eax, then retl.
   1944 ; CHECK-LABEL: test_x86_sse_movmsk_ps:
   1945 ; CHECK:       # BB#0:
   1946 ; CHECK-NEXT:    vmovmskps %xmm0, %eax
   1947 ; CHECK-NEXT:    retl
   1948   %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
   1949   ret i32 %res
   1950 }
   1951 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
   1952 
   1953 
   1954 
   1955 define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.mul.ss: the call is expected to become a
        ; single vmulss on %xmm1/%xmm0, then retl.
   1956 ; CHECK-LABEL: test_x86_sse_mul_ss:
   1957 ; CHECK:       # BB#0:
   1958 ; CHECK-NEXT:    vmulss %xmm1, %xmm0, %xmm0
   1959 ; CHECK-NEXT:    retl
   1960   %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   1961   ret <4 x float> %res
   1962 }
   1963 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
   1964 
   1965 
   1966 define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
        ; Lowering test for @llvm.x86.sse.rcp.ps: the call is expected to become a
        ; single vrcpps on %xmm0, then retl.
   1967 ; CHECK-LABEL: test_x86_sse_rcp_ps:
   1968 ; CHECK:       # BB#0:
   1969 ; CHECK-NEXT:    vrcpps %xmm0, %xmm0
   1970 ; CHECK-NEXT:    retl
   1971   %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1972   ret <4 x float> %res
   1973 }
   1974 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
   1975 
   1976 
   1977 define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
        ; Lowering test for @llvm.x86.sse.rcp.ss: the one-argument call is expected to
        ; become the three-operand vrcpss with %xmm0 in every operand position.
   1978 ; CHECK-LABEL: test_x86_sse_rcp_ss:
   1979 ; CHECK:       # BB#0:
   1980 ; CHECK-NEXT:    vrcpss %xmm0, %xmm0, %xmm0
   1981 ; CHECK-NEXT:    retl
   1982   %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1983   ret <4 x float> %res
   1984 }
   1985 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
   1986 
   1987 
   1988 define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
        ; Lowering test for @llvm.x86.sse.rsqrt.ps: the call is expected to become a
        ; single vrsqrtps on %xmm0, then retl.
   1989 ; CHECK-LABEL: test_x86_sse_rsqrt_ps:
   1990 ; CHECK:       # BB#0:
   1991 ; CHECK-NEXT:    vrsqrtps %xmm0, %xmm0
   1992 ; CHECK-NEXT:    retl
   1993   %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   1994   ret <4 x float> %res
   1995 }
   1996 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
   1997 
   1998 
   1999 define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
        ; Lowering test for @llvm.x86.sse.rsqrt.ss: the one-argument call is expected to
        ; become the three-operand vrsqrtss with %xmm0 in every operand position.
   2000 ; CHECK-LABEL: test_x86_sse_rsqrt_ss:
   2001 ; CHECK:       # BB#0:
   2002 ; CHECK-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0
   2003 ; CHECK-NEXT:    retl
   2004   %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   2005   ret <4 x float> %res
   2006 }
   2007 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
   2008 
   2009 
   2010 define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
        ; Lowering test for @llvm.x86.sse.sqrt.ps: the call is expected to become a
        ; single vsqrtps on %xmm0, then retl.
   2011 ; CHECK-LABEL: test_x86_sse_sqrt_ps:
   2012 ; CHECK:       # BB#0:
   2013 ; CHECK-NEXT:    vsqrtps %xmm0, %xmm0
   2014 ; CHECK-NEXT:    retl
   2015   %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   2016   ret <4 x float> %res
   2017 }
   2018 declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
   2019 
   2020 
   2021 define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
        ; Lowering test for @llvm.x86.sse.sqrt.ss: the one-argument call is expected to
        ; become the three-operand vsqrtss with %xmm0 in every operand position.
   2022 ; CHECK-LABEL: test_x86_sse_sqrt_ss:
   2023 ; CHECK:       # BB#0:
   2024 ; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
   2025 ; CHECK-NEXT:    retl
   2026   %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
   2027   ret <4 x float> %res
   2028 }
   2029 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
   2030 
   2031 
   2032 define void @test_x86_sse_stmxcsr(i8* %a0) {
        ; Lowering test for @llvm.x86.sse.stmxcsr: the pointer argument is expected to
        ; be read from the stack into %eax and used as the memory operand of vstmxcsr.
   2033 ; CHECK-LABEL: test_x86_sse_stmxcsr:
   2034 ; CHECK:       # BB#0:
   2035 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2036 ; CHECK-NEXT:    vstmxcsr (%eax)
   2037 ; CHECK-NEXT:    retl
   2038   call void @llvm.x86.sse.stmxcsr(i8* %a0)
   2039   ret void
   2040 }
   2041 declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
   2042 
   2043 
   2044 define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.storeu.ps: the pointer argument is expected to
        ; be read from the stack into %eax, and %xmm0 stored through it with vmovups.
   2045 ; CHECK-LABEL: test_x86_sse_storeu_ps:
   2046 ; CHECK:       # BB#0:
   2047 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2048 ; CHECK-NEXT:    vmovups %xmm0, (%eax)
   2049 ; CHECK-NEXT:    retl
   2050   call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
   2051   ret void
   2052 }
   2053 declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
   2054 
   2055 
   2056 define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.sub.ss: the call is expected to become a
        ; single vsubss on %xmm1/%xmm0, then retl.
   2057 ; CHECK-LABEL: test_x86_sse_sub_ss:
   2058 ; CHECK:       # BB#0:
   2059 ; CHECK-NEXT:    vsubss %xmm1, %xmm0, %xmm0
   2060 ; CHECK-NEXT:    retl
   2061   %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   2062   ret <4 x float> %res
   2063 }
   2064 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
   2065 
   2066 
   2067 define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.ucomieq.ss: expects vucomiss on the two
        ; arguments, with the i32 result formed from flags by sete / movzbl.
   2068 ; CHECK-LABEL: test_x86_sse_ucomieq_ss:
   2069 ; CHECK:       # BB#0:
   2070 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
   2071 ; CHECK-NEXT:    sete %al
   2072 ; CHECK-NEXT:    movzbl %al, %eax
   2073 ; CHECK-NEXT:    retl
   2074   %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2075   ret i32 %res
   2076 }
   2077 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
   2078 
   2079 
   2080 define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.ucomige.ss: expects vucomiss on the two
        ; arguments, with the i32 result formed from flags by setae / movzbl.
   2081 ; CHECK-LABEL: test_x86_sse_ucomige_ss:
   2082 ; CHECK:       # BB#0:
   2083 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
   2084 ; CHECK-NEXT:    setae %al
   2085 ; CHECK-NEXT:    movzbl %al, %eax
   2086 ; CHECK-NEXT:    retl
   2087   %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2088   ret i32 %res
   2089 }
   2090 declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
   2091 
   2092 
   2093 define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.ucomigt.ss: expects vucomiss on the two
        ; arguments, with the i32 result formed from flags by seta / movzbl.
   2094 ; CHECK-LABEL: test_x86_sse_ucomigt_ss:
   2095 ; CHECK:       # BB#0:
   2096 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
   2097 ; CHECK-NEXT:    seta %al
   2098 ; CHECK-NEXT:    movzbl %al, %eax
   2099 ; CHECK-NEXT:    retl
   2100   %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2101   ret i32 %res
   2102 }
   2103 declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
   2104 
   2105 
   2106 define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.ucomile.ss: expects vucomiss on the two
        ; arguments, with the i32 result formed from flags by setbe / movzbl.
   2107 ; CHECK-LABEL: test_x86_sse_ucomile_ss:
   2108 ; CHECK:       # BB#0:
   2109 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
   2110 ; CHECK-NEXT:    setbe %al
   2111 ; CHECK-NEXT:    movzbl %al, %eax
   2112 ; CHECK-NEXT:    retl
   2113   %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2114   ret i32 %res
   2115 }
   2116 declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
   2117 
   2118 
   2119 define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.ucomilt.ss: expects vucomiss on the two
        ; arguments, with the i32 result rebuilt from the carry flag by sbbl / andl $1.
   2120 ; CHECK-LABEL: test_x86_sse_ucomilt_ss:
   2121 ; CHECK:       # BB#0:
   2122 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
   2123 ; CHECK-NEXT:    sbbl %eax, %eax
   2124 ; CHECK-NEXT:    andl $1, %eax
   2125 ; CHECK-NEXT:    retl
   2126   %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2127   ret i32 %res
   2128 }
   2129 declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
   2130 
   2131 
   2132 define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
        ; Lowering test for @llvm.x86.sse.ucomineq.ss: expects vucomiss on the two
        ; arguments, with the i32 result formed from flags by setne / movzbl.
   2133 ; CHECK-LABEL: test_x86_sse_ucomineq_ss:
   2134 ; CHECK:       # BB#0:
   2135 ; CHECK-NEXT:    vucomiss %xmm1, %xmm0
   2136 ; CHECK-NEXT:    setne %al
   2137 ; CHECK-NEXT:    movzbl %al, %eax
   2138 ; CHECK-NEXT:    retl
   2139   %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   2140   ret i32 %res
   2141 }
   2142 declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
   2143 
   2144 
   2145 define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
        ; Lowering test for @llvm.x86.ssse3.pabs.b.128: the call is expected to become
        ; a single vpabsb on %xmm0, then retl.
   2146 ; CHECK-LABEL: test_x86_ssse3_pabs_b_128:
   2147 ; CHECK:       # BB#0:
   2148 ; CHECK-NEXT:    vpabsb %xmm0, %xmm0
   2149 ; CHECK-NEXT:    retl
   2150   %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
   2151   ret <16 x i8> %res
   2152 }
   2153 declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
   2154 
   2155 
   2156 define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
        ; Lowering test for @llvm.x86.ssse3.pabs.d.128: the call is expected to become
        ; a single vpabsd on %xmm0, then retl.
   2157 ; CHECK-LABEL: test_x86_ssse3_pabs_d_128:
   2158 ; CHECK:       # BB#0:
   2159 ; CHECK-NEXT:    vpabsd %xmm0, %xmm0
   2160 ; CHECK-NEXT:    retl
   2161   %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
   2162   ret <4 x i32> %res
   2163 }
   2164 declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
   2165 
   2166 
   ; Lowering test for llvm.x86.ssse3.pabs.w.128: the assertions below expect
   ; one vpabsw on %xmm0 and then the return.
   2167 define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
   2168 ; CHECK-LABEL: test_x86_ssse3_pabs_w_128:
   2169 ; CHECK:       # BB#0:
   2170 ; CHECK-NEXT:    vpabsw %xmm0, %xmm0
   2171 ; CHECK-NEXT:    retl
   2172   %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
   2173   ret <8 x i16> %res
   2174 }
   2175 declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
   2176 
   2177 
   ; Lowering test for llvm.x86.ssse3.phadd.d.128: the assertions below expect
   ; one vphaddd of %xmm1 and %xmm0 into %xmm0.
   2178 define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
   2179 ; CHECK-LABEL: test_x86_ssse3_phadd_d_128:
   2180 ; CHECK:       # BB#0:
   2181 ; CHECK-NEXT:    vphaddd %xmm1, %xmm0, %xmm0
   2182 ; CHECK-NEXT:    retl
   2183   %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   2184   ret <4 x i32> %res
   2185 }
   2186 declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
   2187 
   2188 
   ; Lowering test for llvm.x86.ssse3.phadd.sw.128: the assertions below expect
   ; one vphaddsw of %xmm1 and %xmm0 into %xmm0.
   2189 define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
   2190 ; CHECK-LABEL: test_x86_ssse3_phadd_sw_128:
   2191 ; CHECK:       # BB#0:
   2192 ; CHECK-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0
   2193 ; CHECK-NEXT:    retl
   2194   %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   2195   ret <8 x i16> %res
   2196 }
   2197 declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
   2198 
   2199 
   ; Lowering test for llvm.x86.ssse3.phadd.w.128: the assertions below expect
   ; one vphaddw of %xmm1 and %xmm0 into %xmm0.
   2200 define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
   2201 ; CHECK-LABEL: test_x86_ssse3_phadd_w_128:
   2202 ; CHECK:       # BB#0:
   2203 ; CHECK-NEXT:    vphaddw %xmm1, %xmm0, %xmm0
   2204 ; CHECK-NEXT:    retl
   2205   %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   2206   ret <8 x i16> %res
   2207 }
   2208 declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
   2209 
   2210 
   ; Lowering test for llvm.x86.ssse3.phsub.d.128: the assertions below expect
   ; one vphsubd of %xmm1 and %xmm0 into %xmm0.
   2211 define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
   2212 ; CHECK-LABEL: test_x86_ssse3_phsub_d_128:
   2213 ; CHECK:       # BB#0:
   2214 ; CHECK-NEXT:    vphsubd %xmm1, %xmm0, %xmm0
   2215 ; CHECK-NEXT:    retl
   2216   %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   2217   ret <4 x i32> %res
   2218 }
   2219 declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
   2220 
   2221 
   ; Lowering test for llvm.x86.ssse3.phsub.sw.128: the assertions below expect
   ; one vphsubsw of %xmm1 and %xmm0 into %xmm0.
   2222 define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
   2223 ; CHECK-LABEL: test_x86_ssse3_phsub_sw_128:
   2224 ; CHECK:       # BB#0:
   2225 ; CHECK-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0
   2226 ; CHECK-NEXT:    retl
   2227   %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   2228   ret <8 x i16> %res
   2229 }
   2230 declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
   2231 
   2232 
   ; Lowering test for llvm.x86.ssse3.phsub.w.128: the assertions below expect
   ; one vphsubw of %xmm1 and %xmm0 into %xmm0.
   2233 define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
   2234 ; CHECK-LABEL: test_x86_ssse3_phsub_w_128:
   2235 ; CHECK:       # BB#0:
   2236 ; CHECK-NEXT:    vphsubw %xmm1, %xmm0, %xmm0
   2237 ; CHECK-NEXT:    retl
   2238   %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   2239   ret <8 x i16> %res
   2240 }
   2241 declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
   2242 
   2243 
   ; Lowering test for llvm.x86.ssse3.pmadd.ub.sw.128 (two <16 x i8> inputs,
   ; <8 x i16> result): the assertions below expect one vpmaddubsw.
   2244 define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
   2245 ; CHECK-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
   2246 ; CHECK:       # BB#0:
   2247 ; CHECK-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0
   2248 ; CHECK-NEXT:    retl
   2249   %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
   2250   ret <8 x i16> %res
   2251 }
   2252 declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
   2253 
   2254 
   ; Lowering test for llvm.x86.ssse3.pmul.hr.sw.128: the assertions below
   ; expect one vpmulhrsw of %xmm1 and %xmm0 into %xmm0.
   2255 define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
   2256 ; CHECK-LABEL: test_x86_ssse3_pmul_hr_sw_128:
   2257 ; CHECK:       # BB#0:
   2258 ; CHECK-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0
   2259 ; CHECK-NEXT:    retl
   2260   %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   2261   ret <8 x i16> %res
   2262 }
   2263 declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
   2264 
   2265 
   ; Lowering test for llvm.x86.ssse3.pshuf.b.128: the assertions below expect
   ; one vpshufb of %xmm1 and %xmm0 into %xmm0.
   2266 define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
   2267 ; CHECK-LABEL: test_x86_ssse3_pshuf_b_128:
   2268 ; CHECK:       # BB#0:
   2269 ; CHECK-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
   2270 ; CHECK-NEXT:    retl
   2271   %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
   2272   ret <16 x i8> %res
   2273 }
   2274 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
   2275 
   2276 
   ; Lowering test for llvm.x86.ssse3.psign.b.128: the assertions below expect
   ; one vpsignb of %xmm1 and %xmm0 into %xmm0.
   2277 define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
   2278 ; CHECK-LABEL: test_x86_ssse3_psign_b_128:
   2279 ; CHECK:       # BB#0:
   2280 ; CHECK-NEXT:    vpsignb %xmm1, %xmm0, %xmm0
   2281 ; CHECK-NEXT:    retl
   2282   %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
   2283   ret <16 x i8> %res
   2284 }
   2285 declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
   2286 
   2287 
   ; Lowering test for llvm.x86.ssse3.psign.d.128: the assertions below expect
   ; one vpsignd of %xmm1 and %xmm0 into %xmm0.
   2288 define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
   2289 ; CHECK-LABEL: test_x86_ssse3_psign_d_128:
   2290 ; CHECK:       # BB#0:
   2291 ; CHECK-NEXT:    vpsignd %xmm1, %xmm0, %xmm0
   2292 ; CHECK-NEXT:    retl
   2293   %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
   2294   ret <4 x i32> %res
   2295 }
   2296 declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
   2297 
   2298 
   ; Lowering test for llvm.x86.ssse3.psign.w.128: the assertions below expect
   ; one vpsignw of %xmm1 and %xmm0 into %xmm0.
   2299 define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
   2300 ; CHECK-LABEL: test_x86_ssse3_psign_w_128:
   2301 ; CHECK:       # BB#0:
   2302 ; CHECK-NEXT:    vpsignw %xmm1, %xmm0, %xmm0
   2303 ; CHECK-NEXT:    retl
   2304   %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
   2305   ret <8 x i16> %res
   2306 }
   2307 declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
   2308 
   2309 
   ; Lowering test for llvm.x86.avx.addsub.pd.256: the assertions below expect
   ; one vaddsubpd on the ymm operands.
   2310 define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
   2311 ; CHECK-LABEL: test_x86_avx_addsub_pd_256:
   2312 ; CHECK:       # BB#0:
   2313 ; CHECK-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0
   2314 ; CHECK-NEXT:    retl
   2315   %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2316   ret <4 x double> %res
   2317 }
   2318 declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2319 
   2320 
   ; Lowering test for llvm.x86.avx.addsub.ps.256: the assertions below expect
   ; one vaddsubps on the ymm operands.
   2321 define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2322 ; CHECK-LABEL: test_x86_avx_addsub_ps_256:
   2323 ; CHECK:       # BB#0:
   2324 ; CHECK-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0
   2325 ; CHECK-NEXT:    retl
   2326   %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2327   ret <8 x float> %res
   2328 }
   2329 declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2330 
   2331 
   ; Lowering test for the three-operand llvm.x86.avx.blendv.pd.256: the
   ; assertions below expect one vblendvpd using %ymm2, %ymm1 and %ymm0.
   2332 define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
   2333 ; CHECK-LABEL: test_x86_avx_blendv_pd_256:
   2334 ; CHECK:       # BB#0:
   2335 ; CHECK-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
   2336 ; CHECK-NEXT:    retl
   2337   %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
   2338   ret <4 x double> %res
   2339 }
   2340 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
   2341 
   2342 
   ; Lowering test for the three-operand llvm.x86.avx.blendv.ps.256: the
   ; assertions below expect one vblendvps using %ymm2, %ymm1 and %ymm0.
   2343 define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
   2344 ; CHECK-LABEL: test_x86_avx_blendv_ps_256:
   2345 ; CHECK:       # BB#0:
   2346 ; CHECK-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0
   2347 ; CHECK-NEXT:    retl
   2348   %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
   2349   ret <8 x float> %res
   2350 }
   2351 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
   2352 
   2353 
   ; Lowering test for llvm.x86.avx.cmp.pd.256 with immediate 7: the assertions
   ; below expect that immediate to be printed as the vcmpordpd mnemonic.
   2354 define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
   2355 ; CHECK-LABEL: test_x86_avx_cmp_pd_256:
   2356 ; CHECK:       # BB#0:
   2357 ; CHECK-NEXT:    vcmpordpd %ymm1, %ymm0, %ymm0
   2358 ; CHECK-NEXT:    retl
   2359   %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
   2360   ret <4 x double> %res
   2361 }
   2362 declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
   2363 
   2364 
   ; Lowering test for llvm.x86.avx.cmp.ps.256 with immediate 7: the assertions
   ; below expect that immediate to be printed as the vcmpordps mnemonic.
   ; The intrinsic's declaration appears later in the file.
   2365 define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2366 ; CHECK-LABEL: test_x86_avx_cmp_ps_256:
   2367 ; CHECK:       # BB#0:
   2368 ; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %ymm0
   2369 ; CHECK-NEXT:    retl
   2370   %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   2371   ret <8 x float> %res
   2372 }
   2373 
   ; Mnemonic coverage test for llvm.x86.avx.cmp.ps.256: 32 chained calls use
   ; immediates 0 through 31 in order, each taking the previous result as its
   ; second operand. The assertions below expect one distinct vcmp*ps
   ; mnemonic per immediate, from vcmpeqps (0) to vcmptrue_usps (31), with only
   ; the last compare writing %ymm0.
   2374 define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
   2375 ; CHECK-LABEL: test_x86_avx_cmp_ps_256_pseudo_op:
   2376 ; CHECK:       # BB#0:
   2377 ; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1
   2378 ; CHECK-NEXT:    vcmpltps %ymm1, %ymm0, %ymm1
   2379 ; CHECK-NEXT:    vcmpleps %ymm1, %ymm0, %ymm1
   2380 ; CHECK-NEXT:    vcmpunordps %ymm1, %ymm0, %ymm1
   2381 ; CHECK-NEXT:    vcmpneqps %ymm1, %ymm0, %ymm1
   2382 ; CHECK-NEXT:    vcmpnltps %ymm1, %ymm0, %ymm1
   2383 ; CHECK-NEXT:    vcmpnleps %ymm1, %ymm0, %ymm1
   2384 ; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %ymm1
   2385 ; CHECK-NEXT:    vcmpeq_uqps %ymm1, %ymm0, %ymm1
   2386 ; CHECK-NEXT:    vcmpngeps %ymm1, %ymm0, %ymm1
   2387 ; CHECK-NEXT:    vcmpngtps %ymm1, %ymm0, %ymm1
   2388 ; CHECK-NEXT:    vcmpfalseps %ymm1, %ymm0, %ymm1
   2389 ; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %ymm1
   2390 ; CHECK-NEXT:    vcmpgeps %ymm1, %ymm0, %ymm1
   2391 ; CHECK-NEXT:    vcmpgtps %ymm1, %ymm0, %ymm1
   2392 ; CHECK-NEXT:    vcmptrueps %ymm1, %ymm0, %ymm1
   2393 ; CHECK-NEXT:    vcmpeq_osps %ymm1, %ymm0, %ymm1
   2394 ; CHECK-NEXT:    vcmplt_oqps %ymm1, %ymm0, %ymm1
   2395 ; CHECK-NEXT:    vcmple_oqps %ymm1, %ymm0, %ymm1
   2396 ; CHECK-NEXT:    vcmpunord_sps %ymm1, %ymm0, %ymm1
   2397 ; CHECK-NEXT:    vcmpneq_usps %ymm1, %ymm0, %ymm1
   2398 ; CHECK-NEXT:    vcmpnlt_uqps %ymm1, %ymm0, %ymm1
   2399 ; CHECK-NEXT:    vcmpnle_uqps %ymm1, %ymm0, %ymm1
   2400 ; CHECK-NEXT:    vcmpord_sps %ymm1, %ymm0, %ymm1
   2401 ; CHECK-NEXT:    vcmpeq_usps %ymm1, %ymm0, %ymm1
   2402 ; CHECK-NEXT:    vcmpnge_uqps %ymm1, %ymm0, %ymm1
   2403 ; CHECK-NEXT:    vcmpngt_uqps %ymm1, %ymm0, %ymm1
   2404 ; CHECK-NEXT:    vcmpfalse_osps %ymm1, %ymm0, %ymm1
   2405 ; CHECK-NEXT:    vcmpneq_osps %ymm1, %ymm0, %ymm1
   2406 ; CHECK-NEXT:    vcmpge_oqps %ymm1, %ymm0, %ymm1
   2407 ; CHECK-NEXT:    vcmpgt_oqps %ymm1, %ymm0, %ymm1
   2408 ; CHECK-NEXT:    vcmptrue_usps %ymm1, %ymm0, %ymm0
   2409 ; CHECK-NEXT:    retl
   2410   %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
   2411   %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
   2412   %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
   2413   %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
   2414   %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
   2415   %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
   2416   %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
   2417   %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
   2418   %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
   2419   %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
   2420   %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
   2421   %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
   2422   %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
   2423   %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
   2424   %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
   2425   %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
   2426   %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
   2427   %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
   2428   %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
   2429   %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
   2430   %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
   2431   %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
   2432   %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
   2433   %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
   2434   %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
   2435   %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
   2436   %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
   2437   %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
   2438   %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
   2439   %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
   2440   %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
   2441   %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
   2442   ret <8 x float> %res
   2443 }
   2444 declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
   2445 
   2446 
   ; Lowering test for llvm.x86.avx.cvt.pd2.ps.256 (ymm source, xmm result): the
   ; assertions below expect vcvtpd2psy and then a vzeroupper before returning.
   2447 define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
   2448 ; CHECK-LABEL: test_x86_avx_cvt_pd2_ps_256:
   2449 ; CHECK:       # BB#0:
   2450 ; CHECK-NEXT:    vcvtpd2psy %ymm0, %xmm0
   2451 ; CHECK-NEXT:    vzeroupper
   2452 ; CHECK-NEXT:    retl
   2453   %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
   2454   ret <4 x float> %res
   2455 }
   2456 declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone
   2457 
   2458 
   ; Lowering test for llvm.x86.avx.cvt.pd2dq.256 (ymm source, xmm result): the
   ; assertions below expect vcvtpd2dqy and then a vzeroupper before returning.
   2459 define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
   2460 ; CHECK-LABEL: test_x86_avx_cvt_pd2dq_256:
   2461 ; CHECK:       # BB#0:
   2462 ; CHECK-NEXT:    vcvtpd2dqy %ymm0, %xmm0
   2463 ; CHECK-NEXT:    vzeroupper
   2464 ; CHECK-NEXT:    retl
   2465   %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
   2466   ret <4 x i32> %res
   2467 }
   2468 declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
   2469 
   2470 
   ; Lowering test for llvm.x86.avx.cvt.ps2.pd.256: the assertions below expect
   ; one vcvtps2pd from %xmm0 into %ymm0.
   2471 define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
   2472 ; CHECK-LABEL: test_x86_avx_cvt_ps2_pd_256:
   2473 ; CHECK:       # BB#0:
   2474 ; CHECK-NEXT:    vcvtps2pd %xmm0, %ymm0
   2475 ; CHECK-NEXT:    retl
   2476   %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
   2477   ret <4 x double> %res
   2478 }
   2479 declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
   2480 
   2481 
   ; Lowering test for llvm.x86.avx.cvt.ps2dq.256: the assertions below expect
   ; one vcvtps2dq on %ymm0.
   2482 define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
   2483 ; CHECK-LABEL: test_x86_avx_cvt_ps2dq_256:
   2484 ; CHECK:       # BB#0:
   2485 ; CHECK-NEXT:    vcvtps2dq %ymm0, %ymm0
   2486 ; CHECK-NEXT:    retl
   2487   %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
   2488   ret <8 x i32> %res
   2489 }
   2490 declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
   2491 
   2492 
   ; Lowering test for llvm.x86.avx.cvtdq2.pd.256: the assertions below expect
   ; one vcvtdq2pd from %xmm0 into %ymm0.
   2493 define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
   2494 ; CHECK-LABEL: test_x86_avx_cvtdq2_pd_256:
   2495 ; CHECK:       # BB#0:
   2496 ; CHECK-NEXT:    vcvtdq2pd %xmm0, %ymm0
   2497 ; CHECK-NEXT:    retl
   2498   %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
   2499   ret <4 x double> %res
   2500 }
   2501 declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
   2502 
   2503 
   ; Lowering test for llvm.x86.avx.cvtdq2.ps.256: the assertions below expect
   ; one vcvtdq2ps on %ymm0.
   2504 define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
   2505 ; CHECK-LABEL: test_x86_avx_cvtdq2_ps_256:
   2506 ; CHECK:       # BB#0:
   2507 ; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0
   2508 ; CHECK-NEXT:    retl
   2509   %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
   2510   ret <8 x float> %res
   2511 }
   2512 declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
   2513 
   2514 
   ; Lowering test for llvm.x86.avx.cvtt.pd2dq.256 (ymm source, xmm result): the
   ; assertions below expect vcvttpd2dqy and then a vzeroupper before returning.
   2515 define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
   2516 ; CHECK-LABEL: test_x86_avx_cvtt_pd2dq_256:
   2517 ; CHECK:       # BB#0:
   2518 ; CHECK-NEXT:    vcvttpd2dqy %ymm0, %xmm0
   2519 ; CHECK-NEXT:    vzeroupper
   2520 ; CHECK-NEXT:    retl
   2521   %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
   2522   ret <4 x i32> %res
   2523 }
   2524 declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
   2525 
   2526 
   ; Lowering test for llvm.x86.avx.cvtt.ps2dq.256: the assertions below expect
   ; one vcvttps2dq on %ymm0.
   2527 define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
   2528 ; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256:
   2529 ; CHECK:       # BB#0:
   2530 ; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
   2531 ; CHECK-NEXT:    retl
   2532   %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
   2533   ret <8 x i32> %res
   2534 }
   2535 declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
   2536 
   2537 
   ; Lowering test for llvm.x86.avx.dp.ps.256 with immediate 7: the assertions
   ; below expect one vdpps carrying that same immediate ($7).
   2538 define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2539 ; CHECK-LABEL: test_x86_avx_dp_ps_256:
   2540 ; CHECK:       # BB#0:
   2541 ; CHECK-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0
   2542 ; CHECK-NEXT:    retl
   2543   %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   2544   ret <8 x float> %res
   2545 }
   2546 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
   2547 
   2548 
   ; Lowering test for llvm.x86.avx.hadd.pd.256: the assertions below expect
   ; one vhaddpd on the ymm operands.
   2549 define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
   2550 ; CHECK-LABEL: test_x86_avx_hadd_pd_256:
   2551 ; CHECK:       # BB#0:
   2552 ; CHECK-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
   2553 ; CHECK-NEXT:    retl
   2554   %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2555   ret <4 x double> %res
   2556 }
   2557 declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2558 
   2559 
   ; Lowering test for llvm.x86.avx.hadd.ps.256: the assertions below expect
   ; one vhaddps on the ymm operands.
   2560 define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2561 ; CHECK-LABEL: test_x86_avx_hadd_ps_256:
   2562 ; CHECK:       # BB#0:
   2563 ; CHECK-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
   2564 ; CHECK-NEXT:    retl
   2565   %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2566   ret <8 x float> %res
   2567 }
   2568 declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2569 
   2570 
   ; Lowering test for llvm.x86.avx.hsub.pd.256: the assertions below expect
   ; one vhsubpd on the ymm operands.
   2571 define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
   2572 ; CHECK-LABEL: test_x86_avx_hsub_pd_256:
   2573 ; CHECK:       # BB#0:
   2574 ; CHECK-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0
   2575 ; CHECK-NEXT:    retl
   2576   %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2577   ret <4 x double> %res
   2578 }
   2579 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2580 
   2581 
   ; Lowering test for llvm.x86.avx.hsub.ps.256: the assertions below expect
   ; one vhsubps on the ymm operands.
   2582 define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2583 ; CHECK-LABEL: test_x86_avx_hsub_ps_256:
   2584 ; CHECK:       # BB#0:
   2585 ; CHECK-NEXT:    vhsubps %ymm1, %ymm0, %ymm0
   2586 ; CHECK-NEXT:    retl
   2587   %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2588   ret <8 x float> %res
   2589 }
   2590 declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2591 
   2592 
   ; Lowering test for llvm.x86.avx.ldu.dq.256: the assertions below expect a
   ; load from the stack into %eax (presumably the %a0 pointer argument) and
   ; then vlddqu from (%eax) into %ymm0.
   2593 define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
   2594 ; CHECK-LABEL: test_x86_avx_ldu_dq_256:
   2595 ; CHECK:       # BB#0:
   2596 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2597 ; CHECK-NEXT:    vlddqu (%eax), %ymm0
   2598 ; CHECK-NEXT:    retl
   2599   %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
   2600   ret <32 x i8> %res
   2601 }
   2602 declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
   2603 
   2604 
   ; Lowering test for llvm.x86.avx.maskload.pd: the assertions below expect a
   ; load from the stack into %eax (presumably the %a0 pointer argument) and
   ; then an xmm vmaskmovpd with (%eax) as its memory source.
   2605 define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
   2606 ; CHECK-LABEL: test_x86_avx_maskload_pd:
   2607 ; CHECK:       # BB#0:
   2608 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2609 ; CHECK-NEXT:    vmaskmovpd (%eax), %xmm0, %xmm0
   2610 ; CHECK-NEXT:    retl
   2611   %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   2612   ret <2 x double> %res
   2613 }
   2614 declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly
   2615 
   2616 
   ; Lowering test for llvm.x86.avx.maskload.pd.256: the assertions below expect
   ; a load from the stack into %eax (presumably the %a0 pointer argument) and
   ; then a ymm vmaskmovpd with (%eax) as its memory source.
   2617 define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) {
   2618 ; CHECK-LABEL: test_x86_avx_maskload_pd_256:
   2619 ; CHECK:       # BB#0:
   2620 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2621 ; CHECK-NEXT:    vmaskmovpd (%eax), %ymm0, %ymm0
   2622 ; CHECK-NEXT:    retl
   2623   %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2624   ret <4 x double> %res
   2625 }
   2626 declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly
   2627 
   2628 
   ; Lowering test for llvm.x86.avx.maskload.ps: the assertions below expect a
   ; load from the stack into %eax (presumably the %a0 pointer argument) and
   ; then an xmm vmaskmovps with (%eax) as its memory source.
   2629 define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) {
   2630 ; CHECK-LABEL: test_x86_avx_maskload_ps:
   2631 ; CHECK:       # BB#0:
   2632 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2633 ; CHECK-NEXT:    vmaskmovps (%eax), %xmm0, %xmm0
   2634 ; CHECK-NEXT:    retl
   2635   %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
   2636   ret <4 x float> %res
   2637 }
   2638 declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly
   2639 
   2640 
   ; Lowering test for llvm.x86.avx.maskload.ps.256: the assertions below expect
   ; a load from the stack into %eax (presumably the %a0 pointer argument) and
   ; then a ymm vmaskmovps with (%eax) as its memory source.
   2641 define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) {
   2642 ; CHECK-LABEL: test_x86_avx_maskload_ps_256:
   2643 ; CHECK:       # BB#0:
   2644 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2645 ; CHECK-NEXT:    vmaskmovps (%eax), %ymm0, %ymm0
   2646 ; CHECK-NEXT:    retl
   2647   %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2648   ret <8 x float> %res
   2649 }
   2650 declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
   2651 
   2652 
   ; Lowering test for the void llvm.x86.avx.maskstore.pd: the assertions below
   ; expect a load from the stack into %eax (presumably the %a0 pointer
   ; argument) and then an xmm vmaskmovpd whose destination is (%eax).
   2653 define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) {
   2654 ; CHECK-LABEL: test_x86_avx_maskstore_pd:
   2655 ; CHECK:       # BB#0:
   2656 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2657 ; CHECK-NEXT:    vmaskmovpd %xmm1, %xmm0, (%eax)
   2658 ; CHECK-NEXT:    retl
   2659   call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2)
   2660   ret void
   2661 }
   2662 declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind
   2663 
   2664 
   ; Lowering test for the void llvm.x86.avx.maskstore.pd.256: the assertions
   ; below expect a load from the stack into %eax (presumably the %a0 pointer
   ; argument), a ymm vmaskmovpd whose destination is (%eax), and a vzeroupper
   ; before the return.
   2665 define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) {
   2666 ; CHECK-LABEL: test_x86_avx_maskstore_pd_256:
   2667 ; CHECK:       # BB#0:
   2668 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2669 ; CHECK-NEXT:    vmaskmovpd %ymm1, %ymm0, (%eax)
   2670 ; CHECK-NEXT:    vzeroupper
   2671 ; CHECK-NEXT:    retl
   2672   call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2)
   2673   ret void
   2674 }
   2675 declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind
   2676 
   2677 
   ; Lowering test for the void llvm.x86.avx.maskstore.ps: the assertions below
   ; expect a load from the stack into %eax (presumably the %a0 pointer
   ; argument) and then an xmm vmaskmovps whose destination is (%eax).
   2678 define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) {
   2679 ; CHECK-LABEL: test_x86_avx_maskstore_ps:
   2680 ; CHECK:       # BB#0:
   2681 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2682 ; CHECK-NEXT:    vmaskmovps %xmm1, %xmm0, (%eax)
   2683 ; CHECK-NEXT:    retl
   2684   call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2)
   2685   ret void
   2686 }
   2687 declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind
   2688 
   2689 
   ; Lowering test for the void llvm.x86.avx.maskstore.ps.256: the assertions
   ; below expect a load from the stack into %eax (presumably the %a0 pointer
   ; argument), a ymm vmaskmovps whose destination is (%eax), and a vzeroupper
   ; before the return.
   2690 define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) {
   2691 ; CHECK-LABEL: test_x86_avx_maskstore_ps_256:
   2692 ; CHECK:       # BB#0:
   2693 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2694 ; CHECK-NEXT:    vmaskmovps %ymm1, %ymm0, (%eax)
   2695 ; CHECK-NEXT:    vzeroupper
   2696 ; CHECK-NEXT:    retl
   2697   call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2)
   2698   ret void
   2699 }
   2700 declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
   2701 
   2702 
   ; Lowering test for llvm.x86.avx.max.pd.256: the assertions below expect
   ; one vmaxpd on the ymm operands.
   2703 define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
   2704 ; CHECK-LABEL: test_x86_avx_max_pd_256:
   2705 ; CHECK:       # BB#0:
   2706 ; CHECK-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
   2707 ; CHECK-NEXT:    retl
   2708   %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2709   ret <4 x double> %res
   2710 }
   2711 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2712 
   2713 
   ; Lowering test for llvm.x86.avx.max.ps.256: the assertions below expect
   ; one vmaxps on the ymm operands.
   2714 define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2715 ; CHECK-LABEL: test_x86_avx_max_ps_256:
   2716 ; CHECK:       # BB#0:
   2717 ; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
   2718 ; CHECK-NEXT:    retl
   2719   %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2720   ret <8 x float> %res
   2721 }
   2722 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2723 
   2724 
   ; Lowering test for llvm.x86.avx.min.pd.256: the assertions below expect
   ; one vminpd on the ymm operands.
   2725 define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
   2726 ; CHECK-LABEL: test_x86_avx_min_pd_256:
   2727 ; CHECK:       # BB#0:
   2728 ; CHECK-NEXT:    vminpd %ymm1, %ymm0, %ymm0
   2729 ; CHECK-NEXT:    retl
   2730   %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
   2731   ret <4 x double> %res
   2732 }
   2733 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2734 
   2735 
   ; Lowering test for llvm.x86.avx.min.ps.256: the assertions below expect
   ; one vminps on the ymm operands.
   2736 define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
   2737 ; CHECK-LABEL: test_x86_avx_min_ps_256:
   2738 ; CHECK:       # BB#0:
   2739 ; CHECK-NEXT:    vminps %ymm1, %ymm0, %ymm0
   2740 ; CHECK-NEXT:    retl
   2741   %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
   2742   ret <8 x float> %res
   2743 }
   2744 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2745 
   2746 
   ; Lowering test for llvm.x86.avx.movmsk.pd.256 (ymm source, i32 result): the
   ; assertions below expect vmovmskpd into %eax and then a vzeroupper.
   2747 define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
   2748 ; CHECK-LABEL: test_x86_avx_movmsk_pd_256:
   2749 ; CHECK:       # BB#0:
   2750 ; CHECK-NEXT:    vmovmskpd %ymm0, %eax
   2751 ; CHECK-NEXT:    vzeroupper
   2752 ; CHECK-NEXT:    retl
   2753   %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
   2754   ret i32 %res
   2755 }
   2756 declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
   2757 
   2758 
   ; Lowering test for llvm.x86.avx.movmsk.ps.256 (ymm source, i32 result): the
   ; assertions below expect vmovmskps into %eax and then a vzeroupper.
   2759 define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
   2760 ; CHECK-LABEL: test_x86_avx_movmsk_ps_256:
   2761 ; CHECK:       # BB#0:
   2762 ; CHECK-NEXT:    vmovmskps %ymm0, %eax
   2763 ; CHECK-NEXT:    vzeroupper
   2764 ; CHECK-NEXT:    retl
   2765   %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
   2766   ret i32 %res
   2767 }
   2768 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
   2769 
   2770 
   2771 
   2772 
   2773 
   2774 
   2775 
   ; Lowering test for llvm.x86.avx.ptestc.256: the assertions below expect a
   ; ymm vptest, sbbl/andl to form the i32 result in %eax, and a vzeroupper
   ; before the return.
   2776 define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
   2777 ; CHECK-LABEL: test_x86_avx_ptestc_256:
   2778 ; CHECK:       # BB#0:
   2779 ; CHECK-NEXT:    vptest %ymm1, %ymm0
   2780 ; CHECK-NEXT:    sbbl %eax, %eax
   2781 ; CHECK-NEXT:    andl $1, %eax
   2782 ; CHECK-NEXT:    vzeroupper
   2783 ; CHECK-NEXT:    retl
   2784   %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
   2785   ret i32 %res
   2786 }
   2787 declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
   2788 
   2789 
   ; Lowering test for llvm.x86.avx.ptestnzc.256: the assertions below expect a
   ; ymm vptest, seta/movzbl to form the i32 result in %eax, and a vzeroupper
   ; before the return.
   2790 define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
   2791 ; CHECK-LABEL: test_x86_avx_ptestnzc_256:
   2792 ; CHECK:       # BB#0:
   2793 ; CHECK-NEXT:    vptest %ymm1, %ymm0
   2794 ; CHECK-NEXT:    seta %al
   2795 ; CHECK-NEXT:    movzbl %al, %eax
   2796 ; CHECK-NEXT:    vzeroupper
   2797 ; CHECK-NEXT:    retl
   2798   %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
   2799   ret i32 %res
   2800 }
   2801 declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone
   2802 
   2803 
   2804 define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
          ; Returns the i32 result of llvm.x86.avx.ptestz.256 on (%a0, %a1).
          ; Expected output: vptest, then sete/movzbl to widen the flag result into %eax,
          ; and vzeroupper before the return.
   2805 ; CHECK-LABEL: test_x86_avx_ptestz_256:
   2806 ; CHECK:       # BB#0:
   2807 ; CHECK-NEXT:    vptest %ymm1, %ymm0
   2808 ; CHECK-NEXT:    sete %al
   2809 ; CHECK-NEXT:    movzbl %al, %eax
   2810 ; CHECK-NEXT:    vzeroupper
   2811 ; CHECK-NEXT:    retl
   2812   %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
   2813   ret i32 %res
   2814 }
   2815 declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
   2816 
   2817 
   2818 define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
          ; Returns llvm.x86.avx.rcp.ps.256 of %a0; expected output is a single in-place vrcpps on %ymm0.
   2819 ; CHECK-LABEL: test_x86_avx_rcp_ps_256:
   2820 ; CHECK:       # BB#0:
   2821 ; CHECK-NEXT:    vrcpps %ymm0, %ymm0
   2822 ; CHECK-NEXT:    retl
   2823   %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
   2824   ret <8 x float> %res
   2825 }
   2826 declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
   2827 
   2828 
   2829 define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
          ; Calls llvm.x86.avx.round.pd.256 with the constant i32 7; the same value is
          ; expected to appear as the $7 immediate of vroundpd.
   2830 ; CHECK-LABEL: test_x86_avx_round_pd_256:
   2831 ; CHECK:       # BB#0:
   2832 ; CHECK-NEXT:    vroundpd $7, %ymm0, %ymm0
   2833 ; CHECK-NEXT:    retl
   2834   %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
   2835   ret <4 x double> %res
   2836 }
   2837 declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
   2838 
   2839 
   2840 define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
          ; Calls llvm.x86.avx.round.ps.256 with the constant i32 7; the same value is
          ; expected to appear as the $7 immediate of vroundps.
   2841 ; CHECK-LABEL: test_x86_avx_round_ps_256:
   2842 ; CHECK:       # BB#0:
   2843 ; CHECK-NEXT:    vroundps $7, %ymm0, %ymm0
   2844 ; CHECK-NEXT:    retl
   2845   %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
   2846   ret <8 x float> %res
   2847 }
   2848 declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
   2849 
   2850 
   2851 define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
          ; Returns llvm.x86.avx.rsqrt.ps.256 of %a0; expected output is a single in-place vrsqrtps on %ymm0.
   2852 ; CHECK-LABEL: test_x86_avx_rsqrt_ps_256:
   2853 ; CHECK:       # BB#0:
   2854 ; CHECK-NEXT:    vrsqrtps %ymm0, %ymm0
   2855 ; CHECK-NEXT:    retl
   2856   %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
   2857   ret <8 x float> %res
   2858 }
   2859 declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
   2860 
   2861 
   2862 define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
          ; Returns llvm.x86.avx.sqrt.pd.256 of %a0; expected output is a single in-place vsqrtpd on %ymm0.
   2863 ; CHECK-LABEL: test_x86_avx_sqrt_pd_256:
   2864 ; CHECK:       # BB#0:
   2865 ; CHECK-NEXT:    vsqrtpd %ymm0, %ymm0
   2866 ; CHECK-NEXT:    retl
   2867   %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
   2868   ret <4 x double> %res
   2869 }
   2870 declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
   2871 
   2872 
   2873 define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
          ; Returns llvm.x86.avx.sqrt.ps.256 of %a0; expected output is a single in-place vsqrtps on %ymm0.
   2874 ; CHECK-LABEL: test_x86_avx_sqrt_ps_256:
   2875 ; CHECK:       # BB#0:
   2876 ; CHECK-NEXT:    vsqrtps %ymm0, %ymm0
   2877 ; CHECK-NEXT:    retl
   2878   %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
   2879   ret <8 x float> %res
   2880 }
   2881 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
   2882 
   2883 
   2884 define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
          ; Adds 1 to every byte of %a1 and stores the sum through %a0 with llvm.x86.avx.storeu.dq.256.
          ; The expected output performs the byte add as two 128-bit vpaddb halves
          ; (vextractf128 / vinsertf128) and stores the recombined %ymm0 with vmovups.
   2885   ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and it's hard to force with no 256-bit integer instructions
   2886   ; add operation forces the execution domain.
   2887 ; CHECK-LABEL: test_x86_avx_storeu_dq_256:
   2888 ; CHECK:       # BB#0:
   2889 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2890 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2891 ; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
   2892 ; CHECK-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
   2893 ; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
   2894 ; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2895 ; CHECK-NEXT:    vmovups %ymm0, (%eax)
   2896 ; CHECK-NEXT:    vzeroupper
   2897 ; CHECK-NEXT:    retl
   2898   %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   2899   call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
   2900   ret void
   2901 }
   2902 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
   2903 
   2904 
   2905 define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
          ; Adds an all-zero vector to %a1 and stores the sum through %a0 with llvm.x86.avx.storeu.pd.256.
          ; Expected output: vxorpd to build the zero vector, vaddpd, then an unaligned vmovupd store.
   2906   ; add operation forces the execution domain.
   2907 ; CHECK-LABEL: test_x86_avx_storeu_pd_256:
   2908 ; CHECK:       # BB#0:
   2909 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2910 ; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
   2911 ; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
   2912 ; CHECK-NEXT:    vmovupd %ymm0, (%eax)
   2913 ; CHECK-NEXT:    vzeroupper
   2914 ; CHECK-NEXT:    retl
   2915   %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
   2916   call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
   2917   ret void
   2918 }
   2919 declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
   2920 
   2921 
   2922 define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
          ; Stores %a1 through %a0 with llvm.x86.avx.storeu.ps.256.
          ; Expected output: the pointer is loaded from the stack and %ymm0 is stored with vmovups.
   2923 ; CHECK-LABEL: test_x86_avx_storeu_ps_256:
   2924 ; CHECK:       # BB#0:
   2925 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2926 ; CHECK-NEXT:    vmovups %ymm0, (%eax)
   2927 ; CHECK-NEXT:    vzeroupper
   2928 ; CHECK-NEXT:    retl
   2929   call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
   2930   ret void
   2931 }
   2932 declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
   2933 
   2934 
   2935 define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
          ; Returns llvm.x86.avx.vbroadcastf128.pd.256 of the pointer %a0.
          ; Expected output: the pointer is loaded from the stack and used as the memory operand of vbroadcastf128.
   2936 ; CHECK-LABEL: test_x86_avx_vbroadcastf128_pd_256:
   2937 ; CHECK:       # BB#0:
   2938 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2939 ; CHECK-NEXT:    vbroadcastf128 (%eax), %ymm0
   2940 ; CHECK-NEXT:    retl
   2941   %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
   2942   ret <4 x double> %res
   2943 }
   2944 declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
   2945 
   2946 
   2947 define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
          ; Returns llvm.x86.avx.vbroadcastf128.ps.256 of the pointer %a0.
          ; Expected output: the pointer is loaded from the stack and used as the memory operand of vbroadcastf128.
   2948 ; CHECK-LABEL: test_x86_avx_vbroadcastf128_ps_256:
   2949 ; CHECK:       # BB#0:
   2950 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2951 ; CHECK-NEXT:    vbroadcastf128 (%eax), %ymm0
   2952 ; CHECK-NEXT:    retl
   2953   %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
   2954   ret <8 x float> %res
   2955 }
   2956 declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
   2957 
   2958 
   2959 define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Calls llvm.x86.avx.vperm2f128.pd.256 with the constant selector i8 7.
          ; The expected vperm2f128 shuffle comment for that selector is ymm1[2,3],ymm0[0,1].
   2960 ; CHECK-LABEL: test_x86_avx_vperm2f128_pd_256:
   2961 ; CHECK:       # BB#0:
   2962 ; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
   2963 ; CHECK-NEXT:    retl
   2964   %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
   2965   ret <4 x double> %res
   2966 }
   2967 declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
   2968 
   2969 
   2970 define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Calls llvm.x86.avx.vperm2f128.ps.256 with the constant selector i8 7.
          ; The expected vperm2f128 shuffle comment for that selector is ymm1[2,3],ymm0[0,1].
   2971 ; CHECK-LABEL: test_x86_avx_vperm2f128_ps_256:
   2972 ; CHECK:       # BB#0:
   2973 ; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
   2974 ; CHECK-NEXT:    retl
   2975   %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   2976   ret <8 x float> %res
   2977 }
   2978 declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
   2979 
   2980 
   2981 define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
          ; Calls llvm.x86.avx.vperm2f128.si.256 with the constant selector i8 7.
          ; The expected vperm2f128 shuffle comment for that selector is ymm1[2,3],ymm0[0,1].
   2982 ; CHECK-LABEL: test_x86_avx_vperm2f128_si_256:
   2983 ; CHECK:       # BB#0:
   2984 ; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
   2985 ; CHECK-NEXT:    retl
   2986   %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
   2987   ret <8 x i32> %res
   2988 }
   2989 declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
   2990 
   2991 
   2992 define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
          ; Calls llvm.x86.avx.vpermil.pd with the constant selector i8 1.
          ; The expected vpermilpd shuffle comment for that selector is xmm0[1,0].
   2993 ; CHECK-LABEL: test_x86_avx_vpermil_pd:
   2994 ; CHECK:       # BB#0:
   2995 ; CHECK-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
   2996 ; CHECK-NEXT:    retl
   2997   %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
   2998   ret <2 x double> %res
   2999 }
   3000 declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
   3001 
   3002 
   3003 define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
          ; Calls llvm.x86.avx.vpermil.pd.256 with the constant selector i8 7.
          ; The expected vpermilpd shuffle comment for that selector is ymm0[1,1,3,2].
   3004 ; CHECK-LABEL: test_x86_avx_vpermil_pd_256:
   3005 ; CHECK:       # BB#0:
   3006 ; CHECK-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
   3007 ; CHECK-NEXT:    retl
   3008   %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
   3009   ret <4 x double> %res
   3010 }
   3011 declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
   3012 
   3013 
   3014 define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
          ; Calls llvm.x86.avx.vpermil.ps with the constant selector i8 7.
          ; The expected vpermilps shuffle comment for that selector is xmm0[3,1,0,0].
   3015 ; CHECK-LABEL: test_x86_avx_vpermil_ps:
   3016 ; CHECK:       # BB#0:
   3017 ; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,0,0]
   3018 ; CHECK-NEXT:    retl
   3019   %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
   3020   ret <4 x float> %res
   3021 }
   3022 declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
   3023 
   3024 
   3025 define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
          ; Calls llvm.x86.avx.vpermil.ps.256 with the constant selector i8 7.
          ; The expected vpermilps shuffle comment for that selector is ymm0[3,1,0,0,7,5,4,4].
   3026 ; CHECK-LABEL: test_x86_avx_vpermil_ps_256:
   3027 ; CHECK:       # BB#0:
   3028 ; CHECK-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,1,0,0,7,5,4,4]
   3029 ; CHECK-NEXT:    retl
   3030   %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
   3031   ret <8 x float> %res
   3032 }
   3033 declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
   3034 
   3035 
   3036 define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
          ; Returns llvm.x86.avx.vpermilvar.pd of %a0 with the variable selector %a1.
          ; Expected output: the register-register form of vpermilpd on xmm operands.
   3037 ; CHECK-LABEL: test_x86_avx_vpermilvar_pd:
   3038 ; CHECK:       # BB#0:
   3039 ; CHECK-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
   3040 ; CHECK-NEXT:    retl
   3041   %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
   3042   ret <2 x double> %res
   3043 }
   3044 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
   3045 
   3046 
   3047 define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
          ; Returns llvm.x86.avx.vpermilvar.pd.256 of %a0 with the variable selector %a1.
          ; Expected output: the register-register form of vpermilpd on ymm operands.
   3048 ; CHECK-LABEL: test_x86_avx_vpermilvar_pd_256:
   3049 ; CHECK:       # BB#0:
   3050 ; CHECK-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0
   3051 ; CHECK-NEXT:    retl
   3052   %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
   3053   ret <4 x double> %res
   3054 }
   3055 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
   3056 
   3057 
   3058 define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
          ; Returns llvm.x86.avx.vpermilvar.ps of %a0 with the variable selector %a1.
          ; Expected output: the register-register form of vpermilps on xmm operands.
          ; The intrinsic's declaration follows the _load variant of this test.
   3059 ; CHECK-LABEL: test_x86_avx_vpermilvar_ps:
   3060 ; CHECK:       # BB#0:
   3061 ; CHECK-NEXT:    vpermilps %xmm1, %xmm0, %xmm0
   3062 ; CHECK-NEXT:    retl
   3063   %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
   3064   ret <4 x float> %res
   3065 }
   3066 define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
          ; Same intrinsic as test_x86_avx_vpermilvar_ps, but the selector is loaded from %a1.
          ; Expected output: the load is folded into vpermilps as a memory operand.
   3067 ; CHECK-LABEL: test_x86_avx_vpermilvar_ps_load:
   3068 ; CHECK:       # BB#0:
   3069 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3070 ; CHECK-NEXT:    vpermilps (%eax), %xmm0, %xmm0
   3071 ; CHECK-NEXT:    retl
   3072   %a2 = load <4 x i32>, <4 x i32>* %a1
   3073   %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
   3074   ret <4 x float> %res
   3075 }
   3076 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
   3077 
   3078 
   3079 define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
          ; Returns llvm.x86.avx.vpermilvar.ps.256 of %a0 with the variable selector %a1.
          ; Expected output: the register-register form of vpermilps on ymm operands.
   3080 ; CHECK-LABEL: test_x86_avx_vpermilvar_ps_256:
   3081 ; CHECK:       # BB#0:
   3082 ; CHECK-NEXT:    vpermilps %ymm1, %ymm0, %ymm0
   3083 ; CHECK-NEXT:    retl
   3084   %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
   3085   ret <8 x float> %res
   3086 }
   3087 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
   3088 
   3089 
   3090 define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
          ; Returns the i32 result of llvm.x86.avx.vtestc.pd on (%a0, %a1).
          ; Expected output: vtestpd on xmm operands, then an sbbl/andl $1 pair that leaves the result in %eax.
   3091 ; CHECK-LABEL: test_x86_avx_vtestc_pd:
   3092 ; CHECK:       # BB#0:
   3093 ; CHECK-NEXT:    vtestpd %xmm1, %xmm0
   3094 ; CHECK-NEXT:    sbbl %eax, %eax
   3095 ; CHECK-NEXT:    andl $1, %eax
   3096 ; CHECK-NEXT:    retl
   3097   %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   3098   ret i32 %res
   3099 }
   3100 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
   3101 
   3102 
   3103 define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Returns the i32 result of llvm.x86.avx.vtestc.pd.256 on (%a0, %a1).
          ; Expected output: vtestpd on ymm operands, an sbbl/andl $1 pair, and vzeroupper before the return.
   3104 ; CHECK-LABEL: test_x86_avx_vtestc_pd_256:
   3105 ; CHECK:       # BB#0:
   3106 ; CHECK-NEXT:    vtestpd %ymm1, %ymm0
   3107 ; CHECK-NEXT:    sbbl %eax, %eax
   3108 ; CHECK-NEXT:    andl $1, %eax
   3109 ; CHECK-NEXT:    vzeroupper
   3110 ; CHECK-NEXT:    retl
   3111   %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
   3112   ret i32 %res
   3113 }
   3114 declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
   3115 
   3116 
   3117 define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
          ; Returns the i32 result of llvm.x86.avx.vtestc.ps on (%a0, %a1).
          ; Expected output: vtestps on xmm operands, then an sbbl/andl $1 pair that leaves the result in %eax.
   3118 ; CHECK-LABEL: test_x86_avx_vtestc_ps:
   3119 ; CHECK:       # BB#0:
   3120 ; CHECK-NEXT:    vtestps %xmm1, %xmm0
   3121 ; CHECK-NEXT:    sbbl %eax, %eax
   3122 ; CHECK-NEXT:    andl $1, %eax
   3123 ; CHECK-NEXT:    retl
   3124   %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   3125   ret i32 %res
   3126 }
   3127 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
   3128 
   3129 
   3130 define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Returns the i32 result of llvm.x86.avx.vtestc.ps.256 on (%a0, %a1).
          ; Expected output: vtestps on ymm operands, an sbbl/andl $1 pair, and vzeroupper before the return.
   3131 ; CHECK-LABEL: test_x86_avx_vtestc_ps_256:
   3132 ; CHECK:       # BB#0:
   3133 ; CHECK-NEXT:    vtestps %ymm1, %ymm0
   3134 ; CHECK-NEXT:    sbbl %eax, %eax
   3135 ; CHECK-NEXT:    andl $1, %eax
   3136 ; CHECK-NEXT:    vzeroupper
   3137 ; CHECK-NEXT:    retl
   3138   %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
   3139   ret i32 %res
   3140 }
   3141 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
   3142 
   3143 
   3144 define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
          ; Returns the i32 result of llvm.x86.avx.vtestnzc.pd on (%a0, %a1).
          ; Expected output: vtestpd on xmm operands, then seta/movzbl to widen the flag result into %eax.
   3145 ; CHECK-LABEL: test_x86_avx_vtestnzc_pd:
   3146 ; CHECK:       # BB#0:
   3147 ; CHECK-NEXT:    vtestpd %xmm1, %xmm0
   3148 ; CHECK-NEXT:    seta %al
   3149 ; CHECK-NEXT:    movzbl %al, %eax
   3150 ; CHECK-NEXT:    retl
   3151   %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   3152   ret i32 %res
   3153 }
   3154 declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone
   3155 
   3156 
   3157 define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Returns the i32 result of llvm.x86.avx.vtestnzc.pd.256 on (%a0, %a1).
          ; Expected output: vtestpd on ymm operands, seta/movzbl, and vzeroupper before the return.
   3158 ; CHECK-LABEL: test_x86_avx_vtestnzc_pd_256:
   3159 ; CHECK:       # BB#0:
   3160 ; CHECK-NEXT:    vtestpd %ymm1, %ymm0
   3161 ; CHECK-NEXT:    seta %al
   3162 ; CHECK-NEXT:    movzbl %al, %eax
   3163 ; CHECK-NEXT:    vzeroupper
   3164 ; CHECK-NEXT:    retl
   3165   %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
   3166   ret i32 %res
   3167 }
   3168 declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone
   3169 
   3170 
   3171 define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
          ; Returns the i32 result of llvm.x86.avx.vtestnzc.ps on (%a0, %a1).
          ; Expected output: vtestps on xmm operands, then seta/movzbl to widen the flag result into %eax.
   3172 ; CHECK-LABEL: test_x86_avx_vtestnzc_ps:
   3173 ; CHECK:       # BB#0:
   3174 ; CHECK-NEXT:    vtestps %xmm1, %xmm0
   3175 ; CHECK-NEXT:    seta %al
   3176 ; CHECK-NEXT:    movzbl %al, %eax
   3177 ; CHECK-NEXT:    retl
   3178   %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   3179   ret i32 %res
   3180 }
   3181 declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone
   3182 
   3183 
   3184 define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Returns the i32 result of llvm.x86.avx.vtestnzc.ps.256 on (%a0, %a1).
          ; Expected output: vtestps on ymm operands, seta/movzbl, and vzeroupper before the return.
   3185 ; CHECK-LABEL: test_x86_avx_vtestnzc_ps_256:
   3186 ; CHECK:       # BB#0:
   3187 ; CHECK-NEXT:    vtestps %ymm1, %ymm0
   3188 ; CHECK-NEXT:    seta %al
   3189 ; CHECK-NEXT:    movzbl %al, %eax
   3190 ; CHECK-NEXT:    vzeroupper
   3191 ; CHECK-NEXT:    retl
   3192   %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
   3193   ret i32 %res
   3194 }
   3195 declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone
   3196 
   3197 
   3198 define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
          ; Returns the i32 result of llvm.x86.avx.vtestz.pd on (%a0, %a1).
          ; Expected output: vtestpd on xmm operands, then sete/movzbl to widen the flag result into %eax.
   3199 ; CHECK-LABEL: test_x86_avx_vtestz_pd:
   3200 ; CHECK:       # BB#0:
   3201 ; CHECK-NEXT:    vtestpd %xmm1, %xmm0
   3202 ; CHECK-NEXT:    sete %al
   3203 ; CHECK-NEXT:    movzbl %al, %eax
   3204 ; CHECK-NEXT:    retl
   3205   %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
   3206   ret i32 %res
   3207 }
   3208 declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
   3209 
   3210 
   3211 define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
          ; Returns the i32 result of llvm.x86.avx.vtestz.pd.256 on (%a0, %a1).
          ; Expected output: vtestpd on ymm operands, sete/movzbl, and vzeroupper before the return.
   3212 ; CHECK-LABEL: test_x86_avx_vtestz_pd_256:
   3213 ; CHECK:       # BB#0:
   3214 ; CHECK-NEXT:    vtestpd %ymm1, %ymm0
   3215 ; CHECK-NEXT:    sete %al
   3216 ; CHECK-NEXT:    movzbl %al, %eax
   3217 ; CHECK-NEXT:    vzeroupper
   3218 ; CHECK-NEXT:    retl
   3219   %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
   3220   ret i32 %res
   3221 }
   3222 declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone
   3223 
   3224 
   3225 define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
          ; Returns the i32 result of llvm.x86.avx.vtestz.ps on (%a0, %a1).
          ; Expected output: vtestps on xmm operands, then sete/movzbl to widen the flag result into %eax.
   3226 ; CHECK-LABEL: test_x86_avx_vtestz_ps:
   3227 ; CHECK:       # BB#0:
   3228 ; CHECK-NEXT:    vtestps %xmm1, %xmm0
   3229 ; CHECK-NEXT:    sete %al
   3230 ; CHECK-NEXT:    movzbl %al, %eax
   3231 ; CHECK-NEXT:    retl
   3232   %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
   3233   ret i32 %res
   3234 }
   3235 declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
   3236 
   3237 
   3238 define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
          ; Returns the i32 result of llvm.x86.avx.vtestz.ps.256 on (%a0, %a1).
          ; Expected output: vtestps on ymm operands, sete/movzbl, and vzeroupper before the return.
   3239 ; CHECK-LABEL: test_x86_avx_vtestz_ps_256:
   3240 ; CHECK:       # BB#0:
   3241 ; CHECK-NEXT:    vtestps %ymm1, %ymm0
   3242 ; CHECK-NEXT:    sete %al
   3243 ; CHECK-NEXT:    movzbl %al, %eax
   3244 ; CHECK-NEXT:    vzeroupper
   3245 ; CHECK-NEXT:    retl
   3246   %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
   3247   ret i32 %res
   3248 }
   3249 declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
   3250 
   3251 
   3252 define void @test_x86_avx_vzeroall() {
          ; Calls llvm.x86.avx.vzeroall and returns.
          ; Expected output: vzeroall, followed by a vzeroupper before the return.
   3253 ; CHECK-LABEL: test_x86_avx_vzeroall:
   3254 ; CHECK:       # BB#0:
   3255 ; CHECK-NEXT:    vzeroall
   3256 ; CHECK-NEXT:    vzeroupper
   3257 ; CHECK-NEXT:    retl
   3258   call void @llvm.x86.avx.vzeroall()
   3259   ret void
   3260 }
   3261 declare void @llvm.x86.avx.vzeroall() nounwind
   3262 
   3263 
   3264 define void @test_x86_avx_vzeroupper() {
          ; Calls llvm.x86.avx.vzeroupper and returns.
          ; Two consecutive vzeroupper instructions are expected. NOTE(review): presumably one
          ; comes from the intrinsic call and the other is inserted before the return - confirm.
   3265 ; CHECK-LABEL: test_x86_avx_vzeroupper:
   3266 ; CHECK:       # BB#0:
   3267 ; CHECK-NEXT:    vzeroupper
   3268 ; CHECK-NEXT:    vzeroupper
   3269 ; CHECK-NEXT:    retl
   3270   call void @llvm.x86.avx.vzeroupper()
   3271   ret void
   3272 }
   3273 declare void @llvm.x86.avx.vzeroupper() nounwind
   3274 
   3275 ; Make sure that instructions which have no AVX equivalents, but are associated with SSEX feature flags, still work.
   3276 
   3277 define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
          ; Tail-calls llvm.x86.sse3.monitor with (%P, %E, %H).
          ; Expected output: three stack loads into %edx, %ecx and %eax, a leal (%eax), %eax,
          ; and then the monitor instruction.
   3278 ; CHECK-LABEL: monitor:
   3279 ; CHECK:       # BB#0:
   3280 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
   3281 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
   3282 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3283 ; CHECK-NEXT:    leal (%eax), %eax
   3284 ; CHECK-NEXT:    monitor
   3285 ; CHECK-NEXT:    retl
   3286   tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
   3287   ret void
   3288 }
   3289 declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
   3290 
   3291 define void @mwait(i32 %E, i32 %H) nounwind {
          ; Tail-calls llvm.x86.sse3.mwait with (%E, %H).
          ; Expected output: two stack loads into %ecx and %eax, then the mwait instruction.
   3292 ; CHECK-LABEL: mwait:
   3293 ; CHECK:       # BB#0:
   3294 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
   3295 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3296 ; CHECK-NEXT:    mwait
   3297 ; CHECK-NEXT:    retl
   3298   tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
   3299   ret void
   3300 }
   3301 declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
   3302 
   3303 define void @sfence() nounwind {
          ; Tail-calls llvm.x86.sse.sfence; expected output is a bare sfence followed by the return.
   3304 ; CHECK-LABEL: sfence:
   3305 ; CHECK:       # BB#0:
   3306 ; CHECK-NEXT:    sfence
   3307 ; CHECK-NEXT:    retl
   3308   tail call void @llvm.x86.sse.sfence()
   3309   ret void
   3310 }
   3311 declare void @llvm.x86.sse.sfence() nounwind
   3312 
   3313 define void @lfence() nounwind {
          ; Tail-calls llvm.x86.sse2.lfence; expected output is a bare lfence followed by the return.
   3314 ; CHECK-LABEL: lfence:
   3315 ; CHECK:       # BB#0:
   3316 ; CHECK-NEXT:    lfence
   3317 ; CHECK-NEXT:    retl
   3318   tail call void @llvm.x86.sse2.lfence()
   3319   ret void
   3320 }
   3321 declare void @llvm.x86.sse2.lfence() nounwind
   3322 
   3323 define void @mfence() nounwind {
          ; Tail-calls llvm.x86.sse2.mfence; expected output is a bare mfence followed by the return.
   3324 ; CHECK-LABEL: mfence:
   3325 ; CHECK:       # BB#0:
   3326 ; CHECK-NEXT:    mfence
   3327 ; CHECK-NEXT:    retl
   3328   tail call void @llvm.x86.sse2.mfence()
   3329   ret void
   3330 }
   3331 declare void @llvm.x86.sse2.mfence() nounwind
   3332 
   3333 define void @clflush(i8* %p) nounwind {
          ; Tail-calls llvm.x86.sse2.clflush on %p.
          ; Expected output: the pointer is loaded from the stack and used as the clflush memory operand.
   3334 ; CHECK-LABEL: clflush:
   3335 ; CHECK:       # BB#0:
   3336 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3337 ; CHECK-NEXT:    clflush (%eax)
   3338 ; CHECK-NEXT:    retl
   3339   tail call void @llvm.x86.sse2.clflush(i8* %p)
   3340   ret void
   3341 }
   3342 declare void @llvm.x86.sse2.clflush(i8*) nounwind
   3343 
   3344 define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
          ; Returns llvm.x86.sse42.crc32.32.8 of (%a, %b).
          ; Expected output: one stack load into %eax, then crc32b with a stack memory operand.
   3345 ; CHECK-LABEL: crc32_32_8:
   3346 ; CHECK:       # BB#0:
   3347 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3348 ; CHECK-NEXT:    crc32b {{[0-9]+}}(%esp), %eax
   3349 ; CHECK-NEXT:    retl
   3350   %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
   3351   ret i32 %tmp
   3352 }
   3353 declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
   3354 
   3355 define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
          ; Returns llvm.x86.sse42.crc32.32.16 of (%a, %b).
          ; Expected output: one stack load into %eax, then crc32w with a stack memory operand.
   3356 ; CHECK-LABEL: crc32_32_16:
   3357 ; CHECK:       # BB#0:
   3358 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3359 ; CHECK-NEXT:    crc32w {{[0-9]+}}(%esp), %eax
   3360 ; CHECK-NEXT:    retl
   3361   %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
   3362   ret i32 %tmp
   3363 }
   3364 declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
   3365 
   3366 define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
          ; Returns llvm.x86.sse42.crc32.32.32 of (%a, %b).
          ; Expected output: one stack load into %eax, then crc32l with a stack memory operand.
   3367 ; CHECK-LABEL: crc32_32_32:
   3368 ; CHECK:       # BB#0:
   3369 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3370 ; CHECK-NEXT:    crc32l {{[0-9]+}}(%esp), %eax
   3371 ; CHECK-NEXT:    retl
   3372   %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
   3373   ret i32 %tmp
   3374 }
   3375 declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
   3376 
   3377 define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
          ; Adds <1, 1> to %a1, widens the sum to <4 x i64> (upper two lanes undef) and stores it
          ; through %p with llvm.x86.avx.movnt.dq.256.
          ; Expected output: vpaddq with a constant-pool memory operand, then vmovntdq of %ymm0.
          ; The constant-pool label is matched with a pattern for its function index, because that
          ; index changes whenever a function is added or removed earlier in this file.
   3378 ; CHECK-LABEL: movnt_dq:
   3379 ; CHECK:       # BB#0:
   3380 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3381 ; CHECK-NEXT:    vpaddq LCPI{{[0-9]+}}_0, %xmm0, %xmm0
   3382 ; CHECK-NEXT:    vmovntdq %ymm0, (%eax)
   3383 ; CHECK-NEXT:    vzeroupper
   3384 ; CHECK-NEXT:    retl
   3385   %a2 = add <2 x i64> %a1, <i64 1, i64 1>
   3386   %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   3387   tail call void @llvm.x86.avx.movnt.dq.256(i8* %p, <4 x i64> %a3) nounwind
   3388   ret void
   3389 }
   3390 declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
   3391 
   3392 define void @movnt_ps(i8* %p, <8 x float> %a) nounwind {
          ; Stores %a through %p with llvm.x86.avx.movnt.ps.256.
          ; Expected output: the pointer is loaded from the stack and %ymm0 is stored with vmovntps.
   3393 ; CHECK-LABEL: movnt_ps:
   3394 ; CHECK:       # BB#0:
   3395 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3396 ; CHECK-NEXT:    vmovntps %ymm0, (%eax)
   3397 ; CHECK-NEXT:    vzeroupper
   3398 ; CHECK-NEXT:    retl
   3399   tail call void @llvm.x86.avx.movnt.ps.256(i8* %p, <8 x float> %a) nounwind
   3400   ret void
   3401 }
   3402 declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
   3403 
   3404 define void @movnt_pd(i8* %p, <4 x double> %a1) nounwind {
          ; Adds an all-zero vector to %a1 and stores the sum through %p with llvm.x86.avx.movnt.pd.256.
          ; Expected output: vxorpd to build the zero vector, vaddpd, then a vmovntpd store.
   3405   ; add operation forces the execution domain.
   3406 ; CHECK-LABEL: movnt_pd:
   3407 ; CHECK:       # BB#0:
   3408 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
   3409 ; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
   3410 ; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
   3411 ; CHECK-NEXT:    vmovntpd %ymm0, (%eax)
   3412 ; CHECK-NEXT:    vzeroupper
   3413 ; CHECK-NEXT:    retl
   3414   %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
   3415   tail call void @llvm.x86.avx.movnt.pd.256(i8* %p, <4 x double> %a2) nounwind
   3416   ret void
   3417 }
   3418 declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
   3419 
   3420 
   3421 ; Check for pclmulqdq
   3422 define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
          ; Calls llvm.x86.pclmulqdq on (%a0, %a1) with the constant i8 0; the same value is
          ; expected to appear as the $0 immediate of vpclmulqdq.
   3423 ; CHECK-LABEL: test_x86_pclmulqdq:
   3424 ; CHECK:       # BB#0:
   3425 ; CHECK-NEXT:    vpclmulqdq $0, %xmm1, %xmm0, %xmm0
   3426 ; CHECK-NEXT:    retl
   3427   %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
   3428   ret <2 x i64> %res
   3429 }
   3430 declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone
   3431