Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,GENERIC
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SLM
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,SANDY
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,SANDY
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
      9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,HASWELL
     10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
     11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,BROADWELL
     12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
     13 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,SKYLAKE
     14 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
     15 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,SKX
     16 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
     17 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,BTVER2
     18 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
     19 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2   | FileCheck %s --check-prefixes=CHECK,ZNVER1
     20 
     ; test_blendpd: scheduling-annotation test for the two-lane double blend.
     ; The IR at the bottom builds two <0, 3> shuffles (lane 0 from the first
     ; operand, lane 1 from the second): one purely on the register arguments
     ; %a0/%a1 and one whose second operand is loaded from %a2, with an fadd
     ; in between. For every check prefix named on the run lines at the top of
     ; the file, the autogenerated checks expect a register-form blend, an add
     ; and a memory-form blend (blendpd/addpd for GENERIC, SLM and the *-SSE
     ; prefixes; vblendpd/vaddpd for the remaining ones), each followed by its
     ; `sched: [..]` annotation.
     ; NOTE(review): the bracketed pair is presumably latency and reciprocal
     ; throughput as printed by -print-schedule -- confirm against llc docs.
     21 define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
     22 ; GENERIC-LABEL: test_blendpd:
     23 ; GENERIC:       # %bb.0:
     24 ; GENERIC-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
     25 ; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
     26 ; GENERIC-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
     27 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     28 ;
     29 ; SLM-LABEL: test_blendpd:
     30 ; SLM:       # %bb.0:
     31 ; SLM-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00]
     32 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
     33 ; SLM-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [4:1.00]
     34 ; SLM-NEXT:    retq # sched: [4:1.00]
     35 ;
     36 ; SANDY-SSE-LABEL: test_blendpd:
     37 ; SANDY-SSE:       # %bb.0:
     38 ; SANDY-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
     39 ; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
     40 ; SANDY-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
     41 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
     42 ;
     43 ; SANDY-LABEL: test_blendpd:
     44 ; SANDY:       # %bb.0:
     45 ; SANDY-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
     46 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
     47 ; SANDY-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
     48 ; SANDY-NEXT:    retq # sched: [1:1.00]
     49 ;
     50 ; HASWELL-SSE-LABEL: test_blendpd:
     51 ; HASWELL-SSE:       # %bb.0:
     52 ; HASWELL-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
     53 ; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
     54 ; HASWELL-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
     55 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
     56 ;
     57 ; HASWELL-LABEL: test_blendpd:
     58 ; HASWELL:       # %bb.0:
     59 ; HASWELL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
     60 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
     61 ; HASWELL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
     62 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     63 ;
     64 ; BROADWELL-SSE-LABEL: test_blendpd:
     65 ; BROADWELL-SSE:       # %bb.0:
     66 ; BROADWELL-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
     67 ; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
     68 ; BROADWELL-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50]
     69 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
     70 ;
     71 ; BROADWELL-LABEL: test_blendpd:
     72 ; BROADWELL:       # %bb.0:
     73 ; BROADWELL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
     74 ; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
     75 ; BROADWELL-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50]
     76 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     77 ;
     78 ; SKYLAKE-SSE-LABEL: test_blendpd:
     79 ; SKYLAKE-SSE:       # %bb.0:
     80 ; SKYLAKE-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
     81 ; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
     82 ; SKYLAKE-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
     83 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
     84 ;
     85 ; SKYLAKE-LABEL: test_blendpd:
     86 ; SKYLAKE:       # %bb.0:
     87 ; SKYLAKE-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
     88 ; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
     89 ; SKYLAKE-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
     90 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
     91 ;
     92 ; SKX-SSE-LABEL: test_blendpd:
     93 ; SKX-SSE:       # %bb.0:
     94 ; SKX-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
     95 ; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
     96 ; SKX-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
     97 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
     98 ;
     99 ; SKX-LABEL: test_blendpd:
    100 ; SKX:       # %bb.0:
    101 ; SKX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
    102 ; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
    103 ; SKX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
    104 ; SKX-NEXT:    retq # sched: [7:1.00]
    105 ;
    106 ; BTVER2-SSE-LABEL: test_blendpd:
    107 ; BTVER2-SSE:       # %bb.0:
    108 ; BTVER2-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
    109 ; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    110 ; BTVER2-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00]
    111 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    112 ;
    113 ; BTVER2-LABEL: test_blendpd:
    114 ; BTVER2:       # %bb.0:
    115 ; BTVER2-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
    116 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    117 ; BTVER2-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00]
    118 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    119 ;
    120 ; ZNVER1-SSE-LABEL: test_blendpd:
    121 ; ZNVER1-SSE:       # %bb.0:
    122 ; ZNVER1-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
    123 ; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    124 ; ZNVER1-SSE-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50]
    125 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    126 ;
    127 ; ZNVER1-LABEL: test_blendpd:
    128 ; ZNVER1:       # %bb.0:
    129 ; ZNVER1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
    130 ; ZNVER1-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    131 ; ZNVER1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50]
    132 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register blend: lane 0 comes from %a0, lane 1 from %a1 (mask index 3
          ; addresses element 1 of the second shuffle operand).
    133   %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 3>
          ; 16-byte aligned vector load that feeds the second blend.
    134   %2 = load <2 x double>, <2 x double> *%a2, align 16
          ; The add sits between the two shuffles and consumes the first one.
    135   %3 = fadd <2 x double> %a1, %1
          ; Second blend: lane 0 from the sum, lane 1 from the loaded vector.
    136   %4 = shufflevector <2 x double> %3, <2 x double> %2, <2 x i32> <i32 0, i32 3>
    137   ret <2 x double> %4
    138 }
    139 
    ; test_blendps: scheduling-annotation test for the four-lane float blend.
    ; The IR at the bottom performs one shuffle of the register arguments
    ; (%a0/%a1, mask <0, 5, 6, 3>) and one shuffle of %a1 with a vector loaded
    ; from %a2 (mask <0, 5, 2, 3>), then adds the two results. For every check
    ; prefix named on the run lines at the top of the file, the autogenerated
    ; checks expect a register-form blend, a memory-form blend and an add
    ; (blendps/addps for GENERIC, SLM and the *-SSE prefixes; vblendps/vaddps
    ; for the remaining ones), each followed by its `sched: [..]` annotation.
    ; NOTE(review): the bracketed pair is presumably latency and reciprocal
    ; throughput as printed by -print-schedule -- confirm against llc docs.
    140 define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
    141 ; GENERIC-LABEL: test_blendps:
    142 ; GENERIC:       # %bb.0:
    143 ; GENERIC-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
    144 ; GENERIC-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
    145 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
    146 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    147 ;
    148 ; SLM-LABEL: test_blendps:
    149 ; SLM:       # %bb.0:
    150 ; SLM-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00]
    151 ; SLM-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [4:1.00]
    152 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
    153 ; SLM-NEXT:    retq # sched: [4:1.00]
    154 ;
    155 ; SANDY-SSE-LABEL: test_blendps:
    156 ; SANDY-SSE:       # %bb.0:
    157 ; SANDY-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
    158 ; SANDY-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
    159 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
    160 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    161 ;
    162 ; SANDY-LABEL: test_blendps:
    163 ; SANDY:       # %bb.0:
    164 ; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
    165 ; SANDY-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
    166 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    167 ; SANDY-NEXT:    retq # sched: [1:1.00]
    168 ;
    169 ; HASWELL-SSE-LABEL: test_blendps:
    170 ; HASWELL-SSE:       # %bb.0:
    171 ; HASWELL-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
    172 ; HASWELL-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
    173 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
    174 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    175 ;
    176 ; HASWELL-LABEL: test_blendps:
    177 ; HASWELL:       # %bb.0:
    178 ; HASWELL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
    179 ; HASWELL-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
    180 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    181 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    182 ;
    183 ; BROADWELL-SSE-LABEL: test_blendps:
    184 ; BROADWELL-SSE:       # %bb.0:
    185 ; BROADWELL-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
    186 ; BROADWELL-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:0.50]
    187 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
    188 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    189 ;
    190 ; BROADWELL-LABEL: test_blendps:
    191 ; BROADWELL:       # %bb.0:
    192 ; BROADWELL-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
    193 ; BROADWELL-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:0.50]
    194 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    195 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    196 ;
    197 ; SKYLAKE-SSE-LABEL: test_blendps:
    198 ; SKYLAKE-SSE:       # %bb.0:
    199 ; SKYLAKE-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
    200 ; SKYLAKE-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
    201 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
    202 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    203 ;
    204 ; SKYLAKE-LABEL: test_blendps:
    205 ; SKYLAKE:       # %bb.0:
    206 ; SKYLAKE-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
    207 ; SKYLAKE-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
    208 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    209 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    210 ;
    211 ; SKX-SSE-LABEL: test_blendps:
    212 ; SKX-SSE:       # %bb.0:
    213 ; SKX-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
    214 ; SKX-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
    215 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
    216 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    217 ;
    218 ; SKX-LABEL: test_blendps:
    219 ; SKX:       # %bb.0:
    220 ; SKX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
    221 ; SKX-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
    222 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    223 ; SKX-NEXT:    retq # sched: [7:1.00]
    224 ;
    225 ; BTVER2-SSE-LABEL: test_blendps:
    226 ; BTVER2-SSE:       # %bb.0:
    227 ; BTVER2-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
    228 ; BTVER2-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:1.00]
    229 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
    230 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    231 ;
    232 ; BTVER2-LABEL: test_blendps:
    233 ; BTVER2:       # %bb.0:
    234 ; BTVER2-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
    235 ; BTVER2-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:1.00]
    236 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    237 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    238 ;
    239 ; ZNVER1-SSE-LABEL: test_blendps:
    240 ; ZNVER1-SSE:       # %bb.0:
    241 ; ZNVER1-SSE-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
    242 ; ZNVER1-SSE-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [8:0.50]
    243 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
    244 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    245 ;
    246 ; ZNVER1-LABEL: test_blendps:
    247 ; ZNVER1:       # %bb.0:
    248 ; ZNVER1-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
    249 ; ZNVER1-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [8:0.50]
    250 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    251 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register blend: lanes 0 and 3 from %a0, lanes 1 and 2 from %a1
          ; (mask indices 5 and 6 address elements 1 and 2 of the second operand).
    252   %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
          ; 16-byte aligned vector load that feeds the second blend.
    253   %2 = load <4 x float>, <4 x float> *%a2, align 16
          ; Memory blend: only lane 1 comes from the loaded vector, the rest from %a1.
    254   %3 = shufflevector <4 x float> %a1, <4 x float> %2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
          ; The add consumes both blends, keeping both of them live.
    255   %4 = fadd <4 x float> %1, %3
    256   ret <4 x float> %4
    257 }
    258 
    ; test_blendvpd: scheduling-annotation test for the variable double blend.
    ; The IR at the bottom calls the llvm.x86.sse41.blendvpd intrinsic twice
    ; with the same third operand %a2: first on the register arguments %a0/%a1,
    ; then on that result and a vector loaded from %a3. For GENERIC, SLM and
    ; the *-SSE prefixes the autogenerated checks expect register copies around
    ; the blends (movapd %xmm0 -> %xmm3, movaps %xmm2 -> %xmm0, and movapd
    ; %xmm3 -> %xmm0 at the end) with blendvpd in register and memory form; for
    ; the remaining prefixes they expect just two vblendvpd instructions. Each
    ; instruction is followed by its `sched: [..]` annotation.
    ; NOTE(review): the bracketed pair is presumably latency and reciprocal
    ; throughput as printed by -print-schedule -- confirm against llc docs.
    259 define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) {
    260 ; GENERIC-LABEL: test_blendvpd:
    261 ; GENERIC:       # %bb.0:
    262 ; GENERIC-NEXT:    movapd %xmm0, %xmm3 # sched: [1:1.00]
    263 ; GENERIC-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
    264 ; GENERIC-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
    265 ; GENERIC-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
    266 ; GENERIC-NEXT:    movapd %xmm3, %xmm0 # sched: [1:1.00]
    267 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    268 ;
    269 ; SLM-LABEL: test_blendvpd:
    270 ; SLM:       # %bb.0:
    271 ; SLM-NEXT:    movapd %xmm0, %xmm3 # sched: [1:0.50]
    272 ; SLM-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.50]
    273 ; SLM-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [1:1.00]
    274 ; SLM-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [4:1.00]
    275 ; SLM-NEXT:    movapd %xmm3, %xmm0 # sched: [1:0.50]
    276 ; SLM-NEXT:    retq # sched: [4:1.00]
    277 ;
    278 ; SANDY-SSE-LABEL: test_blendvpd:
    279 ; SANDY-SSE:       # %bb.0:
    280 ; SANDY-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:1.00]
    281 ; SANDY-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
    282 ; SANDY-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
    283 ; SANDY-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
    284 ; SANDY-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:1.00]
    285 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    286 ;
    287 ; SANDY-LABEL: test_blendvpd:
    288 ; SANDY:       # %bb.0:
    289 ; SANDY-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
    290 ; SANDY-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    291 ; SANDY-NEXT:    retq # sched: [1:1.00]
    292 ;
    293 ; HASWELL-SSE-LABEL: test_blendvpd:
    294 ; HASWELL-SSE:       # %bb.0:
    295 ; HASWELL-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:1.00]
    296 ; HASWELL-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
    297 ; HASWELL-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
    298 ; HASWELL-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
    299 ; HASWELL-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:1.00]
    300 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    301 ;
    302 ; HASWELL-LABEL: test_blendvpd:
    303 ; HASWELL:       # %bb.0:
    304 ; HASWELL-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
    305 ; HASWELL-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
    306 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    307 ;
    308 ; BROADWELL-SSE-LABEL: test_blendvpd:
    309 ; BROADWELL-SSE:       # %bb.0:
    310 ; BROADWELL-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:1.00]
    311 ; BROADWELL-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
    312 ; BROADWELL-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
    313 ; BROADWELL-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
    314 ; BROADWELL-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:1.00]
    315 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    316 ;
    317 ; BROADWELL-LABEL: test_blendvpd:
    318 ; BROADWELL:       # %bb.0:
    319 ; BROADWELL-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
    320 ; BROADWELL-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
    321 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    322 ;
    323 ; SKYLAKE-SSE-LABEL: test_blendvpd:
    324 ; SKYLAKE-SSE:       # %bb.0:
    325 ; SKYLAKE-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:0.33]
    326 ; SKYLAKE-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.33]
    327 ; SKYLAKE-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
    328 ; SKYLAKE-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
    329 ; SKYLAKE-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:0.33]
    330 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    331 ;
    332 ; SKYLAKE-LABEL: test_blendvpd:
    333 ; SKYLAKE:       # %bb.0:
    334 ; SKYLAKE-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
    335 ; SKYLAKE-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
    336 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    337 ;
    338 ; SKX-SSE-LABEL: test_blendvpd:
    339 ; SKX-SSE:       # %bb.0:
    340 ; SKX-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:0.33]
    341 ; SKX-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.33]
    342 ; SKX-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
    343 ; SKX-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
    344 ; SKX-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:0.33]
    345 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    346 ;
    347 ; SKX-LABEL: test_blendvpd:
    348 ; SKX:       # %bb.0:
    349 ; SKX-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
    350 ; SKX-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
    351 ; SKX-NEXT:    retq # sched: [7:1.00]
    352 ;
    353 ; BTVER2-SSE-LABEL: test_blendvpd:
    354 ; BTVER2-SSE:       # %bb.0:
    355 ; BTVER2-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:0.50]
    356 ; BTVER2-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.50]
    357 ; BTVER2-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
    358 ; BTVER2-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
    359 ; BTVER2-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:0.50]
    360 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    361 ;
    362 ; BTVER2-LABEL: test_blendvpd:
    363 ; BTVER2:       # %bb.0:
    364 ; BTVER2-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
    365 ; BTVER2-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
    366 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    367 ;
    368 ; ZNVER1-SSE-LABEL: test_blendvpd:
    369 ; ZNVER1-SSE:       # %bb.0:
    370 ; ZNVER1-SSE-NEXT:    movapd %xmm0, %xmm3 # sched: [1:0.25]
    371 ; ZNVER1-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.25]
    372 ; ZNVER1-SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 # sched: [1:0.50]
    373 ; ZNVER1-SSE-NEXT:    blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.50]
    374 ; ZNVER1-SSE-NEXT:    movapd %xmm3, %xmm0 # sched: [1:0.25]
    375 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    376 ;
    377 ; ZNVER1-LABEL: test_blendvpd:
    378 ; ZNVER1:       # %bb.0:
    379 ; ZNVER1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
    380 ; ZNVER1-NEXT:    vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
    381 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register form: blend %a0 and %a1 with %a2 as the third operand.
    382   %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
          ; 16-byte aligned vector load that feeds the second blend.
    383   %2 = load <2 x double>, <2 x double> *%a3, align 16
          ; Memory form: blend the first result with the loaded vector, reusing %a2.
    384   %3 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %1, <2 x double> %2, <2 x double> %a2)
    385   ret <2 x double> %3
    386 }
    ; Intrinsic used by test_blendvpd above; declared nounwind readnone.
    387 declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
    388 
    ; test_blendvps: scheduling-annotation test for the variable float blend.
    ; The IR at the bottom calls the llvm.x86.sse41.blendvps intrinsic twice
    ; with the same third operand %a2: first on the register arguments %a0/%a1,
    ; then on that result and a vector loaded from %a3. For GENERIC, SLM and
    ; the *-SSE prefixes the autogenerated checks expect register copies around
    ; the blends (movaps %xmm0 -> %xmm3, movaps %xmm2 -> %xmm0, and movaps
    ; %xmm3 -> %xmm0 at the end) with blendvps in register and memory form; for
    ; the remaining prefixes they expect just two vblendvps instructions. Each
    ; instruction is followed by its `sched: [..]` annotation.
    ; The load spells out `align 16`, matching the other vector loads in this
    ; file; an omitted alignment already means the ABI alignment of the type.
    ; NOTE(review): that ABI alignment is presumably 16 bytes for <4 x float>
    ; on this target, which would leave the expected output unchanged --
    ; confirm by re-running llc on this file.
    ; NOTE(review): the bracketed pair is presumably latency and reciprocal
    ; throughput as printed by -print-schedule -- confirm against llc docs.
    389 define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) {
    390 ; GENERIC-LABEL: test_blendvps:
    391 ; GENERIC:       # %bb.0:
    392 ; GENERIC-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
    393 ; GENERIC-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
    394 ; GENERIC-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
    395 ; GENERIC-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
    396 ; GENERIC-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
    397 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    398 ;
    399 ; SLM-LABEL: test_blendvps:
    400 ; SLM:       # %bb.0:
    401 ; SLM-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.50]
    402 ; SLM-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.50]
    403 ; SLM-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [1:1.00]
    404 ; SLM-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [4:1.00]
    405 ; SLM-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.50]
    406 ; SLM-NEXT:    retq # sched: [4:1.00]
    407 ;
    408 ; SANDY-SSE-LABEL: test_blendvps:
    409 ; SANDY-SSE:       # %bb.0:
    410 ; SANDY-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
    411 ; SANDY-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
    412 ; SANDY-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
    413 ; SANDY-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
    414 ; SANDY-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
    415 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    416 ;
    417 ; SANDY-LABEL: test_blendvps:
    418 ; SANDY:       # %bb.0:
    419 ; SANDY-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
    420 ; SANDY-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    421 ; SANDY-NEXT:    retq # sched: [1:1.00]
    422 ;
    423 ; HASWELL-SSE-LABEL: test_blendvps:
    424 ; HASWELL-SSE:       # %bb.0:
    425 ; HASWELL-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
    426 ; HASWELL-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
    427 ; HASWELL-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
    428 ; HASWELL-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
    429 ; HASWELL-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
    430 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    431 ;
    432 ; HASWELL-LABEL: test_blendvps:
    433 ; HASWELL:       # %bb.0:
    434 ; HASWELL-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
    435 ; HASWELL-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
    436 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    437 ;
    438 ; BROADWELL-SSE-LABEL: test_blendvps:
    439 ; BROADWELL-SSE:       # %bb.0:
    440 ; BROADWELL-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:1.00]
    441 ; BROADWELL-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
    442 ; BROADWELL-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
    443 ; BROADWELL-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
    444 ; BROADWELL-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:1.00]
    445 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    446 ;
    447 ; BROADWELL-LABEL: test_blendvps:
    448 ; BROADWELL:       # %bb.0:
    449 ; BROADWELL-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
    450 ; BROADWELL-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
    451 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    452 ;
    453 ; SKYLAKE-SSE-LABEL: test_blendvps:
    454 ; SKYLAKE-SSE:       # %bb.0:
    455 ; SKYLAKE-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.33]
    456 ; SKYLAKE-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.33]
    457 ; SKYLAKE-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
    458 ; SKYLAKE-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
    459 ; SKYLAKE-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.33]
    460 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    461 ;
    462 ; SKYLAKE-LABEL: test_blendvps:
    463 ; SKYLAKE:       # %bb.0:
    464 ; SKYLAKE-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
    465 ; SKYLAKE-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
    466 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    467 ;
    468 ; SKX-SSE-LABEL: test_blendvps:
    469 ; SKX-SSE:       # %bb.0:
    470 ; SKX-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.33]
    471 ; SKX-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.33]
    472 ; SKX-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
    473 ; SKX-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
    474 ; SKX-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.33]
    475 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    476 ;
    477 ; SKX-LABEL: test_blendvps:
    478 ; SKX:       # %bb.0:
    479 ; SKX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
    480 ; SKX-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
    481 ; SKX-NEXT:    retq # sched: [7:1.00]
    482 ;
    483 ; BTVER2-SSE-LABEL: test_blendvps:
    484 ; BTVER2-SSE:       # %bb.0:
    485 ; BTVER2-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.50]
    486 ; BTVER2-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.50]
    487 ; BTVER2-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
    488 ; BTVER2-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
    489 ; BTVER2-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.50]
    490 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    491 ;
    492 ; BTVER2-LABEL: test_blendvps:
    493 ; BTVER2:       # %bb.0:
    494 ; BTVER2-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
    495 ; BTVER2-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
    496 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    497 ;
    498 ; ZNVER1-SSE-LABEL: test_blendvps:
    499 ; ZNVER1-SSE:       # %bb.0:
    500 ; ZNVER1-SSE-NEXT:    movaps %xmm0, %xmm3 # sched: [1:0.25]
    501 ; ZNVER1-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.25]
    502 ; ZNVER1-SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 # sched: [1:0.50]
    503 ; ZNVER1-SSE-NEXT:    blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.50]
    504 ; ZNVER1-SSE-NEXT:    movaps %xmm3, %xmm0 # sched: [1:0.25]
    505 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    506 ;
    507 ; ZNVER1-LABEL: test_blendvps:
    508 ; ZNVER1:       # %bb.0:
    509 ; ZNVER1-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
    510 ; ZNVER1-NEXT:    vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
    511 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register form: blend %a0 and %a1 with %a2 as the third operand.
    512   %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
          ; Vector load feeding the second blend, with its 16-byte alignment
          ; stated explicitly.
    513   %2 = load <4 x float>, <4 x float> *%a3, align 16
          ; Memory form: blend the first result with the loaded vector, reusing %a2.
    514   %3 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %1, <4 x float> %2, <4 x float> %a2)
    515   ret <4 x float> %3
    516 }
    ; Intrinsic used by test_blendvps above; declared nounwind readnone.
    517 declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
    518 
    519 define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; Checks the per-CPU sched annotations printed for dppd with a register source and with a memory source.
    520 ; GENERIC-LABEL: test_dppd:
    521 ; GENERIC:       # %bb.0:
    522 ; GENERIC-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
    523 ; GENERIC-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
    524 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    525 ;
    526 ; SLM-LABEL: test_dppd:
    527 ; SLM:       # %bb.0:
    528 ; SLM-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [3:1.00]
    529 ; SLM-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [6:1.00]
    530 ; SLM-NEXT:    retq # sched: [4:1.00]
    531 ;
    532 ; SANDY-SSE-LABEL: test_dppd:
    533 ; SANDY-SSE:       # %bb.0:
    534 ; SANDY-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
    535 ; SANDY-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
    536 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    537 ;
    538 ; SANDY-LABEL: test_dppd:
    539 ; SANDY:       # %bb.0:
    540 ; SANDY-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
    541 ; SANDY-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
    542 ; SANDY-NEXT:    retq # sched: [1:1.00]
    543 ;
    544 ; HASWELL-SSE-LABEL: test_dppd:
    545 ; HASWELL-SSE:       # %bb.0:
    546 ; HASWELL-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
    547 ; HASWELL-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
    548 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    549 ;
    550 ; HASWELL-LABEL: test_dppd:
    551 ; HASWELL:       # %bb.0:
    552 ; HASWELL-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
    553 ; HASWELL-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
    554 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    555 ;
    556 ; BROADWELL-SSE-LABEL: test_dppd:
    557 ; BROADWELL-SSE:       # %bb.0:
    558 ; BROADWELL-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
    559 ; BROADWELL-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [14:1.00]
    560 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    561 ;
    562 ; BROADWELL-LABEL: test_dppd:
    563 ; BROADWELL:       # %bb.0:
    564 ; BROADWELL-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
    565 ; BROADWELL-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
    566 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    567 ;
    568 ; SKYLAKE-SSE-LABEL: test_dppd:
    569 ; SKYLAKE-SSE:       # %bb.0:
    570 ; SKYLAKE-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
    571 ; SKYLAKE-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
    572 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    573 ;
    574 ; SKYLAKE-LABEL: test_dppd:
    575 ; SKYLAKE:       # %bb.0:
    576 ; SKYLAKE-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
    577 ; SKYLAKE-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
    578 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    579 ;
    580 ; SKX-SSE-LABEL: test_dppd:
    581 ; SKX-SSE:       # %bb.0:
    582 ; SKX-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
    583 ; SKX-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
    584 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    585 ;
    586 ; SKX-LABEL: test_dppd:
    587 ; SKX:       # %bb.0:
    588 ; SKX-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
    589 ; SKX-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
    590 ; SKX-NEXT:    retq # sched: [7:1.00]
    591 ;
    592 ; BTVER2-SSE-LABEL: test_dppd:
    593 ; BTVER2-SSE:       # %bb.0:
    594 ; BTVER2-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [9:3.00]
    595 ; BTVER2-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [14:3.00]
    596 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    597 ;
    598 ; BTVER2-LABEL: test_dppd:
    599 ; BTVER2:       # %bb.0:
    600 ; BTVER2-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:3.00]
    601 ; BTVER2-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:3.00]
    602 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    603 ;
    604 ; ZNVER1-SSE-LABEL: test_dppd:
    605 ; ZNVER1-SSE:       # %bb.0:
    606 ; ZNVER1-SSE-NEXT:    dppd $7, %xmm1, %xmm0 # sched: [100:0.25]
    607 ; ZNVER1-SSE-NEXT:    dppd $7, (%rdi), %xmm0 # sched: [100:0.25]
    608 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    609 ;
    610 ; ZNVER1-LABEL: test_dppd:
    611 ; ZNVER1:       # %bb.0:
    612 ; ZNVER1-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
    613 ; ZNVER1-NEXT:    vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
    614 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    615   %1 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; register-source form, the first dppd in the expected output
    616   %2 = load <2 x double>, <2 x double> *%a2, align 16 ; the expected output shows this load folded into the second dppd as (%rdi)
    617   %3 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %1, <2 x double> %2, i8 7) ; memory-source form, chained on the first result
    618   ret <2 x double> %3
    619 }
    620 declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
    621 
    622 define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; Checks the per-CPU sched annotations printed for dpps with a register source and with a memory source.
    623 ; GENERIC-LABEL: test_dpps:
    624 ; GENERIC:       # %bb.0:
    625 ; GENERIC-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [12:2.00]
    626 ; GENERIC-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [18:2.00]
    627 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    628 ;
    629 ; SLM-LABEL: test_dpps:
    630 ; SLM:       # %bb.0:
    631 ; SLM-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [3:1.00]
    632 ; SLM-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [6:1.00]
    633 ; SLM-NEXT:    retq # sched: [4:1.00]
    634 ;
    635 ; SANDY-SSE-LABEL: test_dpps:
    636 ; SANDY-SSE:       # %bb.0:
    637 ; SANDY-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [12:2.00]
    638 ; SANDY-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [18:2.00]
    639 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    640 ;
    641 ; SANDY-LABEL: test_dpps:
    642 ; SANDY:       # %bb.0:
    643 ; SANDY-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00]
    644 ; SANDY-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [18:2.00]
    645 ; SANDY-NEXT:    retq # sched: [1:1.00]
    646 ;
    647 ; HASWELL-SSE-LABEL: test_dpps:
    648 ; HASWELL-SSE:       # %bb.0:
    649 ; HASWELL-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [14:2.00]
    650 ; HASWELL-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [20:2.00]
    651 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    652 ;
    653 ; HASWELL-LABEL: test_dpps:
    654 ; HASWELL:       # %bb.0:
    655 ; HASWELL-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
    656 ; HASWELL-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [20:2.00]
    657 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    658 ;
    659 ; BROADWELL-SSE-LABEL: test_dpps:
    660 ; BROADWELL-SSE:       # %bb.0:
    661 ; BROADWELL-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [14:2.00]
    662 ; BROADWELL-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [19:2.00]
    663 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    664 ;
    665 ; BROADWELL-LABEL: test_dpps:
    666 ; BROADWELL:       # %bb.0:
    667 ; BROADWELL-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
    668 ; BROADWELL-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:2.00]
    669 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    670 ;
    671 ; SKYLAKE-SSE-LABEL: test_dpps:
    672 ; SKYLAKE-SSE:       # %bb.0:
    673 ; SKYLAKE-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [13:1.50]
    674 ; SKYLAKE-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [19:1.50]
    675 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    676 ;
    677 ; SKYLAKE-LABEL: test_dpps:
    678 ; SKYLAKE:       # %bb.0:
    679 ; SKYLAKE-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.50]
    680 ; SKYLAKE-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.50]
    681 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    682 ;
    683 ; SKX-SSE-LABEL: test_dpps:
    684 ; SKX-SSE:       # %bb.0:
    685 ; SKX-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [13:1.33]
    686 ; SKX-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [19:1.33]
    687 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    688 ;
    689 ; SKX-LABEL: test_dpps:
    690 ; SKX:       # %bb.0:
    691 ; SKX-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.33]
    692 ; SKX-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.33]
    693 ; SKX-NEXT:    retq # sched: [7:1.00]
    694 ;
    695 ; BTVER2-SSE-LABEL: test_dpps:
    696 ; BTVER2-SSE:       # %bb.0:
    697 ; BTVER2-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [11:3.00]
    698 ; BTVER2-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [16:3.00]
    699 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    700 ;
    701 ; BTVER2-LABEL: test_dpps:
    702 ; BTVER2:       # %bb.0:
    703 ; BTVER2-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
    704 ; BTVER2-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [16:3.00]
    705 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    706 ;
    707 ; ZNVER1-SSE-LABEL: test_dpps:
    708 ; ZNVER1-SSE:       # %bb.0:
    709 ; ZNVER1-SSE-NEXT:    dpps $7, %xmm1, %xmm0 # sched: [100:0.25]
    710 ; ZNVER1-SSE-NEXT:    dpps $7, (%rdi), %xmm0 # sched: [100:0.25]
    711 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    712 ;
    713 ; ZNVER1-LABEL: test_dpps:
    714 ; ZNVER1:       # %bb.0:
    715 ; ZNVER1-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
    716 ; ZNVER1-NEXT:    vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
    717 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    718   %1 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; register-source form, the first dpps in the expected output
    719   %2 = load <4 x float>, <4 x float> *%a2, align 16 ; the expected output shows this load folded into the second dpps as (%rdi)
    720   %3 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %1, <4 x float> %2, i8 7) ; memory-source form, chained on the first result
    721   ret <4 x float> %3
    722 }
    723 declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
    724 
    725 define i32 @test_extractps(<4 x float> %a0, i32 *%a1) { ; Checks the per-CPU sched annotations printed for extractps writing to a general-purpose register and writing to memory.
    726 ; GENERIC-LABEL: test_extractps:
    727 ; GENERIC:       # %bb.0:
    728 ; GENERIC-NEXT:    extractps $3, %xmm0, %eax # sched: [3:1.00]
    729 ; GENERIC-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [5:1.00]
    730 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    731 ;
    732 ; SLM-LABEL: test_extractps:
    733 ; SLM:       # %bb.0:
    734 ; SLM-NEXT:    extractps $3, %xmm0, %eax # sched: [1:1.00]
    735 ; SLM-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [4:2.00]
    736 ; SLM-NEXT:    retq # sched: [4:1.00]
    737 ;
    738 ; SANDY-SSE-LABEL: test_extractps:
    739 ; SANDY-SSE:       # %bb.0:
    740 ; SANDY-SSE-NEXT:    extractps $3, %xmm0, %eax # sched: [3:1.00]
    741 ; SANDY-SSE-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [5:1.00]
    742 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    743 ;
    744 ; SANDY-LABEL: test_extractps:
    745 ; SANDY:       # %bb.0:
    746 ; SANDY-NEXT:    vextractps $3, %xmm0, %eax # sched: [3:1.00]
    747 ; SANDY-NEXT:    vextractps $1, %xmm0, (%rdi) # sched: [5:1.00]
    748 ; SANDY-NEXT:    retq # sched: [1:1.00]
    749 ;
    750 ; HASWELL-SSE-LABEL: test_extractps:
    751 ; HASWELL-SSE:       # %bb.0:
    752 ; HASWELL-SSE-NEXT:    extractps $3, %xmm0, %eax # sched: [2:1.00]
    753 ; HASWELL-SSE-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
    754 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    755 ;
    756 ; HASWELL-LABEL: test_extractps:
    757 ; HASWELL:       # %bb.0:
    758 ; HASWELL-NEXT:    vextractps $3, %xmm0, %eax # sched: [2:1.00]
    759 ; HASWELL-NEXT:    vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
    760 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    761 ;
    762 ; BROADWELL-SSE-LABEL: test_extractps:
    763 ; BROADWELL-SSE:       # %bb.0:
    764 ; BROADWELL-SSE-NEXT:    extractps $3, %xmm0, %eax # sched: [2:1.00]
    765 ; BROADWELL-SSE-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
    766 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    767 ;
    768 ; BROADWELL-LABEL: test_extractps:
    769 ; BROADWELL:       # %bb.0:
    770 ; BROADWELL-NEXT:    vextractps $3, %xmm0, %eax # sched: [2:1.00]
    771 ; BROADWELL-NEXT:    vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
    772 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    773 ;
    774 ; SKYLAKE-SSE-LABEL: test_extractps:
    775 ; SKYLAKE-SSE:       # %bb.0:
    776 ; SKYLAKE-SSE-NEXT:    extractps $3, %xmm0, %eax # sched: [3:1.00]
    777 ; SKYLAKE-SSE-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
    778 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    779 ;
    780 ; SKYLAKE-LABEL: test_extractps:
    781 ; SKYLAKE:       # %bb.0:
    782 ; SKYLAKE-NEXT:    vextractps $3, %xmm0, %eax # sched: [3:1.00]
    783 ; SKYLAKE-NEXT:    vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
    784 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    785 ;
    786 ; SKX-SSE-LABEL: test_extractps:
    787 ; SKX-SSE:       # %bb.0:
    788 ; SKX-SSE-NEXT:    extractps $3, %xmm0, %eax # sched: [3:1.00]
    789 ; SKX-SSE-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
    790 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    791 ;
    792 ; SKX-LABEL: test_extractps:
    793 ; SKX:       # %bb.0:
    794 ; SKX-NEXT:    vextractps $3, %xmm0, %eax # sched: [3:1.00]
    795 ; SKX-NEXT:    vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
    796 ; SKX-NEXT:    retq # sched: [7:1.00]
    797 ;
    798 ; BTVER2-SSE-LABEL: test_extractps:
    799 ; BTVER2-SSE:       # %bb.0:
    800 ; BTVER2-SSE-NEXT:    extractps $3, %xmm0, %eax # sched: [3:1.00]
    801 ; BTVER2-SSE-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [3:1.00]
    802 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    803 ;
    804 ; BTVER2-LABEL: test_extractps:
    805 ; BTVER2:       # %bb.0:
    806 ; BTVER2-NEXT:    vextractps $3, %xmm0, %eax # sched: [3:1.00]
    807 ; BTVER2-NEXT:    vextractps $1, %xmm0, (%rdi) # sched: [3:1.00]
    808 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    809 ;
    810 ; ZNVER1-SSE-LABEL: test_extractps:
    811 ; ZNVER1-SSE:       # %bb.0:
    812 ; ZNVER1-SSE-NEXT:    extractps $3, %xmm0, %eax # sched: [2:2.00]
    813 ; ZNVER1-SSE-NEXT:    extractps $1, %xmm0, (%rdi) # sched: [5:2.50]
    814 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    815 ;
    816 ; ZNVER1-LABEL: test_extractps:
    817 ; ZNVER1:       # %bb.0:
    818 ; ZNVER1-NEXT:    vextractps $3, %xmm0, %eax # sched: [2:2.00]
    819 ; ZNVER1-NEXT:    vextractps $1, %xmm0, (%rdi) # sched: [5:2.50]
    820 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    821   %1 = extractelement <4 x float> %a0, i32 3 ; element 3, returned below; matches the extractps $3 to %eax in the expected output
    822   %2 = extractelement <4 x float> %a0, i32 1 ; element 1, stored below; matches the extractps $1 to (%rdi) in the expected output
    823   %3 = bitcast float %1 to i32 ; reinterpret the float bits as i32 for the integer return
    824   %4 = bitcast float %2 to i32 ; reinterpret the float bits as i32 for the integer store
    825   store i32 %4, i32 *%a1
    826   ret i32 %3
    827 }
    828 
    829 define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2) { ; Checks the per-CPU sched annotations printed for insertps with a register source and with a memory source.
    830 ; GENERIC-LABEL: test_insertps:
    831 ; GENERIC:       # %bb.0:
    832 ; GENERIC-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
    833 ; GENERIC-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
    834 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    835 ;
    836 ; SLM-LABEL: test_insertps:
    837 ; SLM:       # %bb.0:
    838 ; SLM-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
    839 ; SLM-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [4:1.00]
    840 ; SLM-NEXT:    retq # sched: [4:1.00]
    841 ;
    842 ; SANDY-SSE-LABEL: test_insertps:
    843 ; SANDY-SSE:       # %bb.0:
    844 ; SANDY-SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
    845 ; SANDY-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
    846 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    847 ;
    848 ; SANDY-LABEL: test_insertps:
    849 ; SANDY:       # %bb.0:
    850 ; SANDY-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
    851 ; SANDY-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
    852 ; SANDY-NEXT:    retq # sched: [1:1.00]
    853 ;
    854 ; HASWELL-SSE-LABEL: test_insertps:
    855 ; HASWELL-SSE:       # %bb.0:
    856 ; HASWELL-SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
    857 ; HASWELL-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
    858 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    859 ;
    860 ; HASWELL-LABEL: test_insertps:
    861 ; HASWELL:       # %bb.0:
    862 ; HASWELL-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
    863 ; HASWELL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
    864 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    865 ;
    866 ; BROADWELL-SSE-LABEL: test_insertps:
    867 ; BROADWELL-SSE:       # %bb.0:
    868 ; BROADWELL-SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
    869 ; BROADWELL-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
    870 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    871 ;
    872 ; BROADWELL-LABEL: test_insertps:
    873 ; BROADWELL:       # %bb.0:
    874 ; BROADWELL-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
    875 ; BROADWELL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
    876 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    877 ;
    878 ; SKYLAKE-SSE-LABEL: test_insertps:
    879 ; SKYLAKE-SSE:       # %bb.0:
    880 ; SKYLAKE-SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
    881 ; SKYLAKE-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
    882 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    883 ;
    884 ; SKYLAKE-LABEL: test_insertps:
    885 ; SKYLAKE:       # %bb.0:
    886 ; SKYLAKE-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
    887 ; SKYLAKE-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
    888 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    889 ;
    890 ; SKX-SSE-LABEL: test_insertps:
    891 ; SKX-SSE:       # %bb.0:
    892 ; SKX-SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
    893 ; SKX-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
    894 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    895 ;
    896 ; SKX-LABEL: test_insertps:
    897 ; SKX:       # %bb.0:
    898 ; SKX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
    899 ; SKX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
    900 ; SKX-NEXT:    retq # sched: [7:1.00]
    901 ;
    902 ; BTVER2-SSE-LABEL: test_insertps:
    903 ; BTVER2-SSE:       # %bb.0:
    904 ; BTVER2-SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
    905 ; BTVER2-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
    906 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    907 ;
    908 ; BTVER2-LABEL: test_insertps:
    909 ; BTVER2:       # %bb.0:
    910 ; BTVER2-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
    911 ; BTVER2-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
    912 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    913 ;
    914 ; ZNVER1-SSE-LABEL: test_insertps:
    915 ; ZNVER1-SSE:       # %bb.0:
    916 ; ZNVER1-SSE-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
    917 ; ZNVER1-SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [8:0.50]
    918 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    919 ;
    920 ; ZNVER1-LABEL: test_insertps:
    921 ; ZNVER1:       # %bb.0:
    922 ; ZNVER1-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
    923 ; ZNVER1-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [8:0.50]
    924 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    925   %1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 17) ; register-source form; with immediate 17 the expected output shows zero,xmm1[0],xmm0[2,3]
    926   %2 = load float, float *%a2 ; scalar operand read from memory
    927   %3 = insertelement <4 x float> %1, float %2, i32 3 ; the expected output shows this as a memory-source insertps giving xmm0[0,1,2],mem[0]
    928   ret <4 x float> %3
    929 }
    930 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
    931 
    932 define <2 x i64> @test_movntdqa(i8* %a0) { ; Checks the per-CPU sched annotations printed for movntdqa reading from memory.
    933 ; GENERIC-LABEL: test_movntdqa:
    934 ; GENERIC:       # %bb.0:
    935 ; GENERIC-NEXT:    movntdqa (%rdi), %xmm0 # sched: [6:0.50]
    936 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    937 ;
    938 ; SLM-LABEL: test_movntdqa:
    939 ; SLM:       # %bb.0:
    940 ; SLM-NEXT:    movntdqa (%rdi), %xmm0 # sched: [3:1.00]
    941 ; SLM-NEXT:    retq # sched: [4:1.00]
    942 ;
    943 ; SANDY-SSE-LABEL: test_movntdqa:
    944 ; SANDY-SSE:       # %bb.0:
    945 ; SANDY-SSE-NEXT:    movntdqa (%rdi), %xmm0 # sched: [6:0.50]
    946 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    947 ;
    948 ; SANDY-LABEL: test_movntdqa:
    949 ; SANDY:       # %bb.0:
    950 ; SANDY-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
    951 ; SANDY-NEXT:    retq # sched: [1:1.00]
    952 ;
    953 ; HASWELL-SSE-LABEL: test_movntdqa:
    954 ; HASWELL-SSE:       # %bb.0:
    955 ; HASWELL-SSE-NEXT:    movntdqa (%rdi), %xmm0 # sched: [6:0.50]
    956 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    957 ;
    958 ; HASWELL-LABEL: test_movntdqa:
    959 ; HASWELL:       # %bb.0:
    960 ; HASWELL-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
    961 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    962 ;
    963 ; BROADWELL-SSE-LABEL: test_movntdqa:
    964 ; BROADWELL-SSE:       # %bb.0:
    965 ; BROADWELL-SSE-NEXT:    movntdqa (%rdi), %xmm0 # sched: [5:0.50]
    966 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    967 ;
    968 ; BROADWELL-LABEL: test_movntdqa:
    969 ; BROADWELL:       # %bb.0:
    970 ; BROADWELL-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [5:0.50]
    971 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    972 ;
    973 ; SKYLAKE-SSE-LABEL: test_movntdqa:
    974 ; SKYLAKE-SSE:       # %bb.0:
    975 ; SKYLAKE-SSE-NEXT:    movntdqa (%rdi), %xmm0 # sched: [6:0.50]
    976 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    977 ;
    978 ; SKYLAKE-LABEL: test_movntdqa:
    979 ; SKYLAKE:       # %bb.0:
    980 ; SKYLAKE-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
    981 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    982 ;
    983 ; SKX-SSE-LABEL: test_movntdqa:
    984 ; SKX-SSE:       # %bb.0:
    985 ; SKX-SSE-NEXT:    movntdqa (%rdi), %xmm0 # sched: [6:0.50]
    986 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    987 ;
    988 ; SKX-LABEL: test_movntdqa:
    989 ; SKX:       # %bb.0:
    990 ; SKX-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
    991 ; SKX-NEXT:    retq # sched: [7:1.00]
    992 ;
    993 ; BTVER2-SSE-LABEL: test_movntdqa:
    994 ; BTVER2-SSE:       # %bb.0:
    995 ; BTVER2-SSE-NEXT:    movntdqa (%rdi), %xmm0 # sched: [5:1.00]
    996 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    997 ;
    998 ; BTVER2-LABEL: test_movntdqa:
    999 ; BTVER2:       # %bb.0:
   1000 ; BTVER2-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [5:1.00]
   1001 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1002 ;
   1003 ; ZNVER1-SSE-LABEL: test_movntdqa:
   1004 ; ZNVER1-SSE:       # %bb.0:
   1005 ; ZNVER1-SSE-NEXT:    movntdqa (%rdi), %xmm0 # sched: [8:0.50]
   1006 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1007 ;
   1008 ; ZNVER1-LABEL: test_movntdqa:
   1009 ; ZNVER1:       # %bb.0:
   1010 ; ZNVER1-NEXT:    vmovntdqa (%rdi), %xmm0 # sched: [8:0.50]
   1011 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1012   %1 = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) ; the only operation under test; the expected output shows a single movntdqa from (%rdi)
   1013   ret <2 x i64> %1
   1014 }
   1015 declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readnone
   1016 
   1017 define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; Checks the per-CPU sched annotations printed for mpsadbw with a register source and with a memory source.
   1018 ; GENERIC-LABEL: test_mpsadbw:
   1019 ; GENERIC:       # %bb.0:
   1020 ; GENERIC-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00]
   1021 ; GENERIC-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00]
   1022 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1023 ;
   1024 ; SLM-LABEL: test_mpsadbw:
   1025 ; SLM:       # %bb.0:
   1026 ; SLM-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00]
   1027 ; SLM-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [10:1.00]
   1028 ; SLM-NEXT:    retq # sched: [4:1.00]
   1029 ;
   1030 ; SANDY-SSE-LABEL: test_mpsadbw:
   1031 ; SANDY-SSE:       # %bb.0:
   1032 ; SANDY-SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00]
   1033 ; SANDY-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00]
   1034 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1035 ;
   1036 ; SANDY-LABEL: test_mpsadbw:
   1037 ; SANDY:       # %bb.0:
   1038 ; SANDY-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
   1039 ; SANDY-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
   1040 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1041 ;
   1042 ; HASWELL-SSE-LABEL: test_mpsadbw:
   1043 ; HASWELL-SSE:       # %bb.0:
   1044 ; HASWELL-SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00]
   1045 ; HASWELL-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [13:2.00]
   1046 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1047 ;
   1048 ; HASWELL-LABEL: test_mpsadbw:
   1049 ; HASWELL:       # %bb.0:
   1050 ; HASWELL-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
   1051 ; HASWELL-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:2.00]
   1052 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1053 ;
   1054 ; BROADWELL-SSE-LABEL: test_mpsadbw:
   1055 ; BROADWELL-SSE:       # %bb.0:
   1056 ; BROADWELL-SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00]
   1057 ; BROADWELL-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [12:2.00]
   1058 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1059 ;
   1060 ; BROADWELL-LABEL: test_mpsadbw:
   1061 ; BROADWELL:       # %bb.0:
   1062 ; BROADWELL-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
   1063 ; BROADWELL-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
   1064 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1065 ;
   1066 ; SKYLAKE-SSE-LABEL: test_mpsadbw:
   1067 ; SKYLAKE-SSE:       # %bb.0:
   1068 ; SKYLAKE-SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00]
   1069 ; SKYLAKE-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00]
   1070 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1071 ;
   1072 ; SKYLAKE-LABEL: test_mpsadbw:
   1073 ; SKYLAKE:       # %bb.0:
   1074 ; SKYLAKE-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
   1075 ; SKYLAKE-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
   1076 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1077 ;
   1078 ; SKX-SSE-LABEL: test_mpsadbw:
   1079 ; SKX-SSE:       # %bb.0:
   1080 ; SKX-SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00]
   1081 ; SKX-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00]
   1082 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1083 ;
   1084 ; SKX-LABEL: test_mpsadbw:
   1085 ; SKX:       # %bb.0:
   1086 ; SKX-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
   1087 ; SKX-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
   1088 ; SKX-NEXT:    retq # sched: [7:1.00]
   1089 ;
   1090 ; BTVER2-SSE-LABEL: test_mpsadbw:
   1091 ; BTVER2-SSE:       # %bb.0:
   1092 ; BTVER2-SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [3:2.00]
   1093 ; BTVER2-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [8:2.00]
   1094 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1095 ;
   1096 ; BTVER2-LABEL: test_mpsadbw:
   1097 ; BTVER2:       # %bb.0:
   1098 ; BTVER2-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
   1099 ; BTVER2-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
   1100 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1101 ;
   1102 ; ZNVER1-SSE-LABEL: test_mpsadbw:
   1103 ; ZNVER1-SSE:       # %bb.0:
   1104 ; ZNVER1-SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 # sched: [100:0.25]
   1105 ; ZNVER1-SSE-NEXT:    mpsadbw $7, (%rdi), %xmm0 # sched: [100:0.25]
   1106 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1107 ;
   1108 ; ZNVER1-LABEL: test_mpsadbw:
   1109 ; ZNVER1:       # %bb.0:
   1110 ; ZNVER1-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
   1111 ; ZNVER1-NEXT:    vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
   1112 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1113   %1 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; register-source form, the first mpsadbw in the expected output
   1114   %2 = bitcast <8 x i16> %1 to <16 x i8> ; retype the <8 x i16> result so it can be the first operand of the second call
   1115   %3 = load <16 x i8>, <16 x i8> *%a2, align 16 ; the expected output shows this load folded into the second mpsadbw as (%rdi)
   1116   %4 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %2, <16 x i8> %3, i8 7) ; memory-source form, chained on the first result
   1117   ret <8 x i16> %4
   1118 }
   1119 declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
   1120 
   1121 define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; Checks the per-CPU sched annotations printed for packusdw with a register source and with a memory source.
   1122 ; GENERIC-LABEL: test_packusdw:
   1123 ; GENERIC:       # %bb.0:
   1124 ; GENERIC-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:0.50]
   1125 ; GENERIC-NEXT:    packusdw (%rdi), %xmm0 # sched: [7:0.50]
   1126 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1127 ;
   1128 ; SLM-LABEL: test_packusdw:
   1129 ; SLM:       # %bb.0:
   1130 ; SLM-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:1.00]
   1131 ; SLM-NEXT:    packusdw (%rdi), %xmm0 # sched: [4:1.00]
   1132 ; SLM-NEXT:    retq # sched: [4:1.00]
   1133 ;
   1134 ; SANDY-SSE-LABEL: test_packusdw:
   1135 ; SANDY-SSE:       # %bb.0:
   1136 ; SANDY-SSE-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:0.50]
   1137 ; SANDY-SSE-NEXT:    packusdw (%rdi), %xmm0 # sched: [7:0.50]
   1138 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1139 ;
   1140 ; SANDY-LABEL: test_packusdw:
   1141 ; SANDY:       # %bb.0:
   1142 ; SANDY-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1143 ; SANDY-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1144 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1145 ;
   1146 ; HASWELL-SSE-LABEL: test_packusdw:
   1147 ; HASWELL-SSE:       # %bb.0:
   1148 ; HASWELL-SSE-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:1.00]
   1149 ; HASWELL-SSE-NEXT:    packusdw (%rdi), %xmm0 # sched: [7:1.00]
   1150 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1151 ;
   1152 ; HASWELL-LABEL: test_packusdw:
   1153 ; HASWELL:       # %bb.0:
   1154 ; HASWELL-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   1155 ; HASWELL-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   1156 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1157 ;
   1158 ; BROADWELL-SSE-LABEL: test_packusdw:
   1159 ; BROADWELL-SSE:       # %bb.0:
   1160 ; BROADWELL-SSE-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:1.00]
   1161 ; BROADWELL-SSE-NEXT:    packusdw (%rdi), %xmm0 # sched: [6:1.00]
   1162 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1163 ;
   1164 ; BROADWELL-LABEL: test_packusdw:
   1165 ; BROADWELL:       # %bb.0:
   1166 ; BROADWELL-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   1167 ; BROADWELL-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   1168 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1169 ;
   1170 ; SKYLAKE-SSE-LABEL: test_packusdw:
   1171 ; SKYLAKE-SSE:       # %bb.0:
   1172 ; SKYLAKE-SSE-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:1.00]
   1173 ; SKYLAKE-SSE-NEXT:    packusdw (%rdi), %xmm0 # sched: [7:1.00]
   1174 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1175 ;
   1176 ; SKYLAKE-LABEL: test_packusdw:
   1177 ; SKYLAKE:       # %bb.0:
   1178 ; SKYLAKE-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   1179 ; SKYLAKE-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   1180 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1181 ;
   1182 ; SKX-SSE-LABEL: test_packusdw:
   1183 ; SKX-SSE:       # %bb.0:
   1184 ; SKX-SSE-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:1.00]
   1185 ; SKX-SSE-NEXT:    packusdw (%rdi), %xmm0 # sched: [7:1.00]
   1186 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1187 ;
   1188 ; SKX-LABEL: test_packusdw:
   1189 ; SKX:       # %bb.0:
   1190 ; SKX-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   1191 ; SKX-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   1192 ; SKX-NEXT:    retq # sched: [7:1.00]
   1193 ;
   1194 ; BTVER2-SSE-LABEL: test_packusdw:
   1195 ; BTVER2-SSE:       # %bb.0:
   1196 ; BTVER2-SSE-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:0.50]
   1197 ; BTVER2-SSE-NEXT:    packusdw (%rdi), %xmm0 # sched: [6:1.00]
   1198 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1199 ;
   1200 ; BTVER2-LABEL: test_packusdw:
   1201 ; BTVER2:       # %bb.0:
   1202 ; BTVER2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1203 ; BTVER2-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   1204 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1205 ;
   1206 ; ZNVER1-SSE-LABEL: test_packusdw:
   1207 ; ZNVER1-SSE:       # %bb.0:
   1208 ; ZNVER1-SSE-NEXT:    packusdw %xmm1, %xmm0 # sched: [1:0.25]
   1209 ; ZNVER1-SSE-NEXT:    packusdw (%rdi), %xmm0 # sched: [8:0.50]
   1210 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1211 ;
   1212 ; ZNVER1-LABEL: test_packusdw:
   1213 ; ZNVER1:       # %bb.0:
   1214 ; ZNVER1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   1215 ; ZNVER1-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   1216 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1217   %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; register-source form, the first packusdw in the expected output
   1218   %2 = bitcast <8 x i16> %1 to <4 x i32> ; retype the <8 x i16> result so it can be the first operand of the second call
   1219   %3 = load <4 x i32>, <4 x i32> *%a2, align 16 ; the expected output shows this load folded into the second packusdw as (%rdi)
   1220   %4 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %2, <4 x i32> %3) ; memory-source form, chained on the first result
   1221   ret <8 x i16> %4
   1222 }
   1223 declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
   1224 
   1225 define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 x i8> *%a3) {
        ; Schedule test for the SSE4.1 variable byte blend (llvm.x86.sse41.pblendvb).
        ; The intrinsic is called twice: once with three register operands and once
        ; with its second operand loaded from %a3, so the expected assembly contains
        ; both a register form and a (%rdi) memory form of pblendvb/vpblendvb, each
        ; with its "# sched" annotation. The non-VEX expectations also contain the
        ; register copies through %xmm3 and %xmm0 that surround the two blends.
   1226 ; GENERIC-LABEL: test_pblendvb:
   1227 ; GENERIC:       # %bb.0:
   1228 ; GENERIC-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.33]
   1229 ; GENERIC-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
   1230 ; GENERIC-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
   1231 ; GENERIC-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
   1232 ; GENERIC-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.33]
   1233 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1234 ;
   1235 ; SLM-LABEL: test_pblendvb:
   1236 ; SLM:       # %bb.0:
   1237 ; SLM-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.50]
   1238 ; SLM-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.50]
   1239 ; SLM-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [1:1.00]
   1240 ; SLM-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [4:1.00]
   1241 ; SLM-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.50]
   1242 ; SLM-NEXT:    retq # sched: [4:1.00]
   1243 ;
   1244 ; SANDY-SSE-LABEL: test_pblendvb:
   1245 ; SANDY-SSE:       # %bb.0:
   1246 ; SANDY-SSE-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.33]
   1247 ; SANDY-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
   1248 ; SANDY-SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
   1249 ; SANDY-SSE-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
   1250 ; SANDY-SSE-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.33]
   1251 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1252 ;
   1253 ; SANDY-LABEL: test_pblendvb:
   1254 ; SANDY:       # %bb.0:
   1255 ; SANDY-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   1256 ; SANDY-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   1257 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1258 ;
   1259 ; HASWELL-SSE-LABEL: test_pblendvb:
   1260 ; HASWELL-SSE:       # %bb.0:
   1261 ; HASWELL-SSE-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.33]
   1262 ; HASWELL-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
   1263 ; HASWELL-SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
   1264 ; HASWELL-SSE-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
   1265 ; HASWELL-SSE-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.33]
   1266 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1267 ;
   1268 ; HASWELL-LABEL: test_pblendvb:
   1269 ; HASWELL:       # %bb.0:
   1270 ; HASWELL-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
   1271 ; HASWELL-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
   1272 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1273 ;
   1274 ; BROADWELL-SSE-LABEL: test_pblendvb:
   1275 ; BROADWELL-SSE:       # %bb.0:
   1276 ; BROADWELL-SSE-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.33]
   1277 ; BROADWELL-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:1.00]
   1278 ; BROADWELL-SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
   1279 ; BROADWELL-SSE-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
   1280 ; BROADWELL-SSE-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.33]
   1281 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1282 ;
   1283 ; BROADWELL-LABEL: test_pblendvb:
   1284 ; BROADWELL:       # %bb.0:
   1285 ; BROADWELL-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
   1286 ; BROADWELL-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
   1287 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1288 ;
   1289 ; SKYLAKE-SSE-LABEL: test_pblendvb:
   1290 ; SKYLAKE-SSE:       # %bb.0:
   1291 ; SKYLAKE-SSE-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.33]
   1292 ; SKYLAKE-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.33]
   1293 ; SKYLAKE-SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
   1294 ; SKYLAKE-SSE-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
   1295 ; SKYLAKE-SSE-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.33]
   1296 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1297 ;
   1298 ; SKYLAKE-LABEL: test_pblendvb:
   1299 ; SKYLAKE:       # %bb.0:
   1300 ; SKYLAKE-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
   1301 ; SKYLAKE-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
   1302 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1303 ;
   1304 ; SKX-SSE-LABEL: test_pblendvb:
   1305 ; SKX-SSE:       # %bb.0:
   1306 ; SKX-SSE-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.33]
   1307 ; SKX-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.33]
   1308 ; SKX-SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
   1309 ; SKX-SSE-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
   1310 ; SKX-SSE-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.33]
   1311 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1312 ;
   1313 ; SKX-LABEL: test_pblendvb:
   1314 ; SKX:       # %bb.0:
   1315 ; SKX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
   1316 ; SKX-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
   1317 ; SKX-NEXT:    retq # sched: [7:1.00]
   1318 ;
   1319 ; BTVER2-SSE-LABEL: test_pblendvb:
   1320 ; BTVER2-SSE:       # %bb.0:
   1321 ; BTVER2-SSE-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.50]
   1322 ; BTVER2-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.50]
   1323 ; BTVER2-SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
   1324 ; BTVER2-SSE-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
   1325 ; BTVER2-SSE-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.50]
   1326 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1327 ;
   1328 ; BTVER2-LABEL: test_pblendvb:
   1329 ; BTVER2:       # %bb.0:
   1330 ; BTVER2-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
   1331 ; BTVER2-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
   1332 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1333 ;
   1334 ; ZNVER1-SSE-LABEL: test_pblendvb:
   1335 ; ZNVER1-SSE:       # %bb.0:
   1336 ; ZNVER1-SSE-NEXT:    movdqa %xmm0, %xmm3 # sched: [1:0.25]
   1337 ; ZNVER1-SSE-NEXT:    movaps %xmm2, %xmm0 # sched: [1:0.25]
   1338 ; ZNVER1-SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 # sched: [1:1.00]
   1339 ; ZNVER1-SSE-NEXT:    pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
   1340 ; ZNVER1-SSE-NEXT:    movdqa %xmm3, %xmm0 # sched: [1:0.25]
   1341 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1342 ;
   1343 ; ZNVER1-LABEL: test_pblendvb:
   1344 ; ZNVER1:       # %bb.0:
   1345 ; ZNVER1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   1346 ; ZNVER1-NEXT:    vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   1347 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; First blend: every operand is a function argument passed in a register.
   1348   %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2)
          ; Second blend: consumes %1, takes its second operand from memory, and
          ; reuses %a2 as the third operand.
   1349   %2 = load <16 x i8>, <16 x i8> *%a3, align 16
   1350   %3 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %1, <16 x i8> %2, <16 x i8> %a2)
   1351   ret <16 x i8> %3
   1352 }
   1353 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
   1354 
   1355 define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
        ; Schedule test for the 16-bit lane blend, written as shufflevector rather
        ; than an intrinsic. Two blends are built, one between the two register
        ; arguments and one between %a1 and a vector loaded from %a2, and their
        ; results are added. The expected assembly is a register-form blend, a
        ; memory-form blend and a word add, each with its "# sched" annotation.
   1356 ; GENERIC-LABEL: test_pblendw:
   1357 ; GENERIC:       # %bb.0:
   1358 ; GENERIC-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
   1359 ; GENERIC-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
   1360 ; GENERIC-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   1361 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1362 ;
   1363 ; SLM-LABEL: test_pblendw:
   1364 ; SLM:       # %bb.0:
   1365 ; SLM-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
   1366 ; SLM-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [4:1.00]
   1367 ; SLM-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   1368 ; SLM-NEXT:    retq # sched: [4:1.00]
   1369 ;
   1370 ; SANDY-SSE-LABEL: test_pblendw:
   1371 ; SANDY-SSE:       # %bb.0:
   1372 ; SANDY-SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
   1373 ; SANDY-SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
   1374 ; SANDY-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   1375 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1376 ;
   1377 ; SANDY-LABEL: test_pblendw:
   1378 ; SANDY:       # %bb.0:
   1379 ; SANDY-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
   1380 ; SANDY-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
   1381 ; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1382 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1383 ;
   1384 ; HASWELL-SSE-LABEL: test_pblendw:
   1385 ; HASWELL-SSE:       # %bb.0:
   1386 ; HASWELL-SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
   1387 ; HASWELL-SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
   1388 ; HASWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   1389 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1390 ;
   1391 ; HASWELL-LABEL: test_pblendw:
   1392 ; HASWELL:       # %bb.0:
   1393 ; HASWELL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
   1394 ; HASWELL-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
   1395 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1396 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1397 ;
   1398 ; BROADWELL-SSE-LABEL: test_pblendw:
   1399 ; BROADWELL-SSE:       # %bb.0:
   1400 ; BROADWELL-SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
   1401 ; BROADWELL-SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00]
   1402 ; BROADWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   1403 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1404 ;
   1405 ; BROADWELL-LABEL: test_pblendw:
   1406 ; BROADWELL:       # %bb.0:
   1407 ; BROADWELL-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
   1408 ; BROADWELL-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00]
   1409 ; BROADWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1410 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1411 ;
   1412 ; SKYLAKE-SSE-LABEL: test_pblendw:
   1413 ; SKYLAKE-SSE:       # %bb.0:
   1414 ; SKYLAKE-SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
   1415 ; SKYLAKE-SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
   1416 ; SKYLAKE-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
   1417 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1418 ;
   1419 ; SKYLAKE-LABEL: test_pblendw:
   1420 ; SKYLAKE:       # %bb.0:
   1421 ; SKYLAKE-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
   1422 ; SKYLAKE-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
   1423 ; SKYLAKE-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   1424 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1425 ;
   1426 ; SKX-SSE-LABEL: test_pblendw:
   1427 ; SKX-SSE:       # %bb.0:
   1428 ; SKX-SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
   1429 ; SKX-SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
   1430 ; SKX-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
   1431 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1432 ;
   1433 ; SKX-LABEL: test_pblendw:
   1434 ; SKX:       # %bb.0:
   1435 ; SKX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
   1436 ; SKX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
   1437 ; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   1438 ; SKX-NEXT:    retq # sched: [7:1.00]
   1439 ;
   1440 ; BTVER2-SSE-LABEL: test_pblendw:
   1441 ; BTVER2-SSE:       # %bb.0:
   1442 ; BTVER2-SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
   1443 ; BTVER2-SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00]
   1444 ; BTVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   1445 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1446 ;
   1447 ; BTVER2-LABEL: test_pblendw:
   1448 ; BTVER2:       # %bb.0:
   1449 ; BTVER2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
   1450 ; BTVER2-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00]
   1451 ; BTVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1452 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1453 ;
   1454 ; ZNVER1-SSE-LABEL: test_pblendw:
   1455 ; ZNVER1-SSE:       # %bb.0:
   1456 ; ZNVER1-SSE-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33]
   1457 ; ZNVER1-SSE-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [8:0.50]
   1458 ; ZNVER1-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.25]
   1459 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1460 ;
   1461 ; ZNVER1-LABEL: test_pblendw:
   1462 ; ZNVER1:       # %bb.0:
   1463 ; ZNVER1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33]
   1464 ; ZNVER1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [8:0.50]
   1465 ; ZNVER1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   1466 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register blend: even lanes come from %a0, odd lanes from %a1 (mask
          ; indices 8-15 select from the second shuffle operand).
   1467   %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
          ; Memory blend: lanes 2, 3 and 7 come from the loaded vector, the rest
          ; from %a1.
   1468   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   1469   %3 = shufflevector <8 x i16> %a1, <8 x i16> %2, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
          ; The add consumes both shuffle results, so both feed the return value.
   1470   %4 = add <8 x i16> %1, %3
   1471   ret <8 x i16> %4
   1472 }
   1473 
   1474 define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
        ; Schedule test for the 64-bit lane equality compare, written as generic
        ; IR (icmp eq followed by sext) rather than an intrinsic. The first compare
        ; uses the two register arguments; the second compares that result with a
        ; vector loaded from %a2. The expected assembly is a register form and a
        ; (%rdi) memory form of pcmpeqq/vpcmpeqq, each with its "# sched"
        ; annotation.
   1475 ; GENERIC-LABEL: test_pcmpeqq:
   1476 ; GENERIC:       # %bb.0:
   1477 ; GENERIC-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
   1478 ; GENERIC-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
   1479 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1480 ;
   1481 ; SLM-LABEL: test_pcmpeqq:
   1482 ; SLM:       # %bb.0:
   1483 ; SLM-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
   1484 ; SLM-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [4:1.00]
   1485 ; SLM-NEXT:    retq # sched: [4:1.00]
   1486 ;
   1487 ; SANDY-SSE-LABEL: test_pcmpeqq:
   1488 ; SANDY-SSE:       # %bb.0:
   1489 ; SANDY-SSE-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
   1490 ; SANDY-SSE-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
   1491 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1492 ;
   1493 ; SANDY-LABEL: test_pcmpeqq:
   1494 ; SANDY:       # %bb.0:
   1495 ; SANDY-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1496 ; SANDY-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1497 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1498 ;
   1499 ; HASWELL-SSE-LABEL: test_pcmpeqq:
   1500 ; HASWELL-SSE:       # %bb.0:
   1501 ; HASWELL-SSE-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
   1502 ; HASWELL-SSE-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
   1503 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1504 ;
   1505 ; HASWELL-LABEL: test_pcmpeqq:
   1506 ; HASWELL:       # %bb.0:
   1507 ; HASWELL-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1508 ; HASWELL-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1509 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1510 ;
   1511 ; BROADWELL-SSE-LABEL: test_pcmpeqq:
   1512 ; BROADWELL-SSE:       # %bb.0:
   1513 ; BROADWELL-SSE-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
   1514 ; BROADWELL-SSE-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [6:0.50]
   1515 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1516 ;
   1517 ; BROADWELL-LABEL: test_pcmpeqq:
   1518 ; BROADWELL:       # %bb.0:
   1519 ; BROADWELL-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1520 ; BROADWELL-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   1521 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1522 ;
   1523 ; SKYLAKE-SSE-LABEL: test_pcmpeqq:
   1524 ; SKYLAKE-SSE:       # %bb.0:
   1525 ; SKYLAKE-SSE-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
   1526 ; SKYLAKE-SSE-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
   1527 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1528 ;
   1529 ; SKYLAKE-LABEL: test_pcmpeqq:
   1530 ; SKYLAKE:       # %bb.0:
   1531 ; SKYLAKE-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1532 ; SKYLAKE-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1533 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1534 ;
   1535 ; SKX-SSE-LABEL: test_pcmpeqq:
   1536 ; SKX-SSE:       # %bb.0:
   1537 ; SKX-SSE-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
   1538 ; SKX-SSE-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
   1539 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1540 ;
   1541 ; SKX-LABEL: test_pcmpeqq:
   1542 ; SKX:       # %bb.0:
   1543 ; SKX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1544 ; SKX-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   1545 ; SKX-NEXT:    retq # sched: [7:1.00]
   1546 ;
   1547 ; BTVER2-SSE-LABEL: test_pcmpeqq:
   1548 ; BTVER2-SSE:       # %bb.0:
   1549 ; BTVER2-SSE-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
   1550 ; BTVER2-SSE-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [6:1.00]
   1551 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1552 ;
   1553 ; BTVER2-LABEL: test_pcmpeqq:
   1554 ; BTVER2:       # %bb.0:
   1555 ; BTVER2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1556 ; BTVER2-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   1557 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1558 ;
   1559 ; ZNVER1-SSE-LABEL: test_pcmpeqq:
   1560 ; ZNVER1-SSE:       # %bb.0:
   1561 ; ZNVER1-SSE-NEXT:    pcmpeqq %xmm1, %xmm0 # sched: [1:0.25]
   1562 ; ZNVER1-SSE-NEXT:    pcmpeqq (%rdi), %xmm0 # sched: [8:0.50]
   1563 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1564 ;
   1565 ; ZNVER1-LABEL: test_pcmpeqq:
   1566 ; ZNVER1:       # %bb.0:
   1567 ; ZNVER1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   1568 ; ZNVER1-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   1569 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register compare: the <2 x i1> result is sign-extended to a full-width
          ; <2 x i64> mask.
   1570   %1 = icmp eq <2 x i64> %a0, %a1
   1571   %2 = sext <2 x i1> %1 to <2 x i64>
          ; Memory compare: the mask from above is compared against the loaded
          ; vector and sign-extended again.
   1572   %3 = load <2 x i64>, <2 x i64>*%a2, align 16
   1573   %4 = icmp eq <2 x i64> %2, %3
   1574   %5 = sext <2 x i1> %4 to <2 x i64>
   1575   ret <2 x i64> %5
   1576 }
   1577 
   1578 define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) {
        ; Schedule test for byte extraction from a vector. Lane 3 is extracted,
        ; zero-extended to i32 and returned; lane 1 is extracted and stored through
        ; %a1. The expected assembly is pextrb/vpextrb with a general-purpose
        ; register destination (%eax) and with a memory destination ((%rdi)), each
        ; with its "# sched" annotation.
   1579 ; GENERIC-LABEL: test_pextrb:
   1580 ; GENERIC:       # %bb.0:
   1581 ; GENERIC-NEXT:    pextrb $3, %xmm0, %eax # sched: [3:1.00]
   1582 ; GENERIC-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
   1583 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1584 ;
   1585 ; SLM-LABEL: test_pextrb:
   1586 ; SLM:       # %bb.0:
   1587 ; SLM-NEXT:    pextrb $3, %xmm0, %eax # sched: [1:1.00]
   1588 ; SLM-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [4:2.00]
   1589 ; SLM-NEXT:    retq # sched: [4:1.00]
   1590 ;
   1591 ; SANDY-SSE-LABEL: test_pextrb:
   1592 ; SANDY-SSE:       # %bb.0:
   1593 ; SANDY-SSE-NEXT:    pextrb $3, %xmm0, %eax # sched: [3:1.00]
   1594 ; SANDY-SSE-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
   1595 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1596 ;
   1597 ; SANDY-LABEL: test_pextrb:
   1598 ; SANDY:       # %bb.0:
   1599 ; SANDY-NEXT:    vpextrb $3, %xmm0, %eax # sched: [3:1.00]
   1600 ; SANDY-NEXT:    vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
   1601 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1602 ;
   1603 ; HASWELL-SSE-LABEL: test_pextrb:
   1604 ; HASWELL-SSE:       # %bb.0:
   1605 ; HASWELL-SSE-NEXT:    pextrb $3, %xmm0, %eax # sched: [2:1.00]
   1606 ; HASWELL-SSE-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
   1607 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1608 ;
   1609 ; HASWELL-LABEL: test_pextrb:
   1610 ; HASWELL:       # %bb.0:
   1611 ; HASWELL-NEXT:    vpextrb $3, %xmm0, %eax # sched: [2:1.00]
   1612 ; HASWELL-NEXT:    vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
   1613 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1614 ;
   1615 ; BROADWELL-SSE-LABEL: test_pextrb:
   1616 ; BROADWELL-SSE:       # %bb.0:
   1617 ; BROADWELL-SSE-NEXT:    pextrb $3, %xmm0, %eax # sched: [2:1.00]
   1618 ; BROADWELL-SSE-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
   1619 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1620 ;
   1621 ; BROADWELL-LABEL: test_pextrb:
   1622 ; BROADWELL:       # %bb.0:
   1623 ; BROADWELL-NEXT:    vpextrb $3, %xmm0, %eax # sched: [2:1.00]
   1624 ; BROADWELL-NEXT:    vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
   1625 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1626 ;
   1627 ; SKYLAKE-SSE-LABEL: test_pextrb:
   1628 ; SKYLAKE-SSE:       # %bb.0:
   1629 ; SKYLAKE-SSE-NEXT:    pextrb $3, %xmm0, %eax # sched: [3:1.00]
   1630 ; SKYLAKE-SSE-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
   1631 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1632 ;
   1633 ; SKYLAKE-LABEL: test_pextrb:
   1634 ; SKYLAKE:       # %bb.0:
   1635 ; SKYLAKE-NEXT:    vpextrb $3, %xmm0, %eax # sched: [3:1.00]
   1636 ; SKYLAKE-NEXT:    vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
   1637 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1638 ;
   1639 ; SKX-SSE-LABEL: test_pextrb:
   1640 ; SKX-SSE:       # %bb.0:
   1641 ; SKX-SSE-NEXT:    pextrb $3, %xmm0, %eax # sched: [3:1.00]
   1642 ; SKX-SSE-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
   1643 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1644 ;
   1645 ; SKX-LABEL: test_pextrb:
   1646 ; SKX:       # %bb.0:
   1647 ; SKX-NEXT:    vpextrb $3, %xmm0, %eax # sched: [3:1.00]
   1648 ; SKX-NEXT:    vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
   1649 ; SKX-NEXT:    retq # sched: [7:1.00]
   1650 ;
   1651 ; BTVER2-SSE-LABEL: test_pextrb:
   1652 ; BTVER2-SSE:       # %bb.0:
   1653 ; BTVER2-SSE-NEXT:    pextrb $3, %xmm0, %eax # sched: [3:1.00]
   1654 ; BTVER2-SSE-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [3:1.00]
   1655 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1656 ;
   1657 ; BTVER2-LABEL: test_pextrb:
   1658 ; BTVER2:       # %bb.0:
   1659 ; BTVER2-NEXT:    vpextrb $3, %xmm0, %eax # sched: [3:1.00]
   1660 ; BTVER2-NEXT:    vpextrb $1, %xmm0, (%rdi) # sched: [3:1.00]
   1661 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1662 ;
   1663 ; ZNVER1-SSE-LABEL: test_pextrb:
   1664 ; ZNVER1-SSE:       # %bb.0:
   1665 ; ZNVER1-SSE-NEXT:    pextrb $3, %xmm0, %eax # sched: [2:2.00]
   1666 ; ZNVER1-SSE-NEXT:    pextrb $1, %xmm0, (%rdi) # sched: [5:3.00]
   1667 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1668 ;
   1669 ; ZNVER1-LABEL: test_pextrb:
   1670 ; ZNVER1:       # %bb.0:
   1671 ; ZNVER1-NEXT:    vpextrb $3, %xmm0, %eax # sched: [2:2.00]
   1672 ; ZNVER1-NEXT:    vpextrb $1, %xmm0, (%rdi) # sched: [5:3.00]
   1673 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Lane 3 becomes the return value (after the zext below); lane 1 is
          ; written to memory.
   1674   %1 = extractelement <16 x i8> %a0, i32 3
   1675   %2 = extractelement <16 x i8> %a0, i32 1
   1676   store i8 %2, i8 *%a1
   1677   %3 = zext i8 %1 to i32
   1678   ret i32 %3
   1679 }
   1680 
   1681 define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
        ; Schedule test for 32-bit lane extraction from a vector. The input is
        ; first added to itself, then lane 3 of the sum is returned and lane 1 is
        ; stored through %a1. The expected assembly is a paddd/vpaddd followed by
        ; pextrd/vpextrd with a register destination (%eax) and with a memory
        ; destination ((%rdi)), each with its "# sched" annotation.
   1682 ; GENERIC-LABEL: test_pextrd:
   1683 ; GENERIC:       # %bb.0:
   1684 ; GENERIC-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.50]
   1685 ; GENERIC-NEXT:    pextrd $3, %xmm0, %eax # sched: [3:1.00]
   1686 ; GENERIC-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
   1687 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1688 ;
   1689 ; SLM-LABEL: test_pextrd:
   1690 ; SLM:       # %bb.0:
   1691 ; SLM-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.50]
   1692 ; SLM-NEXT:    pextrd $3, %xmm0, %eax # sched: [1:1.00]
   1693 ; SLM-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [4:2.00]
   1694 ; SLM-NEXT:    retq # sched: [4:1.00]
   1695 ;
   1696 ; SANDY-SSE-LABEL: test_pextrd:
   1697 ; SANDY-SSE:       # %bb.0:
   1698 ; SANDY-SSE-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.50]
   1699 ; SANDY-SSE-NEXT:    pextrd $3, %xmm0, %eax # sched: [3:1.00]
   1700 ; SANDY-SSE-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
   1701 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1702 ;
   1703 ; SANDY-LABEL: test_pextrd:
   1704 ; SANDY:       # %bb.0:
   1705 ; SANDY-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
   1706 ; SANDY-NEXT:    vpextrd $3, %xmm0, %eax # sched: [3:1.00]
   1707 ; SANDY-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
   1708 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1709 ;
   1710 ; HASWELL-SSE-LABEL: test_pextrd:
   1711 ; HASWELL-SSE:       # %bb.0:
   1712 ; HASWELL-SSE-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.50]
   1713 ; HASWELL-SSE-NEXT:    pextrd $3, %xmm0, %eax # sched: [2:1.00]
   1714 ; HASWELL-SSE-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
   1715 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1716 ;
   1717 ; HASWELL-LABEL: test_pextrd:
   1718 ; HASWELL:       # %bb.0:
   1719 ; HASWELL-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
   1720 ; HASWELL-NEXT:    vpextrd $3, %xmm0, %eax # sched: [2:1.00]
   1721 ; HASWELL-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
   1722 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1723 ;
   1724 ; BROADWELL-SSE-LABEL: test_pextrd:
   1725 ; BROADWELL-SSE:       # %bb.0:
   1726 ; BROADWELL-SSE-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.50]
   1727 ; BROADWELL-SSE-NEXT:    pextrd $3, %xmm0, %eax # sched: [2:1.00]
   1728 ; BROADWELL-SSE-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
   1729 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1730 ;
   1731 ; BROADWELL-LABEL: test_pextrd:
   1732 ; BROADWELL:       # %bb.0:
   1733 ; BROADWELL-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
   1734 ; BROADWELL-NEXT:    vpextrd $3, %xmm0, %eax # sched: [2:1.00]
   1735 ; BROADWELL-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
   1736 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1737 ;
   1738 ; SKYLAKE-SSE-LABEL: test_pextrd:
   1739 ; SKYLAKE-SSE:       # %bb.0:
   1740 ; SKYLAKE-SSE-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.33]
   1741 ; SKYLAKE-SSE-NEXT:    pextrd $3, %xmm0, %eax # sched: [3:1.00]
   1742 ; SKYLAKE-SSE-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
   1743 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1744 ;
   1745 ; SKYLAKE-LABEL: test_pextrd:
   1746 ; SKYLAKE:       # %bb.0:
   1747 ; SKYLAKE-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
   1748 ; SKYLAKE-NEXT:    vpextrd $3, %xmm0, %eax # sched: [3:1.00]
   1749 ; SKYLAKE-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
   1750 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1751 ;
   1752 ; SKX-SSE-LABEL: test_pextrd:
   1753 ; SKX-SSE:       # %bb.0:
   1754 ; SKX-SSE-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.33]
   1755 ; SKX-SSE-NEXT:    pextrd $3, %xmm0, %eax # sched: [3:1.00]
   1756 ; SKX-SSE-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
   1757 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1758 ;
   1759 ; SKX-LABEL: test_pextrd:
   1760 ; SKX:       # %bb.0:
   1761 ; SKX-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
   1762 ; SKX-NEXT:    vpextrd $3, %xmm0, %eax # sched: [3:1.00]
   1763 ; SKX-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
   1764 ; SKX-NEXT:    retq # sched: [7:1.00]
   1765 ;
   1766 ; BTVER2-SSE-LABEL: test_pextrd:
   1767 ; BTVER2-SSE:       # %bb.0:
   1768 ; BTVER2-SSE-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.50]
   1769 ; BTVER2-SSE-NEXT:    pextrd $3, %xmm0, %eax # sched: [3:1.00]
   1770 ; BTVER2-SSE-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [3:1.00]
   1771 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1772 ;
   1773 ; BTVER2-LABEL: test_pextrd:
   1774 ; BTVER2:       # %bb.0:
   1775 ; BTVER2-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
   1776 ; BTVER2-NEXT:    vpextrd $3, %xmm0, %eax # sched: [3:1.00]
   1777 ; BTVER2-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [3:1.00]
   1778 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1779 ;
   1780 ; ZNVER1-SSE-LABEL: test_pextrd:
   1781 ; ZNVER1-SSE:       # %bb.0:
   1782 ; ZNVER1-SSE-NEXT:    paddd %xmm0, %xmm0 # sched: [1:0.25]
   1783 ; ZNVER1-SSE-NEXT:    pextrd $3, %xmm0, %eax # sched: [2:2.00]
   1784 ; ZNVER1-SSE-NEXT:    pextrd $1, %xmm0, (%rdi) # sched: [5:3.00]
   1785 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1786 ;
   1787 ; ZNVER1-LABEL: test_pextrd:
   1788 ; ZNVER1:       # %bb.0:
   1789 ; ZNVER1-NEXT:    vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
   1790 ; ZNVER1-NEXT:    vpextrd $3, %xmm0, %eax # sched: [2:2.00]
   1791 ; ZNVER1-NEXT:    vpextrd $1, %xmm0, (%rdi) # sched: [5:3.00]
   1792 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; NOTE(review): the self-add presumably keeps the value in the integer
          ; domain so that pextrd is selected instead of a float-domain extract;
          ; confirm before removing it.
   1793   %1 = add <4 x i32> %a0, %a0
          ; Lane 3 of the sum is the return value; lane 1 is written to memory.
   1794   %2 = extractelement <4 x i32> %1, i32 3
   1795   %3 = extractelement <4 x i32> %1, i32 1
   1796   store i32 %3, i32 *%a1
   1797   ret i32 %2
   1798 }
   1799 
   1800 define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) {
        ; Schedule test for 64-bit lane extraction from a vector. Lane 1 of %a0 is
        ; extracted twice: one copy is returned and the other is stored through
        ; %a2. The expected assembly is pextrq/vpextrq with a register destination
        ; (%rax) and with a memory destination ((%rdi)), each with its "# sched"
        ; annotation. The %a1 argument is not referenced by the body.
   1801 ; GENERIC-LABEL: test_pextrq:
   1802 ; GENERIC:       # %bb.0:
   1803 ; GENERIC-NEXT:    pextrq $1, %xmm0, %rax # sched: [3:1.00]
   1804 ; GENERIC-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
   1805 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1806 ;
   1807 ; SLM-LABEL: test_pextrq:
   1808 ; SLM:       # %bb.0:
   1809 ; SLM-NEXT:    pextrq $1, %xmm0, %rax # sched: [1:1.00]
   1810 ; SLM-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [4:2.00]
   1811 ; SLM-NEXT:    retq # sched: [4:1.00]
   1812 ;
   1813 ; SANDY-SSE-LABEL: test_pextrq:
   1814 ; SANDY-SSE:       # %bb.0:
   1815 ; SANDY-SSE-NEXT:    pextrq $1, %xmm0, %rax # sched: [3:1.00]
   1816 ; SANDY-SSE-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
   1817 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1818 ;
   1819 ; SANDY-LABEL: test_pextrq:
   1820 ; SANDY:       # %bb.0:
   1821 ; SANDY-NEXT:    vpextrq $1, %xmm0, %rax # sched: [3:1.00]
   1822 ; SANDY-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
   1823 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1824 ;
   1825 ; HASWELL-SSE-LABEL: test_pextrq:
   1826 ; HASWELL-SSE:       # %bb.0:
   1827 ; HASWELL-SSE-NEXT:    pextrq $1, %xmm0, %rax # sched: [2:1.00]
   1828 ; HASWELL-SSE-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
   1829 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1830 ;
   1831 ; HASWELL-LABEL: test_pextrq:
   1832 ; HASWELL:       # %bb.0:
   1833 ; HASWELL-NEXT:    vpextrq $1, %xmm0, %rax # sched: [2:1.00]
   1834 ; HASWELL-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
   1835 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1836 ;
   1837 ; BROADWELL-SSE-LABEL: test_pextrq:
   1838 ; BROADWELL-SSE:       # %bb.0:
   1839 ; BROADWELL-SSE-NEXT:    pextrq $1, %xmm0, %rax # sched: [2:1.00]
   1840 ; BROADWELL-SSE-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
   1841 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1842 ;
   1843 ; BROADWELL-LABEL: test_pextrq:
   1844 ; BROADWELL:       # %bb.0:
   1845 ; BROADWELL-NEXT:    vpextrq $1, %xmm0, %rax # sched: [2:1.00]
   1846 ; BROADWELL-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
   1847 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1848 ;
   1849 ; SKYLAKE-SSE-LABEL: test_pextrq:
   1850 ; SKYLAKE-SSE:       # %bb.0:
   1851 ; SKYLAKE-SSE-NEXT:    pextrq $1, %xmm0, %rax # sched: [3:1.00]
   1852 ; SKYLAKE-SSE-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
   1853 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1854 ;
   1855 ; SKYLAKE-LABEL: test_pextrq:
   1856 ; SKYLAKE:       # %bb.0:
   1857 ; SKYLAKE-NEXT:    vpextrq $1, %xmm0, %rax # sched: [3:1.00]
   1858 ; SKYLAKE-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
   1859 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1860 ;
   1861 ; SKX-SSE-LABEL: test_pextrq:
   1862 ; SKX-SSE:       # %bb.0:
   1863 ; SKX-SSE-NEXT:    pextrq $1, %xmm0, %rax # sched: [3:1.00]
   1864 ; SKX-SSE-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
   1865 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1866 ;
   1867 ; SKX-LABEL: test_pextrq:
   1868 ; SKX:       # %bb.0:
   1869 ; SKX-NEXT:    vpextrq $1, %xmm0, %rax # sched: [3:1.00]
   1870 ; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
   1871 ; SKX-NEXT:    retq # sched: [7:1.00]
   1872 ;
   1873 ; BTVER2-SSE-LABEL: test_pextrq:
   1874 ; BTVER2-SSE:       # %bb.0:
   1875 ; BTVER2-SSE-NEXT:    pextrq $1, %xmm0, %rax # sched: [3:1.00]
   1876 ; BTVER2-SSE-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [3:1.00]
   1877 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1878 ;
   1879 ; BTVER2-LABEL: test_pextrq:
   1880 ; BTVER2:       # %bb.0:
   1881 ; BTVER2-NEXT:    vpextrq $1, %xmm0, %rax # sched: [3:1.00]
   1882 ; BTVER2-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [3:1.00]
   1883 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1884 ;
   1885 ; ZNVER1-SSE-LABEL: test_pextrq:
   1886 ; ZNVER1-SSE:       # %bb.0:
   1887 ; ZNVER1-SSE-NEXT:    pextrq $1, %xmm0, %rax # sched: [2:2.00]
   1888 ; ZNVER1-SSE-NEXT:    pextrq $1, %xmm0, (%rdi) # sched: [5:3.00]
   1889 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1890 ;
   1891 ; ZNVER1-LABEL: test_pextrq:
   1892 ; ZNVER1:       # %bb.0:
   1893 ; ZNVER1-NEXT:    vpextrq $1, %xmm0, %rax # sched: [2:2.00]
   1894 ; ZNVER1-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [5:3.00]
   1895 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Both extracts read the same lane (1) of %a0: %1 is the return value
          ; and %2 is the stored value.
   1896   %1 = extractelement <2 x i64> %a0, i32 1
   1897   %2 = extractelement <2 x i64> %a0, i32 1
   1898   store i64 %2, i64 *%a2
   1899   ret i64 %1
   1900 }
   1901 
   1902 define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) {
   1903 ; GENERIC-LABEL: test_pextrw:
   1904 ; GENERIC:       # %bb.0:
   1905 ; GENERIC-NEXT:    pextrw $3, %xmm0, %eax # sched: [3:1.00]
   1906 ; GENERIC-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
   1907 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1908 ;
   1909 ; SLM-LABEL: test_pextrw:
   1910 ; SLM:       # %bb.0:
   1911 ; SLM-NEXT:    pextrw $3, %xmm0, %eax # sched: [1:1.00]
   1912 ; SLM-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [4:2.00]
   1913 ; SLM-NEXT:    retq # sched: [4:1.00]
   1914 ;
   1915 ; SANDY-SSE-LABEL: test_pextrw:
   1916 ; SANDY-SSE:       # %bb.0:
   1917 ; SANDY-SSE-NEXT:    pextrw $3, %xmm0, %eax # sched: [3:1.00]
   1918 ; SANDY-SSE-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
   1919 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1920 ;
   1921 ; SANDY-LABEL: test_pextrw:
   1922 ; SANDY:       # %bb.0:
   1923 ; SANDY-NEXT:    vpextrw $3, %xmm0, %eax # sched: [3:1.00]
   1924 ; SANDY-NEXT:    vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
   1925 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1926 ;
   1927 ; HASWELL-SSE-LABEL: test_pextrw:
   1928 ; HASWELL-SSE:       # %bb.0:
   1929 ; HASWELL-SSE-NEXT:    pextrw $3, %xmm0, %eax # sched: [2:1.00]
   1930 ; HASWELL-SSE-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
   1931 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1932 ;
   1933 ; HASWELL-LABEL: test_pextrw:
   1934 ; HASWELL:       # %bb.0:
   1935 ; HASWELL-NEXT:    vpextrw $3, %xmm0, %eax # sched: [2:1.00]
   1936 ; HASWELL-NEXT:    vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
   1937 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1938 ;
   1939 ; BROADWELL-SSE-LABEL: test_pextrw:
   1940 ; BROADWELL-SSE:       # %bb.0:
   1941 ; BROADWELL-SSE-NEXT:    pextrw $3, %xmm0, %eax # sched: [2:1.00]
   1942 ; BROADWELL-SSE-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
   1943 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1944 ;
   1945 ; BROADWELL-LABEL: test_pextrw:
   1946 ; BROADWELL:       # %bb.0:
   1947 ; BROADWELL-NEXT:    vpextrw $3, %xmm0, %eax # sched: [2:1.00]
   1948 ; BROADWELL-NEXT:    vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
   1949 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1950 ;
   1951 ; SKYLAKE-SSE-LABEL: test_pextrw:
   1952 ; SKYLAKE-SSE:       # %bb.0:
   1953 ; SKYLAKE-SSE-NEXT:    pextrw $3, %xmm0, %eax # sched: [3:1.00]
   1954 ; SKYLAKE-SSE-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
   1955 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1956 ;
   1957 ; SKYLAKE-LABEL: test_pextrw:
   1958 ; SKYLAKE:       # %bb.0:
   1959 ; SKYLAKE-NEXT:    vpextrw $3, %xmm0, %eax # sched: [3:1.00]
   1960 ; SKYLAKE-NEXT:    vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
   1961 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1962 ;
   1963 ; SKX-SSE-LABEL: test_pextrw:
   1964 ; SKX-SSE:       # %bb.0:
   1965 ; SKX-SSE-NEXT:    pextrw $3, %xmm0, %eax # sched: [3:1.00]
   1966 ; SKX-SSE-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
   1967 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1968 ;
   1969 ; SKX-LABEL: test_pextrw:
   1970 ; SKX:       # %bb.0:
   1971 ; SKX-NEXT:    vpextrw $3, %xmm0, %eax # sched: [3:1.00]
   1972 ; SKX-NEXT:    vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
   1973 ; SKX-NEXT:    retq # sched: [7:1.00]
   1974 ;
   1975 ; BTVER2-SSE-LABEL: test_pextrw:
   1976 ; BTVER2-SSE:       # %bb.0:
   1977 ; BTVER2-SSE-NEXT:    pextrw $3, %xmm0, %eax # sched: [3:1.00]
   1978 ; BTVER2-SSE-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [3:1.00]
   1979 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1980 ;
   1981 ; BTVER2-LABEL: test_pextrw:
   1982 ; BTVER2:       # %bb.0:
   1983 ; BTVER2-NEXT:    vpextrw $3, %xmm0, %eax # sched: [3:1.00]
   1984 ; BTVER2-NEXT:    vpextrw $1, %xmm0, (%rdi) # sched: [3:1.00]
   1985 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1986 ;
   1987 ; ZNVER1-SSE-LABEL: test_pextrw:
   1988 ; ZNVER1-SSE:       # %bb.0:
   1989 ; ZNVER1-SSE-NEXT:    pextrw $3, %xmm0, %eax # sched: [2:2.00]
   1990 ; ZNVER1-SSE-NEXT:    pextrw $1, %xmm0, (%rdi) # sched: [5:3.00]
   1991 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1992 ;
   1993 ; ZNVER1-LABEL: test_pextrw:
   1994 ; ZNVER1:       # %bb.0:
   1995 ; ZNVER1-NEXT:    vpextrw $3, %xmm0, %eax # sched: [2:2.00]
   1996 ; ZNVER1-NEXT:    vpextrw $1, %xmm0, (%rdi) # sched: [5:3.00]
   1997 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Word-extract test body. Lane 3 of %a0 is extracted and zero-extended to
        ; i32 for the return value; lane 1 is extracted and stored through %a1.
        ; The assertions above expect one register-destination and one
        ; memory-destination (v)pextrw per configuration, each followed by its
        ; "# sched" annotation.
   1998   %1 = extractelement <8 x i16> %a0, i32 3
   1999   %2 = extractelement <8 x i16> %a0, i32 1
   2000   store i16 %2, i16 *%a1
   2001   %3 = zext i16 %1 to i32
   2002   ret i32 %3
   2003 }
   2004 
   2005 define <8 x i16> @test_phminposuw(<8 x i16> *%a0) {
   2006 ; GENERIC-LABEL: test_phminposuw:
   2007 ; GENERIC:       # %bb.0:
   2008 ; GENERIC-NEXT:    phminposuw (%rdi), %xmm0 # sched: [11:1.00]
   2009 ; GENERIC-NEXT:    phminposuw %xmm0, %xmm0 # sched: [5:1.00]
   2010 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2011 ;
   2012 ; SLM-LABEL: test_phminposuw:
   2013 ; SLM:       # %bb.0:
   2014 ; SLM-NEXT:    phminposuw (%rdi), %xmm0 # sched: [7:1.00]
   2015 ; SLM-NEXT:    phminposuw %xmm0, %xmm0 # sched: [4:1.00]
   2016 ; SLM-NEXT:    retq # sched: [4:1.00]
   2017 ;
   2018 ; SANDY-SSE-LABEL: test_phminposuw:
   2019 ; SANDY-SSE:       # %bb.0:
   2020 ; SANDY-SSE-NEXT:    phminposuw (%rdi), %xmm0 # sched: [11:1.00]
   2021 ; SANDY-SSE-NEXT:    phminposuw %xmm0, %xmm0 # sched: [5:1.00]
   2022 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2023 ;
   2024 ; SANDY-LABEL: test_phminposuw:
   2025 ; SANDY:       # %bb.0:
   2026 ; SANDY-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
   2027 ; SANDY-NEXT:    vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
   2028 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2029 ;
   2030 ; HASWELL-SSE-LABEL: test_phminposuw:
   2031 ; HASWELL-SSE:       # %bb.0:
   2032 ; HASWELL-SSE-NEXT:    phminposuw (%rdi), %xmm0 # sched: [11:1.00]
   2033 ; HASWELL-SSE-NEXT:    phminposuw %xmm0, %xmm0 # sched: [5:1.00]
   2034 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2035 ;
   2036 ; HASWELL-LABEL: test_phminposuw:
   2037 ; HASWELL:       # %bb.0:
   2038 ; HASWELL-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
   2039 ; HASWELL-NEXT:    vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
   2040 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2041 ;
   2042 ; BROADWELL-SSE-LABEL: test_phminposuw:
   2043 ; BROADWELL-SSE:       # %bb.0:
   2044 ; BROADWELL-SSE-NEXT:    phminposuw (%rdi), %xmm0 # sched: [10:1.00]
   2045 ; BROADWELL-SSE-NEXT:    phminposuw %xmm0, %xmm0 # sched: [5:1.00]
   2046 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2047 ;
   2048 ; BROADWELL-LABEL: test_phminposuw:
   2049 ; BROADWELL:       # %bb.0:
   2050 ; BROADWELL-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [10:1.00]
   2051 ; BROADWELL-NEXT:    vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
   2052 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2053 ;
   2054 ; SKYLAKE-SSE-LABEL: test_phminposuw:
   2055 ; SKYLAKE-SSE:       # %bb.0:
   2056 ; SKYLAKE-SSE-NEXT:    phminposuw (%rdi), %xmm0 # sched: [10:0.50]
   2057 ; SKYLAKE-SSE-NEXT:    phminposuw %xmm0, %xmm0 # sched: [4:0.50]
   2058 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2059 ;
   2060 ; SKYLAKE-LABEL: test_phminposuw:
   2061 ; SKYLAKE:       # %bb.0:
   2062 ; SKYLAKE-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [10:0.50]
   2063 ; SKYLAKE-NEXT:    vphminposuw %xmm0, %xmm0 # sched: [4:0.50]
   2064 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2065 ;
   2066 ; SKX-SSE-LABEL: test_phminposuw:
   2067 ; SKX-SSE:       # %bb.0:
   2068 ; SKX-SSE-NEXT:    phminposuw (%rdi), %xmm0 # sched: [10:1.00]
   2069 ; SKX-SSE-NEXT:    phminposuw %xmm0, %xmm0 # sched: [4:1.00]
   2070 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2071 ;
   2072 ; SKX-LABEL: test_phminposuw:
   2073 ; SKX:       # %bb.0:
   2074 ; SKX-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [10:1.00]
   2075 ; SKX-NEXT:    vphminposuw %xmm0, %xmm0 # sched: [4:1.00]
   2076 ; SKX-NEXT:    retq # sched: [7:1.00]
   2077 ;
   2078 ; BTVER2-SSE-LABEL: test_phminposuw:
   2079 ; BTVER2-SSE:       # %bb.0:
   2080 ; BTVER2-SSE-NEXT:    phminposuw (%rdi), %xmm0 # sched: [7:1.00]
   2081 ; BTVER2-SSE-NEXT:    phminposuw %xmm0, %xmm0 # sched: [2:1.00]
   2082 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2083 ;
   2084 ; BTVER2-LABEL: test_phminposuw:
   2085 ; BTVER2:       # %bb.0:
   2086 ; BTVER2-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [7:1.00]
   2087 ; BTVER2-NEXT:    vphminposuw %xmm0, %xmm0 # sched: [2:1.00]
   2088 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2089 ;
   2090 ; ZNVER1-SSE-LABEL: test_phminposuw:
   2091 ; ZNVER1-SSE:       # %bb.0:
   2092 ; ZNVER1-SSE-NEXT:    phminposuw (%rdi), %xmm0 # sched: [11:1.00]
   2093 ; ZNVER1-SSE-NEXT:    phminposuw %xmm0, %xmm0 # sched: [4:1.00]
   2094 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2095 ;
   2096 ; ZNVER1-LABEL: test_phminposuw:
   2097 ; ZNVER1:       # %bb.0:
   2098 ; ZNVER1-NEXT:    vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
   2099 ; ZNVER1-NEXT:    vphminposuw %xmm0, %xmm0 # sched: [4:1.00]
   2100 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; phminposuw test body. A 16-byte-aligned <8 x i16> is loaded from %a0 and
        ; passed to the intrinsic, whose result is passed to the intrinsic a second
        ; time. The assertions above expect the first call to appear as the
        ; memory-source form (load folded) and the second as the register-source
        ; form of (v)phminposuw.
   2101   %1 = load <8 x i16>, <8 x i16> *%a0, align 16
   2102   %2 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %1)
   2103   %3 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %2)
   2104   ret <8 x i16> %3
   2105 }
        ; Declaration of the llvm.x86.sse41.phminposuw intrinsic called by the
        ; function above.
   2106 declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
   2107 
   2108 define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) {
   2109 ; GENERIC-LABEL: test_pinsrb:
   2110 ; GENERIC:       # %bb.0:
   2111 ; GENERIC-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [2:1.00]
   2112 ; GENERIC-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50]
   2113 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2114 ;
   2115 ; SLM-LABEL: test_pinsrb:
   2116 ; SLM:       # %bb.0:
   2117 ; SLM-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [1:1.00]
   2118 ; SLM-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [4:1.00]
   2119 ; SLM-NEXT:    retq # sched: [4:1.00]
   2120 ;
   2121 ; SANDY-SSE-LABEL: test_pinsrb:
   2122 ; SANDY-SSE:       # %bb.0:
   2123 ; SANDY-SSE-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [2:1.00]
   2124 ; SANDY-SSE-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50]
   2125 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2126 ;
   2127 ; SANDY-LABEL: test_pinsrb:
   2128 ; SANDY:       # %bb.0:
   2129 ; SANDY-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
   2130 ; SANDY-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
   2131 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2132 ;
   2133 ; HASWELL-SSE-LABEL: test_pinsrb:
   2134 ; HASWELL-SSE:       # %bb.0:
   2135 ; HASWELL-SSE-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
   2136 ; HASWELL-SSE-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
   2137 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2138 ;
   2139 ; HASWELL-LABEL: test_pinsrb:
   2140 ; HASWELL:       # %bb.0:
   2141 ; HASWELL-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
   2142 ; HASWELL-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
   2143 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2144 ;
   2145 ; BROADWELL-SSE-LABEL: test_pinsrb:
   2146 ; BROADWELL-SSE:       # %bb.0:
   2147 ; BROADWELL-SSE-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
   2148 ; BROADWELL-SSE-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
   2149 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2150 ;
   2151 ; BROADWELL-LABEL: test_pinsrb:
   2152 ; BROADWELL:       # %bb.0:
   2153 ; BROADWELL-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
   2154 ; BROADWELL-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
   2155 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2156 ;
   2157 ; SKYLAKE-SSE-LABEL: test_pinsrb:
   2158 ; SKYLAKE-SSE:       # %bb.0:
   2159 ; SKYLAKE-SSE-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
   2160 ; SKYLAKE-SSE-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
   2161 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2162 ;
   2163 ; SKYLAKE-LABEL: test_pinsrb:
   2164 ; SKYLAKE:       # %bb.0:
   2165 ; SKYLAKE-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
   2166 ; SKYLAKE-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
   2167 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2168 ;
   2169 ; SKX-SSE-LABEL: test_pinsrb:
   2170 ; SKX-SSE:       # %bb.0:
   2171 ; SKX-SSE-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
   2172 ; SKX-SSE-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
   2173 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2174 ;
   2175 ; SKX-LABEL: test_pinsrb:
   2176 ; SKX:       # %bb.0:
   2177 ; SKX-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
   2178 ; SKX-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
   2179 ; SKX-NEXT:    retq # sched: [7:1.00]
   2180 ;
   2181 ; BTVER2-SSE-LABEL: test_pinsrb:
   2182 ; BTVER2-SSE:       # %bb.0:
   2183 ; BTVER2-SSE-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [7:0.50]
   2184 ; BTVER2-SSE-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [4:1.00]
   2185 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2186 ;
   2187 ; BTVER2-LABEL: test_pinsrb:
   2188 ; BTVER2:       # %bb.0:
   2189 ; BTVER2-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [7:0.50]
   2190 ; BTVER2-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00]
   2191 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2192 ;
   2193 ; ZNVER1-SSE-LABEL: test_pinsrb:
   2194 ; ZNVER1-SSE:       # %bb.0:
   2195 ; ZNVER1-SSE-NEXT:    pinsrb $1, %edi, %xmm0 # sched: [1:0.25]
   2196 ; ZNVER1-SSE-NEXT:    pinsrb $3, (%rsi), %xmm0 # sched: [8:0.50]
   2197 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2198 ;
   2199 ; ZNVER1-LABEL: test_pinsrb:
   2200 ; ZNVER1:       # %bb.0:
   2201 ; ZNVER1-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
   2202 ; ZNVER1-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [8:0.50]
   2203 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Byte-insert test body. The scalar argument %a1 is inserted into lane 1 of
        ; %a0, then a byte loaded from %a2 is inserted into lane 3 of that result.
        ; The assertions above expect the first insert as the register-source form
        ; and the second as the memory-source form (load folded) of (v)pinsrb.
   2204   %1 = insertelement <16 x i8> %a0, i8 %a1, i32 1
   2205   %2 = load i8, i8 *%a2
   2206   %3 = insertelement <16 x i8> %1, i8 %2, i32 3
   2207   ret <16 x i8> %3
   2208 }
   2209 
   2210 define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
   2211 ; GENERIC-LABEL: test_pinsrd:
   2212 ; GENERIC:       # %bb.0:
   2213 ; GENERIC-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [2:1.00]
   2214 ; GENERIC-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50]
   2215 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2216 ;
   2217 ; SLM-LABEL: test_pinsrd:
   2218 ; SLM:       # %bb.0:
   2219 ; SLM-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [1:1.00]
   2220 ; SLM-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [4:1.00]
   2221 ; SLM-NEXT:    retq # sched: [4:1.00]
   2222 ;
   2223 ; SANDY-SSE-LABEL: test_pinsrd:
   2224 ; SANDY-SSE:       # %bb.0:
   2225 ; SANDY-SSE-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [2:1.00]
   2226 ; SANDY-SSE-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50]
   2227 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2228 ;
   2229 ; SANDY-LABEL: test_pinsrd:
   2230 ; SANDY:       # %bb.0:
   2231 ; SANDY-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
   2232 ; SANDY-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
   2233 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2234 ;
   2235 ; HASWELL-SSE-LABEL: test_pinsrd:
   2236 ; HASWELL-SSE:       # %bb.0:
   2237 ; HASWELL-SSE-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
   2238 ; HASWELL-SSE-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
   2239 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2240 ;
   2241 ; HASWELL-LABEL: test_pinsrd:
   2242 ; HASWELL:       # %bb.0:
   2243 ; HASWELL-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
   2244 ; HASWELL-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
   2245 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2246 ;
   2247 ; BROADWELL-SSE-LABEL: test_pinsrd:
   2248 ; BROADWELL-SSE:       # %bb.0:
   2249 ; BROADWELL-SSE-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
   2250 ; BROADWELL-SSE-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
   2251 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2252 ;
   2253 ; BROADWELL-LABEL: test_pinsrd:
   2254 ; BROADWELL:       # %bb.0:
   2255 ; BROADWELL-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
   2256 ; BROADWELL-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
   2257 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2258 ;
   2259 ; SKYLAKE-SSE-LABEL: test_pinsrd:
   2260 ; SKYLAKE-SSE:       # %bb.0:
   2261 ; SKYLAKE-SSE-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
   2262 ; SKYLAKE-SSE-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
   2263 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2264 ;
   2265 ; SKYLAKE-LABEL: test_pinsrd:
   2266 ; SKYLAKE:       # %bb.0:
   2267 ; SKYLAKE-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
   2268 ; SKYLAKE-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
   2269 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2270 ;
   2271 ; SKX-SSE-LABEL: test_pinsrd:
   2272 ; SKX-SSE:       # %bb.0:
   2273 ; SKX-SSE-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
   2274 ; SKX-SSE-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
   2275 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2276 ;
   2277 ; SKX-LABEL: test_pinsrd:
   2278 ; SKX:       # %bb.0:
   2279 ; SKX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
   2280 ; SKX-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
   2281 ; SKX-NEXT:    retq # sched: [7:1.00]
   2282 ;
   2283 ; BTVER2-SSE-LABEL: test_pinsrd:
   2284 ; BTVER2-SSE:       # %bb.0:
   2285 ; BTVER2-SSE-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [7:0.50]
   2286 ; BTVER2-SSE-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [4:1.00]
   2287 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2288 ;
   2289 ; BTVER2-LABEL: test_pinsrd:
   2290 ; BTVER2:       # %bb.0:
   2291 ; BTVER2-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [7:0.50]
   2292 ; BTVER2-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00]
   2293 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2294 ;
   2295 ; ZNVER1-SSE-LABEL: test_pinsrd:
   2296 ; ZNVER1-SSE:       # %bb.0:
   2297 ; ZNVER1-SSE-NEXT:    pinsrd $1, %edi, %xmm0 # sched: [1:0.25]
   2298 ; ZNVER1-SSE-NEXT:    pinsrd $3, (%rsi), %xmm0 # sched: [8:0.50]
   2299 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2300 ;
   2301 ; ZNVER1-LABEL: test_pinsrd:
   2302 ; ZNVER1:       # %bb.0:
   2303 ; ZNVER1-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
   2304 ; ZNVER1-NEXT:    vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [8:0.50]
   2305 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Dword-insert test body. The scalar argument %a1 is inserted into lane 1
        ; of %a0, then an i32 loaded from %a2 is inserted into lane 3 of that
        ; result. The assertions above expect the first insert as the
        ; register-source form and the second as the memory-source form (load
        ; folded) of (v)pinsrd.
   2306   %1 = insertelement <4 x i32> %a0, i32 %a1, i32 1
   2307   %2 = load i32, i32 *%a2
   2308   %3 = insertelement <4 x i32> %1, i32 %2, i32 3
   2309   ret <4 x i32> %3
   2310 }
   2311 
   2312 define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
   2313 ; GENERIC-LABEL: test_pinsrq:
   2314 ; GENERIC:       # %bb.0:
   2315 ; GENERIC-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [2:1.00]
   2316 ; GENERIC-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50]
   2317 ; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   2318 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2319 ;
   2320 ; SLM-LABEL: test_pinsrq:
   2321 ; SLM:       # %bb.0:
   2322 ; SLM-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [4:1.00]
   2323 ; SLM-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [1:1.00]
   2324 ; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   2325 ; SLM-NEXT:    retq # sched: [4:1.00]
   2326 ;
   2327 ; SANDY-SSE-LABEL: test_pinsrq:
   2328 ; SANDY-SSE:       # %bb.0:
   2329 ; SANDY-SSE-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [2:1.00]
   2330 ; SANDY-SSE-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50]
   2331 ; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   2332 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2333 ;
   2334 ; SANDY-LABEL: test_pinsrq:
   2335 ; SANDY:       # %bb.0:
   2336 ; SANDY-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00]
   2337 ; SANDY-NEXT:    vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50]
   2338 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2339 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2340 ;
   2341 ; HASWELL-SSE-LABEL: test_pinsrq:
   2342 ; HASWELL-SSE:       # %bb.0:
   2343 ; HASWELL-SSE-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
   2344 ; HASWELL-SSE-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
   2345 ; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   2346 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2347 ;
   2348 ; HASWELL-LABEL: test_pinsrq:
   2349 ; HASWELL:       # %bb.0:
   2350 ; HASWELL-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
   2351 ; HASWELL-NEXT:    vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
   2352 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2353 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2354 ;
   2355 ; BROADWELL-SSE-LABEL: test_pinsrq:
   2356 ; BROADWELL-SSE:       # %bb.0:
   2357 ; BROADWELL-SSE-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
   2358 ; BROADWELL-SSE-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
   2359 ; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   2360 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2361 ;
   2362 ; BROADWELL-LABEL: test_pinsrq:
   2363 ; BROADWELL:       # %bb.0:
   2364 ; BROADWELL-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
   2365 ; BROADWELL-NEXT:    vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
   2366 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2367 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2368 ;
   2369 ; SKYLAKE-SSE-LABEL: test_pinsrq:
   2370 ; SKYLAKE-SSE:       # %bb.0:
   2371 ; SKYLAKE-SSE-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
   2372 ; SKYLAKE-SSE-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
   2373 ; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   2374 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2375 ;
   2376 ; SKYLAKE-LABEL: test_pinsrq:
   2377 ; SKYLAKE:       # %bb.0:
   2378 ; SKYLAKE-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
   2379 ; SKYLAKE-NEXT:    vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
   2380 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   2381 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2382 ;
   2383 ; SKX-SSE-LABEL: test_pinsrq:
   2384 ; SKX-SSE:       # %bb.0:
   2385 ; SKX-SSE-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
   2386 ; SKX-SSE-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
   2387 ; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   2388 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2389 ;
   2390 ; SKX-LABEL: test_pinsrq:
   2391 ; SKX:       # %bb.0:
   2392 ; SKX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
   2393 ; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
   2394 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   2395 ; SKX-NEXT:    retq # sched: [7:1.00]
   2396 ;
   2397 ; BTVER2-SSE-LABEL: test_pinsrq:
   2398 ; BTVER2-SSE:       # %bb.0:
   2399 ; BTVER2-SSE-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [7:0.50]
   2400 ; BTVER2-SSE-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [4:1.00]
   2401 ; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   2402 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2403 ;
   2404 ; BTVER2-LABEL: test_pinsrq:
   2405 ; BTVER2:       # %bb.0:
   2406 ; BTVER2-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [7:0.50]
   2407 ; BTVER2-NEXT:    vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [4:1.00]
   2408 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2409 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2410 ;
   2411 ; ZNVER1-SSE-LABEL: test_pinsrq:
   2412 ; ZNVER1-SSE:       # %bb.0:
   2413 ; ZNVER1-SSE-NEXT:    pinsrq $1, (%rsi), %xmm1 # sched: [8:0.50]
   2414 ; ZNVER1-SSE-NEXT:    pinsrq $1, %rdi, %xmm0 # sched: [1:0.25]
   2415 ; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
   2416 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2417 ;
   2418 ; ZNVER1-LABEL: test_pinsrq:
   2419 ; ZNVER1:       # %bb.0:
   2420 ; ZNVER1-NEXT:    vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [8:0.50]
   2421 ; ZNVER1-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:0.25]
   2422 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   2423 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Qword-insert test body. The scalar %a2 is inserted into lane 1 of %a0, and
        ; an i64 loaded from %a3 is inserted into lane 1 of the separate vector
        ; %a1. The two results are then added so both inserts feed the return
        ; value. The assertions above expect a register-source and a memory-source
        ; (v)pinsrq followed by (v)paddq. In the slm and znver1 groups the
        ; memory-source insert is listed first; the other groups list the
        ; register-source insert first.
   2424   %1 = insertelement <2 x i64> %a0, i64 %a2, i32 1
   2425   %2 = load i64, i64 *%a3
   2426   %3 = insertelement <2 x i64> %a1, i64 %2, i32 1
   2427   %4 = add <2 x i64> %1, %3
   2428   ret <2 x i64> %4
   2429 }
   2430 
   2431 define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
   2432 ; GENERIC-LABEL: test_pmaxsb:
   2433 ; GENERIC:       # %bb.0:
   2434 ; GENERIC-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
   2435 ; GENERIC-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
   2436 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2437 ;
   2438 ; SLM-LABEL: test_pmaxsb:
   2439 ; SLM:       # %bb.0:
   2440 ; SLM-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
   2441 ; SLM-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [4:1.00]
   2442 ; SLM-NEXT:    retq # sched: [4:1.00]
   2443 ;
   2444 ; SANDY-SSE-LABEL: test_pmaxsb:
   2445 ; SANDY-SSE:       # %bb.0:
   2446 ; SANDY-SSE-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
   2447 ; SANDY-SSE-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
   2448 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2449 ;
   2450 ; SANDY-LABEL: test_pmaxsb:
   2451 ; SANDY:       # %bb.0:
   2452 ; SANDY-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2453 ; SANDY-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2454 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2455 ;
   2456 ; HASWELL-SSE-LABEL: test_pmaxsb:
   2457 ; HASWELL-SSE:       # %bb.0:
   2458 ; HASWELL-SSE-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
   2459 ; HASWELL-SSE-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
   2460 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2461 ;
   2462 ; HASWELL-LABEL: test_pmaxsb:
   2463 ; HASWELL:       # %bb.0:
   2464 ; HASWELL-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2465 ; HASWELL-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2466 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2467 ;
   2468 ; BROADWELL-SSE-LABEL: test_pmaxsb:
   2469 ; BROADWELL-SSE:       # %bb.0:
   2470 ; BROADWELL-SSE-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
   2471 ; BROADWELL-SSE-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [6:0.50]
   2472 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2473 ;
   2474 ; BROADWELL-LABEL: test_pmaxsb:
   2475 ; BROADWELL:       # %bb.0:
   2476 ; BROADWELL-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2477 ; BROADWELL-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   2478 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2479 ;
   2480 ; SKYLAKE-SSE-LABEL: test_pmaxsb:
   2481 ; SKYLAKE-SSE:       # %bb.0:
   2482 ; SKYLAKE-SSE-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
   2483 ; SKYLAKE-SSE-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
   2484 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2485 ;
   2486 ; SKYLAKE-LABEL: test_pmaxsb:
   2487 ; SKYLAKE:       # %bb.0:
   2488 ; SKYLAKE-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2489 ; SKYLAKE-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2490 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2491 ;
   2492 ; SKX-SSE-LABEL: test_pmaxsb:
   2493 ; SKX-SSE:       # %bb.0:
   2494 ; SKX-SSE-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
   2495 ; SKX-SSE-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
   2496 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2497 ;
   2498 ; SKX-LABEL: test_pmaxsb:
   2499 ; SKX:       # %bb.0:
   2500 ; SKX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2501 ; SKX-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2502 ; SKX-NEXT:    retq # sched: [7:1.00]
   2503 ;
   2504 ; BTVER2-SSE-LABEL: test_pmaxsb:
   2505 ; BTVER2-SSE:       # %bb.0:
   2506 ; BTVER2-SSE-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
   2507 ; BTVER2-SSE-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [6:1.00]
   2508 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2509 ;
   2510 ; BTVER2-LABEL: test_pmaxsb:
   2511 ; BTVER2:       # %bb.0:
   2512 ; BTVER2-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2513 ; BTVER2-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   2514 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2515 ;
   2516 ; ZNVER1-SSE-LABEL: test_pmaxsb:
   2517 ; ZNVER1-SSE:       # %bb.0:
   2518 ; ZNVER1-SSE-NEXT:    pmaxsb %xmm1, %xmm0 # sched: [1:0.25]
   2519 ; ZNVER1-SSE-NEXT:    pmaxsb (%rdi), %xmm0 # sched: [8:0.50]
   2520 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2521 ;
   2522 ; ZNVER1-LABEL: test_pmaxsb:
   2523 ; ZNVER1:       # %bb.0:
   2524 ; ZNVER1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   2525 ; ZNVER1-NEXT:    vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   2526 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; pmaxsb test body. The intrinsic is first applied to the two vector
        ; arguments, then applied again to that result and a 16-byte-aligned vector
        ; loaded from %a2. The assertions above expect the first call as the
        ; register-source form and the second as the memory-source form (load
        ; folded) of (v)pmaxsb.
   2527   %1 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
   2528   %2 = load <16 x i8>, <16 x i8> *%a2, align 16
   2529   %3 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %1, <16 x i8> %2)
   2530   ret <16 x i8> %3
   2531 }
        ; Declaration of the llvm.x86.sse41.pmaxsb intrinsic called by the function
        ; above.
   2532 declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
   2533 
   2534 define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
        ; Calls llvm.x86.sse41.pmaxsd twice: first on %a0 and %a1, then on that
        ; result and a vector loaded from %a2. Every per-CPU assertion group
        ; below expects a register-operand pmaxsd/vpmaxsd followed by a
        ; memory-operand (%rdi) pmaxsd/vpmaxsd and a retq, each carrying its
        ; "# sched" annotation. The assertions are autogenerated (see the NOTE
        ; at the top of this file): regenerate them rather than editing by hand.
   2535 ; GENERIC-LABEL: test_pmaxsd:
   2536 ; GENERIC:       # %bb.0:
   2537 ; GENERIC-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
   2538 ; GENERIC-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
   2539 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2540 ;
   2541 ; SLM-LABEL: test_pmaxsd:
   2542 ; SLM:       # %bb.0:
   2543 ; SLM-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
   2544 ; SLM-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [4:1.00]
   2545 ; SLM-NEXT:    retq # sched: [4:1.00]
   2546 ;
   2547 ; SANDY-SSE-LABEL: test_pmaxsd:
   2548 ; SANDY-SSE:       # %bb.0:
   2549 ; SANDY-SSE-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
   2550 ; SANDY-SSE-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
   2551 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2552 ;
   2553 ; SANDY-LABEL: test_pmaxsd:
   2554 ; SANDY:       # %bb.0:
   2555 ; SANDY-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2556 ; SANDY-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2557 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2558 ;
   2559 ; HASWELL-SSE-LABEL: test_pmaxsd:
   2560 ; HASWELL-SSE:       # %bb.0:
   2561 ; HASWELL-SSE-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
   2562 ; HASWELL-SSE-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
   2563 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2564 ;
   2565 ; HASWELL-LABEL: test_pmaxsd:
   2566 ; HASWELL:       # %bb.0:
   2567 ; HASWELL-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2568 ; HASWELL-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2569 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2570 ;
   2571 ; BROADWELL-SSE-LABEL: test_pmaxsd:
   2572 ; BROADWELL-SSE:       # %bb.0:
   2573 ; BROADWELL-SSE-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
   2574 ; BROADWELL-SSE-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [6:0.50]
   2575 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2576 ;
   2577 ; BROADWELL-LABEL: test_pmaxsd:
   2578 ; BROADWELL:       # %bb.0:
   2579 ; BROADWELL-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2580 ; BROADWELL-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   2581 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2582 ;
   2583 ; SKYLAKE-SSE-LABEL: test_pmaxsd:
   2584 ; SKYLAKE-SSE:       # %bb.0:
   2585 ; SKYLAKE-SSE-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
   2586 ; SKYLAKE-SSE-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
   2587 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2588 ;
   2589 ; SKYLAKE-LABEL: test_pmaxsd:
   2590 ; SKYLAKE:       # %bb.0:
   2591 ; SKYLAKE-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2592 ; SKYLAKE-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2593 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2594 ;
   2595 ; SKX-SSE-LABEL: test_pmaxsd:
   2596 ; SKX-SSE:       # %bb.0:
   2597 ; SKX-SSE-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
   2598 ; SKX-SSE-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
   2599 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2600 ;
   2601 ; SKX-LABEL: test_pmaxsd:
   2602 ; SKX:       # %bb.0:
   2603 ; SKX-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2604 ; SKX-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2605 ; SKX-NEXT:    retq # sched: [7:1.00]
   2606 ;
   2607 ; BTVER2-SSE-LABEL: test_pmaxsd:
   2608 ; BTVER2-SSE:       # %bb.0:
   2609 ; BTVER2-SSE-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
   2610 ; BTVER2-SSE-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [6:1.00]
   2611 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2612 ;
   2613 ; BTVER2-LABEL: test_pmaxsd:
   2614 ; BTVER2:       # %bb.0:
   2615 ; BTVER2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2616 ; BTVER2-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   2617 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2618 ;
   2619 ; ZNVER1-SSE-LABEL: test_pmaxsd:
   2620 ; ZNVER1-SSE:       # %bb.0:
   2621 ; ZNVER1-SSE-NEXT:    pmaxsd %xmm1, %xmm0 # sched: [1:0.25]
   2622 ; ZNVER1-SSE-NEXT:    pmaxsd (%rdi), %xmm0 # sched: [8:0.50]
   2623 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2624 ;
   2625 ; ZNVER1-LABEL: test_pmaxsd:
   2626 ; ZNVER1:       # %bb.0:
   2627 ; ZNVER1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   2628 ; ZNVER1-NEXT:    vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   2629 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Register form: both operands are the incoming vector arguments.
   2630   %1 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
        ; Memory form: the assertions above show no separate load instruction,
        ; only the (%rdi) operand on the second pmaxsd/vpmaxsd.
   2631   %2 = load <4 x i32>, <4 x i32> *%a2, align 16
   2632   %3 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> %2)
   2633   ret <4 x i32> %3
   2634 }
        ; Intrinsic exercised by @test_pmaxsd above.
   2635 declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
   2636 
   2637 define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
        ; Calls llvm.x86.sse41.pmaxud twice: first on %a0 and %a1, then on that
        ; result and a vector loaded from %a2. Every per-CPU assertion group
        ; below expects a register-operand pmaxud/vpmaxud followed by a
        ; memory-operand (%rdi) pmaxud/vpmaxud and a retq, each carrying its
        ; "# sched" annotation. The assertions are autogenerated (see the NOTE
        ; at the top of this file): regenerate them rather than editing by hand.
   2638 ; GENERIC-LABEL: test_pmaxud:
   2639 ; GENERIC:       # %bb.0:
   2640 ; GENERIC-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.50]
   2641 ; GENERIC-NEXT:    pmaxud (%rdi), %xmm0 # sched: [7:0.50]
   2642 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2643 ;
   2644 ; SLM-LABEL: test_pmaxud:
   2645 ; SLM:       # %bb.0:
   2646 ; SLM-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.50]
   2647 ; SLM-NEXT:    pmaxud (%rdi), %xmm0 # sched: [4:1.00]
   2648 ; SLM-NEXT:    retq # sched: [4:1.00]
   2649 ;
   2650 ; SANDY-SSE-LABEL: test_pmaxud:
   2651 ; SANDY-SSE:       # %bb.0:
   2652 ; SANDY-SSE-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.50]
   2653 ; SANDY-SSE-NEXT:    pmaxud (%rdi), %xmm0 # sched: [7:0.50]
   2654 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2655 ;
   2656 ; SANDY-LABEL: test_pmaxud:
   2657 ; SANDY:       # %bb.0:
   2658 ; SANDY-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2659 ; SANDY-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2660 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2661 ;
   2662 ; HASWELL-SSE-LABEL: test_pmaxud:
   2663 ; HASWELL-SSE:       # %bb.0:
   2664 ; HASWELL-SSE-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.50]
   2665 ; HASWELL-SSE-NEXT:    pmaxud (%rdi), %xmm0 # sched: [7:0.50]
   2666 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2667 ;
   2668 ; HASWELL-LABEL: test_pmaxud:
   2669 ; HASWELL:       # %bb.0:
   2670 ; HASWELL-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2671 ; HASWELL-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2672 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2673 ;
   2674 ; BROADWELL-SSE-LABEL: test_pmaxud:
   2675 ; BROADWELL-SSE:       # %bb.0:
   2676 ; BROADWELL-SSE-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.50]
   2677 ; BROADWELL-SSE-NEXT:    pmaxud (%rdi), %xmm0 # sched: [6:0.50]
   2678 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2679 ;
   2680 ; BROADWELL-LABEL: test_pmaxud:
   2681 ; BROADWELL:       # %bb.0:
   2682 ; BROADWELL-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2683 ; BROADWELL-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   2684 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2685 ;
   2686 ; SKYLAKE-SSE-LABEL: test_pmaxud:
   2687 ; SKYLAKE-SSE:       # %bb.0:
   2688 ; SKYLAKE-SSE-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.50]
   2689 ; SKYLAKE-SSE-NEXT:    pmaxud (%rdi), %xmm0 # sched: [7:0.50]
   2690 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2691 ;
   2692 ; SKYLAKE-LABEL: test_pmaxud:
   2693 ; SKYLAKE:       # %bb.0:
   2694 ; SKYLAKE-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2695 ; SKYLAKE-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2696 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2697 ;
   2698 ; SKX-SSE-LABEL: test_pmaxud:
   2699 ; SKX-SSE:       # %bb.0:
   2700 ; SKX-SSE-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.50]
   2701 ; SKX-SSE-NEXT:    pmaxud (%rdi), %xmm0 # sched: [7:0.50]
   2702 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2703 ;
   2704 ; SKX-LABEL: test_pmaxud:
   2705 ; SKX:       # %bb.0:
   2706 ; SKX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2707 ; SKX-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2708 ; SKX-NEXT:    retq # sched: [7:1.00]
   2709 ;
   2710 ; BTVER2-SSE-LABEL: test_pmaxud:
   2711 ; BTVER2-SSE:       # %bb.0:
   2712 ; BTVER2-SSE-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.50]
   2713 ; BTVER2-SSE-NEXT:    pmaxud (%rdi), %xmm0 # sched: [6:1.00]
   2714 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2715 ;
   2716 ; BTVER2-LABEL: test_pmaxud:
   2717 ; BTVER2:       # %bb.0:
   2718 ; BTVER2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2719 ; BTVER2-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   2720 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2721 ;
   2722 ; ZNVER1-SSE-LABEL: test_pmaxud:
   2723 ; ZNVER1-SSE:       # %bb.0:
   2724 ; ZNVER1-SSE-NEXT:    pmaxud %xmm1, %xmm0 # sched: [1:0.25]
   2725 ; ZNVER1-SSE-NEXT:    pmaxud (%rdi), %xmm0 # sched: [8:0.50]
   2726 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2727 ;
   2728 ; ZNVER1-LABEL: test_pmaxud:
   2729 ; ZNVER1:       # %bb.0:
   2730 ; ZNVER1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   2731 ; ZNVER1-NEXT:    vpmaxud (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   2732 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Register form: both operands are the incoming vector arguments.
   2733   %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
        ; Memory form: the assertions above show no separate load instruction,
        ; only the (%rdi) operand on the second pmaxud/vpmaxud.
   2734   %2 = load <4 x i32>, <4 x i32> *%a2, align 16
   2735   %3 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %1, <4 x i32> %2)
   2736   ret <4 x i32> %3
   2737 }
        ; Intrinsic exercised by @test_pmaxud above.
   2738 declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
   2739 
   2740 define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
        ; Calls llvm.x86.sse41.pmaxuw twice: first on %a0 and %a1, then on that
        ; result and a vector loaded from %a2. Every per-CPU assertion group
        ; below expects a register-operand pmaxuw/vpmaxuw followed by a
        ; memory-operand (%rdi) pmaxuw/vpmaxuw and a retq, each carrying its
        ; "# sched" annotation. The assertions are autogenerated (see the NOTE
        ; at the top of this file): regenerate them rather than editing by hand.
   2741 ; GENERIC-LABEL: test_pmaxuw:
   2742 ; GENERIC:       # %bb.0:
   2743 ; GENERIC-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
   2744 ; GENERIC-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
   2745 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2746 ;
   2747 ; SLM-LABEL: test_pmaxuw:
   2748 ; SLM:       # %bb.0:
   2749 ; SLM-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
   2750 ; SLM-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [4:1.00]
   2751 ; SLM-NEXT:    retq # sched: [4:1.00]
   2752 ;
   2753 ; SANDY-SSE-LABEL: test_pmaxuw:
   2754 ; SANDY-SSE:       # %bb.0:
   2755 ; SANDY-SSE-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
   2756 ; SANDY-SSE-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
   2757 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2758 ;
   2759 ; SANDY-LABEL: test_pmaxuw:
   2760 ; SANDY:       # %bb.0:
   2761 ; SANDY-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2762 ; SANDY-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2763 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2764 ;
   2765 ; HASWELL-SSE-LABEL: test_pmaxuw:
   2766 ; HASWELL-SSE:       # %bb.0:
   2767 ; HASWELL-SSE-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
   2768 ; HASWELL-SSE-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
   2769 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2770 ;
   2771 ; HASWELL-LABEL: test_pmaxuw:
   2772 ; HASWELL:       # %bb.0:
   2773 ; HASWELL-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2774 ; HASWELL-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2775 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2776 ;
   2777 ; BROADWELL-SSE-LABEL: test_pmaxuw:
   2778 ; BROADWELL-SSE:       # %bb.0:
   2779 ; BROADWELL-SSE-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
   2780 ; BROADWELL-SSE-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [6:0.50]
   2781 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2782 ;
   2783 ; BROADWELL-LABEL: test_pmaxuw:
   2784 ; BROADWELL:       # %bb.0:
   2785 ; BROADWELL-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2786 ; BROADWELL-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   2787 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2788 ;
   2789 ; SKYLAKE-SSE-LABEL: test_pmaxuw:
   2790 ; SKYLAKE-SSE:       # %bb.0:
   2791 ; SKYLAKE-SSE-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
   2792 ; SKYLAKE-SSE-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
   2793 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2794 ;
   2795 ; SKYLAKE-LABEL: test_pmaxuw:
   2796 ; SKYLAKE:       # %bb.0:
   2797 ; SKYLAKE-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2798 ; SKYLAKE-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2799 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2800 ;
   2801 ; SKX-SSE-LABEL: test_pmaxuw:
   2802 ; SKX-SSE:       # %bb.0:
   2803 ; SKX-SSE-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
   2804 ; SKX-SSE-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
   2805 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2806 ;
   2807 ; SKX-LABEL: test_pmaxuw:
   2808 ; SKX:       # %bb.0:
   2809 ; SKX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2810 ; SKX-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2811 ; SKX-NEXT:    retq # sched: [7:1.00]
   2812 ;
   2813 ; BTVER2-SSE-LABEL: test_pmaxuw:
   2814 ; BTVER2-SSE:       # %bb.0:
   2815 ; BTVER2-SSE-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
   2816 ; BTVER2-SSE-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [6:1.00]
   2817 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2818 ;
   2819 ; BTVER2-LABEL: test_pmaxuw:
   2820 ; BTVER2:       # %bb.0:
   2821 ; BTVER2-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2822 ; BTVER2-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   2823 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2824 ;
   2825 ; ZNVER1-SSE-LABEL: test_pmaxuw:
   2826 ; ZNVER1-SSE:       # %bb.0:
   2827 ; ZNVER1-SSE-NEXT:    pmaxuw %xmm1, %xmm0 # sched: [1:0.25]
   2828 ; ZNVER1-SSE-NEXT:    pmaxuw (%rdi), %xmm0 # sched: [8:0.50]
   2829 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2830 ;
   2831 ; ZNVER1-LABEL: test_pmaxuw:
   2832 ; ZNVER1:       # %bb.0:
   2833 ; ZNVER1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   2834 ; ZNVER1-NEXT:    vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   2835 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Register form: both operands are the incoming vector arguments.
   2836   %1 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
        ; Memory form: the assertions above show no separate load instruction,
        ; only the (%rdi) operand on the second pmaxuw/vpmaxuw.
   2837   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   2838   %3 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %1, <8 x i16> %2)
   2839   ret <8 x i16> %3
   2840 }
        ; Intrinsic exercised by @test_pmaxuw above.
   2841 declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
   2842 
   2843 define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
        ; Calls llvm.x86.sse41.pminsb twice: first on %a0 and %a1, then on that
        ; result and a vector loaded from %a2. Every per-CPU assertion group
        ; below expects a register-operand pminsb/vpminsb followed by a
        ; memory-operand (%rdi) pminsb/vpminsb and a retq, each carrying its
        ; "# sched" annotation. The assertions are autogenerated (see the NOTE
        ; at the top of this file): regenerate them rather than editing by hand.
   2844 ; GENERIC-LABEL: test_pminsb:
   2845 ; GENERIC:       # %bb.0:
   2846 ; GENERIC-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.50]
   2847 ; GENERIC-NEXT:    pminsb (%rdi), %xmm0 # sched: [7:0.50]
   2848 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2849 ;
   2850 ; SLM-LABEL: test_pminsb:
   2851 ; SLM:       # %bb.0:
   2852 ; SLM-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.50]
   2853 ; SLM-NEXT:    pminsb (%rdi), %xmm0 # sched: [4:1.00]
   2854 ; SLM-NEXT:    retq # sched: [4:1.00]
   2855 ;
   2856 ; SANDY-SSE-LABEL: test_pminsb:
   2857 ; SANDY-SSE:       # %bb.0:
   2858 ; SANDY-SSE-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.50]
   2859 ; SANDY-SSE-NEXT:    pminsb (%rdi), %xmm0 # sched: [7:0.50]
   2860 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2861 ;
   2862 ; SANDY-LABEL: test_pminsb:
   2863 ; SANDY:       # %bb.0:
   2864 ; SANDY-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2865 ; SANDY-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2866 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2867 ;
   2868 ; HASWELL-SSE-LABEL: test_pminsb:
   2869 ; HASWELL-SSE:       # %bb.0:
   2870 ; HASWELL-SSE-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.50]
   2871 ; HASWELL-SSE-NEXT:    pminsb (%rdi), %xmm0 # sched: [7:0.50]
   2872 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2873 ;
   2874 ; HASWELL-LABEL: test_pminsb:
   2875 ; HASWELL:       # %bb.0:
   2876 ; HASWELL-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2877 ; HASWELL-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2878 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2879 ;
   2880 ; BROADWELL-SSE-LABEL: test_pminsb:
   2881 ; BROADWELL-SSE:       # %bb.0:
   2882 ; BROADWELL-SSE-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.50]
   2883 ; BROADWELL-SSE-NEXT:    pminsb (%rdi), %xmm0 # sched: [6:0.50]
   2884 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2885 ;
   2886 ; BROADWELL-LABEL: test_pminsb:
   2887 ; BROADWELL:       # %bb.0:
   2888 ; BROADWELL-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2889 ; BROADWELL-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   2890 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2891 ;
   2892 ; SKYLAKE-SSE-LABEL: test_pminsb:
   2893 ; SKYLAKE-SSE:       # %bb.0:
   2894 ; SKYLAKE-SSE-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.50]
   2895 ; SKYLAKE-SSE-NEXT:    pminsb (%rdi), %xmm0 # sched: [7:0.50]
   2896 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2897 ;
   2898 ; SKYLAKE-LABEL: test_pminsb:
   2899 ; SKYLAKE:       # %bb.0:
   2900 ; SKYLAKE-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2901 ; SKYLAKE-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2902 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2903 ;
   2904 ; SKX-SSE-LABEL: test_pminsb:
   2905 ; SKX-SSE:       # %bb.0:
   2906 ; SKX-SSE-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.50]
   2907 ; SKX-SSE-NEXT:    pminsb (%rdi), %xmm0 # sched: [7:0.50]
   2908 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2909 ;
   2910 ; SKX-LABEL: test_pminsb:
   2911 ; SKX:       # %bb.0:
   2912 ; SKX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2913 ; SKX-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2914 ; SKX-NEXT:    retq # sched: [7:1.00]
   2915 ;
   2916 ; BTVER2-SSE-LABEL: test_pminsb:
   2917 ; BTVER2-SSE:       # %bb.0:
   2918 ; BTVER2-SSE-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.50]
   2919 ; BTVER2-SSE-NEXT:    pminsb (%rdi), %xmm0 # sched: [6:1.00]
   2920 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2921 ;
   2922 ; BTVER2-LABEL: test_pminsb:
   2923 ; BTVER2:       # %bb.0:
   2924 ; BTVER2-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2925 ; BTVER2-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   2926 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2927 ;
   2928 ; ZNVER1-SSE-LABEL: test_pminsb:
   2929 ; ZNVER1-SSE:       # %bb.0:
   2930 ; ZNVER1-SSE-NEXT:    pminsb %xmm1, %xmm0 # sched: [1:0.25]
   2931 ; ZNVER1-SSE-NEXT:    pminsb (%rdi), %xmm0 # sched: [8:0.50]
   2932 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2933 ;
   2934 ; ZNVER1-LABEL: test_pminsb:
   2935 ; ZNVER1:       # %bb.0:
   2936 ; ZNVER1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   2937 ; ZNVER1-NEXT:    vpminsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   2938 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Register form: both operands are the incoming vector arguments.
   2939   %1 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
        ; Memory form: the assertions above show no separate load instruction,
        ; only the (%rdi) operand on the second pminsb/vpminsb.
   2940   %2 = load <16 x i8>, <16 x i8> *%a2, align 16
   2941   %3 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %1, <16 x i8> %2)
   2942   ret <16 x i8> %3
   2943 }
        ; Intrinsic exercised by @test_pminsb above.
   2944 declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
   2945 
   2946 define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
        ; Calls llvm.x86.sse41.pminsd twice: first on %a0 and %a1, then on that
        ; result and a vector loaded from %a2. Every per-CPU assertion group
        ; below expects a register-operand pminsd/vpminsd followed by a
        ; memory-operand (%rdi) pminsd/vpminsd and a retq, each carrying its
        ; "# sched" annotation. The assertions are autogenerated (see the NOTE
        ; at the top of this file): regenerate them rather than editing by hand.
   2947 ; GENERIC-LABEL: test_pminsd:
   2948 ; GENERIC:       # %bb.0:
   2949 ; GENERIC-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.50]
   2950 ; GENERIC-NEXT:    pminsd (%rdi), %xmm0 # sched: [7:0.50]
   2951 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2952 ;
   2953 ; SLM-LABEL: test_pminsd:
   2954 ; SLM:       # %bb.0:
   2955 ; SLM-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.50]
   2956 ; SLM-NEXT:    pminsd (%rdi), %xmm0 # sched: [4:1.00]
   2957 ; SLM-NEXT:    retq # sched: [4:1.00]
   2958 ;
   2959 ; SANDY-SSE-LABEL: test_pminsd:
   2960 ; SANDY-SSE:       # %bb.0:
   2961 ; SANDY-SSE-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.50]
   2962 ; SANDY-SSE-NEXT:    pminsd (%rdi), %xmm0 # sched: [7:0.50]
   2963 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2964 ;
   2965 ; SANDY-LABEL: test_pminsd:
   2966 ; SANDY:       # %bb.0:
   2967 ; SANDY-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2968 ; SANDY-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2969 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2970 ;
   2971 ; HASWELL-SSE-LABEL: test_pminsd:
   2972 ; HASWELL-SSE:       # %bb.0:
   2973 ; HASWELL-SSE-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.50]
   2974 ; HASWELL-SSE-NEXT:    pminsd (%rdi), %xmm0 # sched: [7:0.50]
   2975 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2976 ;
   2977 ; HASWELL-LABEL: test_pminsd:
   2978 ; HASWELL:       # %bb.0:
   2979 ; HASWELL-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2980 ; HASWELL-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   2981 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2982 ;
   2983 ; BROADWELL-SSE-LABEL: test_pminsd:
   2984 ; BROADWELL-SSE:       # %bb.0:
   2985 ; BROADWELL-SSE-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.50]
   2986 ; BROADWELL-SSE-NEXT:    pminsd (%rdi), %xmm0 # sched: [6:0.50]
   2987 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2988 ;
   2989 ; BROADWELL-LABEL: test_pminsd:
   2990 ; BROADWELL:       # %bb.0:
   2991 ; BROADWELL-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2992 ; BROADWELL-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   2993 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2994 ;
   2995 ; SKYLAKE-SSE-LABEL: test_pminsd:
   2996 ; SKYLAKE-SSE:       # %bb.0:
   2997 ; SKYLAKE-SSE-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.50]
   2998 ; SKYLAKE-SSE-NEXT:    pminsd (%rdi), %xmm0 # sched: [7:0.50]
   2999 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3000 ;
   3001 ; SKYLAKE-LABEL: test_pminsd:
   3002 ; SKYLAKE:       # %bb.0:
   3003 ; SKYLAKE-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3004 ; SKYLAKE-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   3005 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3006 ;
   3007 ; SKX-SSE-LABEL: test_pminsd:
   3008 ; SKX-SSE:       # %bb.0:
   3009 ; SKX-SSE-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.50]
   3010 ; SKX-SSE-NEXT:    pminsd (%rdi), %xmm0 # sched: [7:0.50]
   3011 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3012 ;
   3013 ; SKX-LABEL: test_pminsd:
   3014 ; SKX:       # %bb.0:
   3015 ; SKX-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3016 ; SKX-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   3017 ; SKX-NEXT:    retq # sched: [7:1.00]
   3018 ;
   3019 ; BTVER2-SSE-LABEL: test_pminsd:
   3020 ; BTVER2-SSE:       # %bb.0:
   3021 ; BTVER2-SSE-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.50]
   3022 ; BTVER2-SSE-NEXT:    pminsd (%rdi), %xmm0 # sched: [6:1.00]
   3023 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3024 ;
   3025 ; BTVER2-LABEL: test_pminsd:
   3026 ; BTVER2:       # %bb.0:
   3027 ; BTVER2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3028 ; BTVER2-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   3029 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3030 ;
   3031 ; ZNVER1-SSE-LABEL: test_pminsd:
   3032 ; ZNVER1-SSE:       # %bb.0:
   3033 ; ZNVER1-SSE-NEXT:    pminsd %xmm1, %xmm0 # sched: [1:0.25]
   3034 ; ZNVER1-SSE-NEXT:    pminsd (%rdi), %xmm0 # sched: [8:0.50]
   3035 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3036 ;
   3037 ; ZNVER1-LABEL: test_pminsd:
   3038 ; ZNVER1:       # %bb.0:
   3039 ; ZNVER1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   3040 ; ZNVER1-NEXT:    vpminsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   3041 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Register form: both operands are the incoming vector arguments.
   3042   %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
        ; Memory form: the assertions above show no separate load instruction,
        ; only the (%rdi) operand on the second pminsd/vpminsd.
   3043   %2 = load <4 x i32>, <4 x i32> *%a2, align 16
   3044   %3 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %1, <4 x i32> %2)
   3045   ret <4 x i32> %3
   3046 }
        ; Intrinsic exercised by @test_pminsd above.
   3047 declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
   3048 
   3049 define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
        ; Calls llvm.x86.sse41.pminud twice: first on %a0 and %a1, then on that
        ; result and a vector loaded from %a2. Every per-CPU assertion group
        ; below expects a register-operand pminud/vpminud followed by a
        ; memory-operand (%rdi) pminud/vpminud and a retq, each carrying its
        ; "# sched" annotation. The assertions are autogenerated (see the NOTE
        ; at the top of this file): regenerate them rather than editing by hand.
   3050 ; GENERIC-LABEL: test_pminud:
   3051 ; GENERIC:       # %bb.0:
   3052 ; GENERIC-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.50]
   3053 ; GENERIC-NEXT:    pminud (%rdi), %xmm0 # sched: [7:0.50]
   3054 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3055 ;
   3056 ; SLM-LABEL: test_pminud:
   3057 ; SLM:       # %bb.0:
   3058 ; SLM-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.50]
   3059 ; SLM-NEXT:    pminud (%rdi), %xmm0 # sched: [4:1.00]
   3060 ; SLM-NEXT:    retq # sched: [4:1.00]
   3061 ;
   3062 ; SANDY-SSE-LABEL: test_pminud:
   3063 ; SANDY-SSE:       # %bb.0:
   3064 ; SANDY-SSE-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.50]
   3065 ; SANDY-SSE-NEXT:    pminud (%rdi), %xmm0 # sched: [7:0.50]
   3066 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3067 ;
   3068 ; SANDY-LABEL: test_pminud:
   3069 ; SANDY:       # %bb.0:
   3070 ; SANDY-NEXT:    vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3071 ; SANDY-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   3072 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3073 ;
   3074 ; HASWELL-SSE-LABEL: test_pminud:
   3075 ; HASWELL-SSE:       # %bb.0:
   3076 ; HASWELL-SSE-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.50]
   3077 ; HASWELL-SSE-NEXT:    pminud (%rdi), %xmm0 # sched: [7:0.50]
   3078 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3079 ;
   3080 ; HASWELL-LABEL: test_pminud:
   3081 ; HASWELL:       # %bb.0:
   3082 ; HASWELL-NEXT:    vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3083 ; HASWELL-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   3084 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3085 ;
   3086 ; BROADWELL-SSE-LABEL: test_pminud:
   3087 ; BROADWELL-SSE:       # %bb.0:
   3088 ; BROADWELL-SSE-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.50]
   3089 ; BROADWELL-SSE-NEXT:    pminud (%rdi), %xmm0 # sched: [6:0.50]
   3090 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3091 ;
   3092 ; BROADWELL-LABEL: test_pminud:
   3093 ; BROADWELL:       # %bb.0:
   3094 ; BROADWELL-NEXT:    vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3095 ; BROADWELL-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   3096 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3097 ;
   3098 ; SKYLAKE-SSE-LABEL: test_pminud:
   3099 ; SKYLAKE-SSE:       # %bb.0:
   3100 ; SKYLAKE-SSE-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.50]
   3101 ; SKYLAKE-SSE-NEXT:    pminud (%rdi), %xmm0 # sched: [7:0.50]
   3102 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3103 ;
   3104 ; SKYLAKE-LABEL: test_pminud:
   3105 ; SKYLAKE:       # %bb.0:
   3106 ; SKYLAKE-NEXT:    vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3107 ; SKYLAKE-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   3108 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3109 ;
   3110 ; SKX-SSE-LABEL: test_pminud:
   3111 ; SKX-SSE:       # %bb.0:
   3112 ; SKX-SSE-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.50]
   3113 ; SKX-SSE-NEXT:    pminud (%rdi), %xmm0 # sched: [7:0.50]
   3114 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3115 ;
   3116 ; SKX-LABEL: test_pminud:
   3117 ; SKX:       # %bb.0:
   3118 ; SKX-NEXT:    vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3119 ; SKX-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   3120 ; SKX-NEXT:    retq # sched: [7:1.00]
   3121 ;
   3122 ; BTVER2-SSE-LABEL: test_pminud:
   3123 ; BTVER2-SSE:       # %bb.0:
   3124 ; BTVER2-SSE-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.50]
   3125 ; BTVER2-SSE-NEXT:    pminud (%rdi), %xmm0 # sched: [6:1.00]
   3126 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3127 ;
   3128 ; BTVER2-LABEL: test_pminud:
   3129 ; BTVER2:       # %bb.0:
   3130 ; BTVER2-NEXT:    vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3131 ; BTVER2-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   3132 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3133 ;
   3134 ; ZNVER1-SSE-LABEL: test_pminud:
   3135 ; ZNVER1-SSE:       # %bb.0:
   3136 ; ZNVER1-SSE-NEXT:    pminud %xmm1, %xmm0 # sched: [1:0.25]
   3137 ; ZNVER1-SSE-NEXT:    pminud (%rdi), %xmm0 # sched: [8:0.50]
   3138 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3139 ;
   3140 ; ZNVER1-LABEL: test_pminud:
   3141 ; ZNVER1:       # %bb.0:
   3142 ; ZNVER1-NEXT:    vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   3143 ; ZNVER1-NEXT:    vpminud (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   3144 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Register form: both operands are the incoming vector arguments.
   3145   %1 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
        ; Memory form: the assertions above show no separate load instruction,
        ; only the (%rdi) operand on the second pminud/vpminud.
   3146   %2 = load <4 x i32>, <4 x i32> *%a2, align 16
   3147   %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %1, <4 x i32> %2)
   3148   ret <4 x i32> %3
   3149 }
        ; Intrinsic exercised by @test_pminud above.
   3150 declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
   3151 
   3152 define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
   3153 ; GENERIC-LABEL: test_pminuw:
   3154 ; GENERIC:       # %bb.0:
   3155 ; GENERIC-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.50]
   3156 ; GENERIC-NEXT:    pminuw (%rdi), %xmm0 # sched: [7:0.50]
   3157 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3158 ;
   3159 ; SLM-LABEL: test_pminuw:
   3160 ; SLM:       # %bb.0:
   3161 ; SLM-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.50]
   3162 ; SLM-NEXT:    pminuw (%rdi), %xmm0 # sched: [4:1.00]
   3163 ; SLM-NEXT:    retq # sched: [4:1.00]
   3164 ;
   3165 ; SANDY-SSE-LABEL: test_pminuw:
   3166 ; SANDY-SSE:       # %bb.0:
   3167 ; SANDY-SSE-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.50]
   3168 ; SANDY-SSE-NEXT:    pminuw (%rdi), %xmm0 # sched: [7:0.50]
   3169 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3170 ;
   3171 ; SANDY-LABEL: test_pminuw:
   3172 ; SANDY:       # %bb.0:
   3173 ; SANDY-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3174 ; SANDY-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   3175 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3176 ;
   3177 ; HASWELL-SSE-LABEL: test_pminuw:
   3178 ; HASWELL-SSE:       # %bb.0:
   3179 ; HASWELL-SSE-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.50]
   3180 ; HASWELL-SSE-NEXT:    pminuw (%rdi), %xmm0 # sched: [7:0.50]
   3181 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3182 ;
   3183 ; HASWELL-LABEL: test_pminuw:
   3184 ; HASWELL:       # %bb.0:
   3185 ; HASWELL-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3186 ; HASWELL-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   3187 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3188 ;
   3189 ; BROADWELL-SSE-LABEL: test_pminuw:
   3190 ; BROADWELL-SSE:       # %bb.0:
   3191 ; BROADWELL-SSE-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.50]
   3192 ; BROADWELL-SSE-NEXT:    pminuw (%rdi), %xmm0 # sched: [6:0.50]
   3193 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3194 ;
   3195 ; BROADWELL-LABEL: test_pminuw:
   3196 ; BROADWELL:       # %bb.0:
   3197 ; BROADWELL-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3198 ; BROADWELL-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   3199 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3200 ;
   3201 ; SKYLAKE-SSE-LABEL: test_pminuw:
   3202 ; SKYLAKE-SSE:       # %bb.0:
   3203 ; SKYLAKE-SSE-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.50]
   3204 ; SKYLAKE-SSE-NEXT:    pminuw (%rdi), %xmm0 # sched: [7:0.50]
   3205 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3206 ;
   3207 ; SKYLAKE-LABEL: test_pminuw:
   3208 ; SKYLAKE:       # %bb.0:
   3209 ; SKYLAKE-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3210 ; SKYLAKE-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   3211 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3212 ;
   3213 ; SKX-SSE-LABEL: test_pminuw:
   3214 ; SKX-SSE:       # %bb.0:
   3215 ; SKX-SSE-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.50]
   3216 ; SKX-SSE-NEXT:    pminuw (%rdi), %xmm0 # sched: [7:0.50]
   3217 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3218 ;
   3219 ; SKX-LABEL: test_pminuw:
   3220 ; SKX:       # %bb.0:
   3221 ; SKX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3222 ; SKX-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   3223 ; SKX-NEXT:    retq # sched: [7:1.00]
   3224 ;
   3225 ; BTVER2-SSE-LABEL: test_pminuw:
   3226 ; BTVER2-SSE:       # %bb.0:
   3227 ; BTVER2-SSE-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.50]
   3228 ; BTVER2-SSE-NEXT:    pminuw (%rdi), %xmm0 # sched: [6:1.00]
   3229 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3230 ;
   3231 ; BTVER2-LABEL: test_pminuw:
   3232 ; BTVER2:       # %bb.0:
   3233 ; BTVER2-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3234 ; BTVER2-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   3235 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3236 ;
   3237 ; ZNVER1-SSE-LABEL: test_pminuw:
   3238 ; ZNVER1-SSE:       # %bb.0:
   3239 ; ZNVER1-SSE-NEXT:    pminuw %xmm1, %xmm0 # sched: [1:0.25]
   3240 ; ZNVER1-SSE-NEXT:    pminuw (%rdi), %xmm0 # sched: [8:0.50]
   3241 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3242 ;
   3243 ; ZNVER1-LABEL: test_pminuw:
   3244 ; ZNVER1:       # %bb.0:
   3245 ; ZNVER1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   3246 ; ZNVER1-NEXT:    vpminuw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   3247 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3248   %1 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
   3249   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   3250   %3 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %1, <8 x i16> %2)
   3251   ret <8 x i16> %3
   3252 }
        ; Declaration of the SSE4.1 pminuw intrinsic called twice by test_pminuw
        ; above: two <8 x i16> operands, one <8 x i16> result.
   3253 declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
   3254 
        ; Scheduling test for the byte-to-word sign extend (pmovsxbw / vpmovsxbw).
        ; Sign-extends the low eight i8 lanes of %a0 and eight i8 values loaded
        ; from %a1 to <8 x i16>, then returns their lane-wise sum. For every run
        ; configuration the assertion lines expect one register-operand extend,
        ; one memory-operand extend and an add (the SLM run also ends with a
        ; movdqa into the return register). These assertions are autogenerated
        ; (see the note on the first line of the file): regenerate them with the
        ; script instead of editing them by hand.
        ; NOTE(review): the "# sched: [a:b]" suffix is emitted because the RUN
        ; lines pass -print-schedule; presumably latency:reciprocal-throughput --
        ; confirm against the llc documentation.
   3255 define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
   3256 ; GENERIC-LABEL: test_pmovsxbw:
   3257 ; GENERIC:       # %bb.0:
   3258 ; GENERIC-NEXT:    pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
   3259 ; GENERIC-NEXT:    pmovsxbw (%rdi), %xmm0 # sched: [7:0.50]
   3260 ; GENERIC-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   3261 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3262 ;
   3263 ; SLM-LABEL: test_pmovsxbw:
   3264 ; SLM:       # %bb.0:
   3265 ; SLM-NEXT:    pmovsxbw (%rdi), %xmm1 # sched: [4:1.00]
   3266 ; SLM-NEXT:    pmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
   3267 ; SLM-NEXT:    paddw %xmm0, %xmm1 # sched: [1:0.50]
   3268 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   3269 ; SLM-NEXT:    retq # sched: [4:1.00]
   3270 ;
   3271 ; SANDY-SSE-LABEL: test_pmovsxbw:
   3272 ; SANDY-SSE:       # %bb.0:
   3273 ; SANDY-SSE-NEXT:    pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
   3274 ; SANDY-SSE-NEXT:    pmovsxbw (%rdi), %xmm0 # sched: [7:0.50]
   3275 ; SANDY-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   3276 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3277 ;
   3278 ; SANDY-LABEL: test_pmovsxbw:
   3279 ; SANDY:       # %bb.0:
   3280 ; SANDY-NEXT:    vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
   3281 ; SANDY-NEXT:    vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50]
   3282 ; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3283 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3284 ;
   3285 ; HASWELL-SSE-LABEL: test_pmovsxbw:
   3286 ; HASWELL-SSE:       # %bb.0:
   3287 ; HASWELL-SSE-NEXT:    pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
   3288 ; HASWELL-SSE-NEXT:    pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
   3289 ; HASWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   3290 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3291 ;
   3292 ; HASWELL-LABEL: test_pmovsxbw:
   3293 ; HASWELL:       # %bb.0:
   3294 ; HASWELL-NEXT:    vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
   3295 ; HASWELL-NEXT:    vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
   3296 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3297 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3298 ;
   3299 ; BROADWELL-SSE-LABEL: test_pmovsxbw:
   3300 ; BROADWELL-SSE:       # %bb.0:
   3301 ; BROADWELL-SSE-NEXT:    pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
   3302 ; BROADWELL-SSE-NEXT:    pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
   3303 ; BROADWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   3304 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3305 ;
   3306 ; BROADWELL-LABEL: test_pmovsxbw:
   3307 ; BROADWELL:       # %bb.0:
   3308 ; BROADWELL-NEXT:    vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
   3309 ; BROADWELL-NEXT:    vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
   3310 ; BROADWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3311 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3312 ;
   3313 ; SKYLAKE-SSE-LABEL: test_pmovsxbw:
   3314 ; SKYLAKE-SSE:       # %bb.0:
   3315 ; SKYLAKE-SSE-NEXT:    pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
   3316 ; SKYLAKE-SSE-NEXT:    pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
   3317 ; SKYLAKE-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
   3318 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3319 ;
   3320 ; SKYLAKE-LABEL: test_pmovsxbw:
   3321 ; SKYLAKE:       # %bb.0:
   3322 ; SKYLAKE-NEXT:    vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
   3323 ; SKYLAKE-NEXT:    vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
   3324 ; SKYLAKE-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   3325 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3326 ;
   3327 ; SKX-SSE-LABEL: test_pmovsxbw:
   3328 ; SKX-SSE:       # %bb.0:
   3329 ; SKX-SSE-NEXT:    pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
   3330 ; SKX-SSE-NEXT:    pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
   3331 ; SKX-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
   3332 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3333 ;
   3334 ; SKX-LABEL: test_pmovsxbw:
   3335 ; SKX:       # %bb.0:
   3336 ; SKX-NEXT:    vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
   3337 ; SKX-NEXT:    vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
   3338 ; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   3339 ; SKX-NEXT:    retq # sched: [7:1.00]
   3340 ;
   3341 ; BTVER2-SSE-LABEL: test_pmovsxbw:
   3342 ; BTVER2-SSE:       # %bb.0:
   3343 ; BTVER2-SSE-NEXT:    pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
   3344 ; BTVER2-SSE-NEXT:    pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
   3345 ; BTVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   3346 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3347 ;
   3348 ; BTVER2-LABEL: test_pmovsxbw:
   3349 ; BTVER2:       # %bb.0:
   3350 ; BTVER2-NEXT:    vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
   3351 ; BTVER2-NEXT:    vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
   3352 ; BTVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3353 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3354 ;
   3355 ; ZNVER1-SSE-LABEL: test_pmovsxbw:
   3356 ; ZNVER1-SSE:       # %bb.0:
   3357 ; ZNVER1-SSE-NEXT:    pmovsxbw %xmm0, %xmm1 # sched: [1:0.25]
   3358 ; ZNVER1-SSE-NEXT:    pmovsxbw (%rdi), %xmm0 # sched: [8:0.50]
   3359 ; ZNVER1-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.25]
   3360 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3361 ;
   3362 ; ZNVER1-LABEL: test_pmovsxbw:
   3363 ; ZNVER1:       # %bb.0:
   3364 ; ZNVER1-NEXT:    vpmovsxbw (%rdi), %xmm1 # sched: [8:0.50]
   3365 ; ZNVER1-NEXT:    vpmovsxbw %xmm0, %xmm0 # sched: [1:0.25]
   3366 ; ZNVER1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   3367 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Keep lanes 0-7 of %a0 and sign-extend them: the register-source extend.
   3368   %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   3369   %2 = sext <8 x i8> %1 to <8 x i16>
          ; Load eight bytes (align 1) and sign-extend them: the memory-source extend.
   3370   %3 = load <8 x i8>, <8 x i8>* %a1, align 1
   3371   %4 = sext <8 x i8> %3 to <8 x i16>
          ; Add the two extended vectors so both feed the returned value.
   3372   %5 = add <8 x i16> %2, %4
   3373   ret <8 x i16> %5
   3374 }
   3375 
        ; Scheduling test for the byte-to-dword sign extend (pmovsxbd / vpmovsxbd).
        ; Sign-extends the low four i8 lanes of %a0 and four i8 values loaded from
        ; %a1 to <4 x i32>, then returns their lane-wise sum. For every run
        ; configuration the assertion lines expect one register-operand extend,
        ; one memory-operand extend and an add (the SLM run also ends with a
        ; movdqa into the return register). These assertions are autogenerated
        ; (see the note on the first line of the file): regenerate them with the
        ; script instead of editing them by hand.
        ; NOTE(review): the "# sched: [a:b]" suffix is emitted because the RUN
        ; lines pass -print-schedule; presumably latency:reciprocal-throughput --
        ; confirm against the llc documentation.
   3376 define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
   3377 ; GENERIC-LABEL: test_pmovsxbd:
   3378 ; GENERIC:       # %bb.0:
   3379 ; GENERIC-NEXT:    pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
   3380 ; GENERIC-NEXT:    pmovsxbd (%rdi), %xmm0 # sched: [7:0.50]
   3381 ; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   3382 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3383 ;
   3384 ; SLM-LABEL: test_pmovsxbd:
   3385 ; SLM:       # %bb.0:
   3386 ; SLM-NEXT:    pmovsxbd (%rdi), %xmm1 # sched: [4:1.00]
   3387 ; SLM-NEXT:    pmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
   3388 ; SLM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   3389 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   3390 ; SLM-NEXT:    retq # sched: [4:1.00]
   3391 ;
   3392 ; SANDY-SSE-LABEL: test_pmovsxbd:
   3393 ; SANDY-SSE:       # %bb.0:
   3394 ; SANDY-SSE-NEXT:    pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
   3395 ; SANDY-SSE-NEXT:    pmovsxbd (%rdi), %xmm0 # sched: [7:0.50]
   3396 ; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   3397 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3398 ;
   3399 ; SANDY-LABEL: test_pmovsxbd:
   3400 ; SANDY:       # %bb.0:
   3401 ; SANDY-NEXT:    vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
   3402 ; SANDY-NEXT:    vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50]
   3403 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3404 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3405 ;
   3406 ; HASWELL-SSE-LABEL: test_pmovsxbd:
   3407 ; HASWELL-SSE:       # %bb.0:
   3408 ; HASWELL-SSE-NEXT:    pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
   3409 ; HASWELL-SSE-NEXT:    pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
   3410 ; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   3411 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3412 ;
   3413 ; HASWELL-LABEL: test_pmovsxbd:
   3414 ; HASWELL:       # %bb.0:
   3415 ; HASWELL-NEXT:    vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
   3416 ; HASWELL-NEXT:    vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
   3417 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3418 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3419 ;
   3420 ; BROADWELL-SSE-LABEL: test_pmovsxbd:
   3421 ; BROADWELL-SSE:       # %bb.0:
   3422 ; BROADWELL-SSE-NEXT:    pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
   3423 ; BROADWELL-SSE-NEXT:    pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
   3424 ; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   3425 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3426 ;
   3427 ; BROADWELL-LABEL: test_pmovsxbd:
   3428 ; BROADWELL:       # %bb.0:
   3429 ; BROADWELL-NEXT:    vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
   3430 ; BROADWELL-NEXT:    vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
   3431 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3432 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3433 ;
   3434 ; SKYLAKE-SSE-LABEL: test_pmovsxbd:
   3435 ; SKYLAKE-SSE:       # %bb.0:
   3436 ; SKYLAKE-SSE-NEXT:    pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
   3437 ; SKYLAKE-SSE-NEXT:    pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
   3438 ; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   3439 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3440 ;
   3441 ; SKYLAKE-LABEL: test_pmovsxbd:
   3442 ; SKYLAKE:       # %bb.0:
   3443 ; SKYLAKE-NEXT:    vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
   3444 ; SKYLAKE-NEXT:    vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
   3445 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   3446 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3447 ;
   3448 ; SKX-SSE-LABEL: test_pmovsxbd:
   3449 ; SKX-SSE:       # %bb.0:
   3450 ; SKX-SSE-NEXT:    pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
   3451 ; SKX-SSE-NEXT:    pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
   3452 ; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   3453 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3454 ;
   3455 ; SKX-LABEL: test_pmovsxbd:
   3456 ; SKX:       # %bb.0:
   3457 ; SKX-NEXT:    vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
   3458 ; SKX-NEXT:    vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
   3459 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   3460 ; SKX-NEXT:    retq # sched: [7:1.00]
   3461 ;
   3462 ; BTVER2-SSE-LABEL: test_pmovsxbd:
   3463 ; BTVER2-SSE:       # %bb.0:
   3464 ; BTVER2-SSE-NEXT:    pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
   3465 ; BTVER2-SSE-NEXT:    pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
   3466 ; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   3467 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3468 ;
   3469 ; BTVER2-LABEL: test_pmovsxbd:
   3470 ; BTVER2:       # %bb.0:
   3471 ; BTVER2-NEXT:    vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
   3472 ; BTVER2-NEXT:    vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
   3473 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3474 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3475 ;
   3476 ; ZNVER1-SSE-LABEL: test_pmovsxbd:
   3477 ; ZNVER1-SSE:       # %bb.0:
   3478 ; ZNVER1-SSE-NEXT:    pmovsxbd %xmm0, %xmm1 # sched: [1:0.25]
   3479 ; ZNVER1-SSE-NEXT:    pmovsxbd (%rdi), %xmm0 # sched: [8:0.50]
   3480 ; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
   3481 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3482 ;
   3483 ; ZNVER1-LABEL: test_pmovsxbd:
   3484 ; ZNVER1:       # %bb.0:
   3485 ; ZNVER1-NEXT:    vpmovsxbd (%rdi), %xmm1 # sched: [8:0.50]
   3486 ; ZNVER1-NEXT:    vpmovsxbd %xmm0, %xmm0 # sched: [1:0.25]
   3487 ; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   3488 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Keep lanes 0-3 of %a0 and sign-extend them: the register-source extend.
   3489   %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   3490   %2 = sext <4 x i8> %1 to <4 x i32>
          ; Load four bytes (align 1) and sign-extend them: the memory-source extend.
   3491   %3 = load <4 x i8>, <4 x i8>* %a1, align 1
   3492   %4 = sext <4 x i8> %3 to <4 x i32>
          ; Add the two extended vectors so both feed the returned value.
   3493   %5 = add <4 x i32> %2, %4
   3494   ret <4 x i32> %5
   3495 }
   3496 
        ; Scheduling test for the byte-to-qword sign extend (pmovsxbq / vpmovsxbq).
        ; Sign-extends the low two i8 lanes of %a0 and two i8 values loaded from
        ; %a1 to <2 x i64>, then returns their lane-wise sum. For every run
        ; configuration the assertion lines expect one register-operand extend,
        ; one memory-operand extend and an add (the SLM run also ends with a
        ; movdqa into the return register). These assertions are autogenerated
        ; (see the note on the first line of the file): regenerate them with the
        ; script instead of editing them by hand.
        ; NOTE(review): the "# sched: [a:b]" suffix is emitted because the RUN
        ; lines pass -print-schedule; presumably latency:reciprocal-throughput --
        ; confirm against the llc documentation.
   3497 define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
   3498 ; GENERIC-LABEL: test_pmovsxbq:
   3499 ; GENERIC:       # %bb.0:
   3500 ; GENERIC-NEXT:    pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
   3501 ; GENERIC-NEXT:    pmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
   3502 ; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   3503 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3504 ;
   3505 ; SLM-LABEL: test_pmovsxbq:
   3506 ; SLM:       # %bb.0:
   3507 ; SLM-NEXT:    pmovsxbq (%rdi), %xmm1 # sched: [4:1.00]
   3508 ; SLM-NEXT:    pmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
   3509 ; SLM-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   3510 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   3511 ; SLM-NEXT:    retq # sched: [4:1.00]
   3512 ;
   3513 ; SANDY-SSE-LABEL: test_pmovsxbq:
   3514 ; SANDY-SSE:       # %bb.0:
   3515 ; SANDY-SSE-NEXT:    pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
   3516 ; SANDY-SSE-NEXT:    pmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
   3517 ; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   3518 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3519 ;
   3520 ; SANDY-LABEL: test_pmovsxbq:
   3521 ; SANDY:       # %bb.0:
   3522 ; SANDY-NEXT:    vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
   3523 ; SANDY-NEXT:    vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50]
   3524 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3525 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3526 ;
   3527 ; HASWELL-SSE-LABEL: test_pmovsxbq:
   3528 ; HASWELL-SSE:       # %bb.0:
   3529 ; HASWELL-SSE-NEXT:    pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
   3530 ; HASWELL-SSE-NEXT:    pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
   3531 ; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   3532 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3533 ;
   3534 ; HASWELL-LABEL: test_pmovsxbq:
   3535 ; HASWELL:       # %bb.0:
   3536 ; HASWELL-NEXT:    vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
   3537 ; HASWELL-NEXT:    vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
   3538 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3539 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3540 ;
   3541 ; BROADWELL-SSE-LABEL: test_pmovsxbq:
   3542 ; BROADWELL-SSE:       # %bb.0:
   3543 ; BROADWELL-SSE-NEXT:    pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
   3544 ; BROADWELL-SSE-NEXT:    pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
   3545 ; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   3546 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3547 ;
   3548 ; BROADWELL-LABEL: test_pmovsxbq:
   3549 ; BROADWELL:       # %bb.0:
   3550 ; BROADWELL-NEXT:    vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
   3551 ; BROADWELL-NEXT:    vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
   3552 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3553 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3554 ;
   3555 ; SKYLAKE-SSE-LABEL: test_pmovsxbq:
   3556 ; SKYLAKE-SSE:       # %bb.0:
   3557 ; SKYLAKE-SSE-NEXT:    pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
   3558 ; SKYLAKE-SSE-NEXT:    pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
   3559 ; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   3560 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3561 ;
   3562 ; SKYLAKE-LABEL: test_pmovsxbq:
   3563 ; SKYLAKE:       # %bb.0:
   3564 ; SKYLAKE-NEXT:    vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
   3565 ; SKYLAKE-NEXT:    vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
   3566 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   3567 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3568 ;
   3569 ; SKX-SSE-LABEL: test_pmovsxbq:
   3570 ; SKX-SSE:       # %bb.0:
   3571 ; SKX-SSE-NEXT:    pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
   3572 ; SKX-SSE-NEXT:    pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
   3573 ; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   3574 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3575 ;
   3576 ; SKX-LABEL: test_pmovsxbq:
   3577 ; SKX:       # %bb.0:
   3578 ; SKX-NEXT:    vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
   3579 ; SKX-NEXT:    vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
   3580 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   3581 ; SKX-NEXT:    retq # sched: [7:1.00]
   3582 ;
   3583 ; BTVER2-SSE-LABEL: test_pmovsxbq:
   3584 ; BTVER2-SSE:       # %bb.0:
   3585 ; BTVER2-SSE-NEXT:    pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
   3586 ; BTVER2-SSE-NEXT:    pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
   3587 ; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   3588 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3589 ;
   3590 ; BTVER2-LABEL: test_pmovsxbq:
   3591 ; BTVER2:       # %bb.0:
   3592 ; BTVER2-NEXT:    vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
   3593 ; BTVER2-NEXT:    vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
   3594 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3595 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3596 ;
   3597 ; ZNVER1-SSE-LABEL: test_pmovsxbq:
   3598 ; ZNVER1-SSE:       # %bb.0:
   3599 ; ZNVER1-SSE-NEXT:    pmovsxbq %xmm0, %xmm1 # sched: [1:0.25]
   3600 ; ZNVER1-SSE-NEXT:    pmovsxbq (%rdi), %xmm0 # sched: [8:0.50]
   3601 ; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
   3602 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3603 ;
   3604 ; ZNVER1-LABEL: test_pmovsxbq:
   3605 ; ZNVER1:       # %bb.0:
   3606 ; ZNVER1-NEXT:    vpmovsxbq (%rdi), %xmm1 # sched: [8:0.50]
   3607 ; ZNVER1-NEXT:    vpmovsxbq %xmm0, %xmm0 # sched: [1:0.25]
   3608 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   3609 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Keep lanes 0-1 of %a0 and sign-extend them: the register-source extend.
   3610   %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
   3611   %2 = sext <2 x i8> %1 to <2 x i64>
          ; Load two bytes (align 1) and sign-extend them: the memory-source extend.
   3612   %3 = load <2 x i8>, <2 x i8>* %a1, align 1
   3613   %4 = sext <2 x i8> %3 to <2 x i64>
          ; Add the two extended vectors so both feed the returned value.
   3614   %5 = add <2 x i64> %2, %4
   3615   ret <2 x i64> %5
   3616 }
   3617 
        ; Scheduling test for the dword-to-qword sign extend (pmovsxdq / vpmovsxdq).
        ; Sign-extends the low two i32 lanes of %a0 and two i32 values loaded from
        ; %a1 to <2 x i64>, then returns their lane-wise sum. For every run
        ; configuration the assertion lines expect one register-operand extend,
        ; one memory-operand extend and an add (the SLM run also ends with a
        ; movdqa into the return register). These assertions are autogenerated
        ; (see the note on the first line of the file): regenerate them with the
        ; script instead of editing them by hand.
        ; NOTE(review): the "# sched: [a:b]" suffix is emitted because the RUN
        ; lines pass -print-schedule; presumably latency:reciprocal-throughput --
        ; confirm against the llc documentation.
   3618 define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
   3619 ; GENERIC-LABEL: test_pmovsxdq:
   3620 ; GENERIC:       # %bb.0:
   3621 ; GENERIC-NEXT:    pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
   3622 ; GENERIC-NEXT:    pmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
   3623 ; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   3624 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3625 ;
   3626 ; SLM-LABEL: test_pmovsxdq:
   3627 ; SLM:       # %bb.0:
   3628 ; SLM-NEXT:    pmovsxdq (%rdi), %xmm1 # sched: [4:1.00]
   3629 ; SLM-NEXT:    pmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
   3630 ; SLM-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   3631 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   3632 ; SLM-NEXT:    retq # sched: [4:1.00]
   3633 ;
   3634 ; SANDY-SSE-LABEL: test_pmovsxdq:
   3635 ; SANDY-SSE:       # %bb.0:
   3636 ; SANDY-SSE-NEXT:    pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
   3637 ; SANDY-SSE-NEXT:    pmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
   3638 ; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   3639 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3640 ;
   3641 ; SANDY-LABEL: test_pmovsxdq:
   3642 ; SANDY:       # %bb.0:
   3643 ; SANDY-NEXT:    vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
   3644 ; SANDY-NEXT:    vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50]
   3645 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3646 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3647 ;
   3648 ; HASWELL-SSE-LABEL: test_pmovsxdq:
   3649 ; HASWELL-SSE:       # %bb.0:
   3650 ; HASWELL-SSE-NEXT:    pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
   3651 ; HASWELL-SSE-NEXT:    pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
   3652 ; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   3653 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3654 ;
   3655 ; HASWELL-LABEL: test_pmovsxdq:
   3656 ; HASWELL:       # %bb.0:
   3657 ; HASWELL-NEXT:    vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
   3658 ; HASWELL-NEXT:    vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
   3659 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3660 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3661 ;
   3662 ; BROADWELL-SSE-LABEL: test_pmovsxdq:
   3663 ; BROADWELL-SSE:       # %bb.0:
   3664 ; BROADWELL-SSE-NEXT:    pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
   3665 ; BROADWELL-SSE-NEXT:    pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
   3666 ; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   3667 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3668 ;
   3669 ; BROADWELL-LABEL: test_pmovsxdq:
   3670 ; BROADWELL:       # %bb.0:
   3671 ; BROADWELL-NEXT:    vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
   3672 ; BROADWELL-NEXT:    vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
   3673 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3674 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3675 ;
   3676 ; SKYLAKE-SSE-LABEL: test_pmovsxdq:
   3677 ; SKYLAKE-SSE:       # %bb.0:
   3678 ; SKYLAKE-SSE-NEXT:    pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
   3679 ; SKYLAKE-SSE-NEXT:    pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
   3680 ; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   3681 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3682 ;
   3683 ; SKYLAKE-LABEL: test_pmovsxdq:
   3684 ; SKYLAKE:       # %bb.0:
   3685 ; SKYLAKE-NEXT:    vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
   3686 ; SKYLAKE-NEXT:    vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
   3687 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   3688 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3689 ;
   3690 ; SKX-SSE-LABEL: test_pmovsxdq:
   3691 ; SKX-SSE:       # %bb.0:
   3692 ; SKX-SSE-NEXT:    pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
   3693 ; SKX-SSE-NEXT:    pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
   3694 ; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   3695 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3696 ;
   3697 ; SKX-LABEL: test_pmovsxdq:
   3698 ; SKX:       # %bb.0:
   3699 ; SKX-NEXT:    vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
   3700 ; SKX-NEXT:    vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
   3701 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   3702 ; SKX-NEXT:    retq # sched: [7:1.00]
   3703 ;
   3704 ; BTVER2-SSE-LABEL: test_pmovsxdq:
   3705 ; BTVER2-SSE:       # %bb.0:
   3706 ; BTVER2-SSE-NEXT:    pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
   3707 ; BTVER2-SSE-NEXT:    pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
   3708 ; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   3709 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3710 ;
   3711 ; BTVER2-LABEL: test_pmovsxdq:
   3712 ; BTVER2:       # %bb.0:
   3713 ; BTVER2-NEXT:    vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
   3714 ; BTVER2-NEXT:    vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
   3715 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3716 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3717 ;
   3718 ; ZNVER1-SSE-LABEL: test_pmovsxdq:
   3719 ; ZNVER1-SSE:       # %bb.0:
   3720 ; ZNVER1-SSE-NEXT:    pmovsxdq %xmm0, %xmm1 # sched: [1:0.25]
   3721 ; ZNVER1-SSE-NEXT:    pmovsxdq (%rdi), %xmm0 # sched: [8:0.50]
   3722 ; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
   3723 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3724 ;
   3725 ; ZNVER1-LABEL: test_pmovsxdq:
   3726 ; ZNVER1:       # %bb.0:
   3727 ; ZNVER1-NEXT:    vpmovsxdq (%rdi), %xmm1 # sched: [8:0.50]
   3728 ; ZNVER1-NEXT:    vpmovsxdq %xmm0, %xmm0 # sched: [1:0.25]
   3729 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   3730 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Keep lanes 0-1 of %a0 and sign-extend them: the register-source extend.
   3731   %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
   3732   %2 = sext <2 x i32> %1 to <2 x i64>
          ; Load two dwords (align 1) and sign-extend them: the memory-source extend.
   3733   %3 = load <2 x i32>, <2 x i32>* %a1, align 1
   3734   %4 = sext <2 x i32> %3 to <2 x i64>
          ; Add the two extended vectors so both feed the returned value.
   3735   %5 = add <2 x i64> %2, %4
   3736   ret <2 x i64> %5
   3737 }
   3738 
        ; Scheduling test for the word-to-dword sign extend (pmovsxwd / vpmovsxwd).
        ; Sign-extends the low four i16 lanes of %a0 and four i16 values loaded
        ; from %a1 to <4 x i32>, then returns their lane-wise sum. For every run
        ; configuration the assertion lines expect one register-operand extend,
        ; one memory-operand extend and an add (the SLM run also ends with a
        ; movdqa into the return register). These assertions are autogenerated
        ; (see the note on the first line of the file): regenerate them with the
        ; script instead of editing them by hand.
        ; NOTE(review): the "# sched: [a:b]" suffix is emitted because the RUN
        ; lines pass -print-schedule; presumably latency:reciprocal-throughput --
        ; confirm against the llc documentation.
   3739 define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
   3740 ; GENERIC-LABEL: test_pmovsxwd:
   3741 ; GENERIC:       # %bb.0:
   3742 ; GENERIC-NEXT:    pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
   3743 ; GENERIC-NEXT:    pmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
   3744 ; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   3745 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3746 ;
   3747 ; SLM-LABEL: test_pmovsxwd:
   3748 ; SLM:       # %bb.0:
   3749 ; SLM-NEXT:    pmovsxwd (%rdi), %xmm1 # sched: [4:1.00]
   3750 ; SLM-NEXT:    pmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
   3751 ; SLM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   3752 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   3753 ; SLM-NEXT:    retq # sched: [4:1.00]
   3754 ;
   3755 ; SANDY-SSE-LABEL: test_pmovsxwd:
   3756 ; SANDY-SSE:       # %bb.0:
   3757 ; SANDY-SSE-NEXT:    pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
   3758 ; SANDY-SSE-NEXT:    pmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
   3759 ; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   3760 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3761 ;
   3762 ; SANDY-LABEL: test_pmovsxwd:
   3763 ; SANDY:       # %bb.0:
   3764 ; SANDY-NEXT:    vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
   3765 ; SANDY-NEXT:    vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50]
   3766 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3767 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3768 ;
   3769 ; HASWELL-SSE-LABEL: test_pmovsxwd:
   3770 ; HASWELL-SSE:       # %bb.0:
   3771 ; HASWELL-SSE-NEXT:    pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
   3772 ; HASWELL-SSE-NEXT:    pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
   3773 ; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   3774 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3775 ;
   3776 ; HASWELL-LABEL: test_pmovsxwd:
   3777 ; HASWELL:       # %bb.0:
   3778 ; HASWELL-NEXT:    vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
   3779 ; HASWELL-NEXT:    vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
   3780 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3781 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3782 ;
   3783 ; BROADWELL-SSE-LABEL: test_pmovsxwd:
   3784 ; BROADWELL-SSE:       # %bb.0:
   3785 ; BROADWELL-SSE-NEXT:    pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
   3786 ; BROADWELL-SSE-NEXT:    pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
   3787 ; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   3788 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3789 ;
   3790 ; BROADWELL-LABEL: test_pmovsxwd:
   3791 ; BROADWELL:       # %bb.0:
   3792 ; BROADWELL-NEXT:    vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
   3793 ; BROADWELL-NEXT:    vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
   3794 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3795 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3796 ;
   3797 ; SKYLAKE-SSE-LABEL: test_pmovsxwd:
   3798 ; SKYLAKE-SSE:       # %bb.0:
   3799 ; SKYLAKE-SSE-NEXT:    pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
   3800 ; SKYLAKE-SSE-NEXT:    pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
   3801 ; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   3802 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3803 ;
   3804 ; SKYLAKE-LABEL: test_pmovsxwd:
   3805 ; SKYLAKE:       # %bb.0:
   3806 ; SKYLAKE-NEXT:    vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
   3807 ; SKYLAKE-NEXT:    vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
   3808 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   3809 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3810 ;
   3811 ; SKX-SSE-LABEL: test_pmovsxwd:
   3812 ; SKX-SSE:       # %bb.0:
   3813 ; SKX-SSE-NEXT:    pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
   3814 ; SKX-SSE-NEXT:    pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
   3815 ; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   3816 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3817 ;
   3818 ; SKX-LABEL: test_pmovsxwd:
   3819 ; SKX:       # %bb.0:
   3820 ; SKX-NEXT:    vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
   3821 ; SKX-NEXT:    vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
   3822 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   3823 ; SKX-NEXT:    retq # sched: [7:1.00]
   3824 ;
   3825 ; BTVER2-SSE-LABEL: test_pmovsxwd:
   3826 ; BTVER2-SSE:       # %bb.0:
   3827 ; BTVER2-SSE-NEXT:    pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
   3828 ; BTVER2-SSE-NEXT:    pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
   3829 ; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   3830 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3831 ;
   3832 ; BTVER2-LABEL: test_pmovsxwd:
   3833 ; BTVER2:       # %bb.0:
   3834 ; BTVER2-NEXT:    vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
   3835 ; BTVER2-NEXT:    vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
   3836 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3837 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3838 ;
   3839 ; ZNVER1-SSE-LABEL: test_pmovsxwd:
   3840 ; ZNVER1-SSE:       # %bb.0:
   3841 ; ZNVER1-SSE-NEXT:    pmovsxwd %xmm0, %xmm1 # sched: [1:0.25]
   3842 ; ZNVER1-SSE-NEXT:    pmovsxwd (%rdi), %xmm0 # sched: [8:0.50]
   3843 ; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
   3844 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3845 ;
   3846 ; ZNVER1-LABEL: test_pmovsxwd:
   3847 ; ZNVER1:       # %bb.0:
   3848 ; ZNVER1-NEXT:    vpmovsxwd (%rdi), %xmm1 # sched: [8:0.50]
   3849 ; ZNVER1-NEXT:    vpmovsxwd %xmm0, %xmm0 # sched: [1:0.25]
   3850 ; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   3851 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Keep lanes 0-3 of %a0 and sign-extend them: the register-source extend.
   3852   %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   3853   %2 = sext <4 x i16> %1 to <4 x i32>
          ; Load four words (align 1) and sign-extend them: the memory-source extend.
   3854   %3 = load <4 x i16>, <4 x i16>* %a1, align 1
   3855   %4 = sext <4 x i16> %3 to <4 x i32>
          ; Add the two extended vectors so both feed the returned value.
   3856   %5 = add <4 x i32> %2, %4
   3857   ret <4 x i32> %5
   3858 }
   3859 
   3860 define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) {
        ; Sign-extends two i16 elements to i64 twice -- once from elements 0 and 1
        ; of %a0 and once from a <2 x i16> load through %a1 -- and returns the
        ; <2 x i64> sum. The expected output below contains both a register-source
        ; and a memory-source (v)pmovsxwq for every CPU configuration.
        ; The assertions are autogenerated (see the note on the first line of the
        ; file); regenerate them with utils/update_llc_test_checks.py instead of
        ; editing them by hand. The "sched" annotations are printed because the
        ; llc invocations at the top of the file pass -print-schedule.
        ; NOTE(review): the [a:b] pair is presumably latency:reciprocal-throughput
        ; from the selected scheduling model -- confirm.
   3861 ; GENERIC-LABEL: test_pmovsxwq:
   3862 ; GENERIC:       # %bb.0:
   3863 ; GENERIC-NEXT:    pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
   3864 ; GENERIC-NEXT:    pmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
   3865 ; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   3866 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3867 ;
   3868 ; SLM-LABEL: test_pmovsxwq:
   3869 ; SLM:       # %bb.0:
   3870 ; SLM-NEXT:    pmovsxwq (%rdi), %xmm1 # sched: [4:1.00]
   3871 ; SLM-NEXT:    pmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
   3872 ; SLM-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   3873 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   3874 ; SLM-NEXT:    retq # sched: [4:1.00]
   3875 ;
   3876 ; SANDY-SSE-LABEL: test_pmovsxwq:
   3877 ; SANDY-SSE:       # %bb.0:
   3878 ; SANDY-SSE-NEXT:    pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
   3879 ; SANDY-SSE-NEXT:    pmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
   3880 ; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   3881 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3882 ;
   3883 ; SANDY-LABEL: test_pmovsxwq:
   3884 ; SANDY:       # %bb.0:
   3885 ; SANDY-NEXT:    vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
   3886 ; SANDY-NEXT:    vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50]
   3887 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3888 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3889 ;
   3890 ; HASWELL-SSE-LABEL: test_pmovsxwq:
   3891 ; HASWELL-SSE:       # %bb.0:
   3892 ; HASWELL-SSE-NEXT:    pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
   3893 ; HASWELL-SSE-NEXT:    pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
   3894 ; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   3895 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3896 ;
   3897 ; HASWELL-LABEL: test_pmovsxwq:
   3898 ; HASWELL:       # %bb.0:
   3899 ; HASWELL-NEXT:    vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
   3900 ; HASWELL-NEXT:    vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
   3901 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3902 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3903 ;
   3904 ; BROADWELL-SSE-LABEL: test_pmovsxwq:
   3905 ; BROADWELL-SSE:       # %bb.0:
   3906 ; BROADWELL-SSE-NEXT:    pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
   3907 ; BROADWELL-SSE-NEXT:    pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
   3908 ; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   3909 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3910 ;
   3911 ; BROADWELL-LABEL: test_pmovsxwq:
   3912 ; BROADWELL:       # %bb.0:
   3913 ; BROADWELL-NEXT:    vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
   3914 ; BROADWELL-NEXT:    vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
   3915 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3916 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3917 ;
   3918 ; SKYLAKE-SSE-LABEL: test_pmovsxwq:
   3919 ; SKYLAKE-SSE:       # %bb.0:
   3920 ; SKYLAKE-SSE-NEXT:    pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
   3921 ; SKYLAKE-SSE-NEXT:    pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
   3922 ; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   3923 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3924 ;
   3925 ; SKYLAKE-LABEL: test_pmovsxwq:
   3926 ; SKYLAKE:       # %bb.0:
   3927 ; SKYLAKE-NEXT:    vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
   3928 ; SKYLAKE-NEXT:    vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
   3929 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   3930 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3931 ;
   3932 ; SKX-SSE-LABEL: test_pmovsxwq:
   3933 ; SKX-SSE:       # %bb.0:
   3934 ; SKX-SSE-NEXT:    pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
   3935 ; SKX-SSE-NEXT:    pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
   3936 ; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   3937 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3938 ;
   3939 ; SKX-LABEL: test_pmovsxwq:
   3940 ; SKX:       # %bb.0:
   3941 ; SKX-NEXT:    vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
   3942 ; SKX-NEXT:    vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
   3943 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   3944 ; SKX-NEXT:    retq # sched: [7:1.00]
   3945 ;
   3946 ; BTVER2-SSE-LABEL: test_pmovsxwq:
   3947 ; BTVER2-SSE:       # %bb.0:
   3948 ; BTVER2-SSE-NEXT:    pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
   3949 ; BTVER2-SSE-NEXT:    pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
   3950 ; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   3951 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3952 ;
   3953 ; BTVER2-LABEL: test_pmovsxwq:
   3954 ; BTVER2:       # %bb.0:
   3955 ; BTVER2-NEXT:    vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
   3956 ; BTVER2-NEXT:    vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
   3957 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3958 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3959 ;
   3960 ; ZNVER1-SSE-LABEL: test_pmovsxwq:
   3961 ; ZNVER1-SSE:       # %bb.0:
   3962 ; ZNVER1-SSE-NEXT:    pmovsxwq %xmm0, %xmm1 # sched: [1:0.25]
   3963 ; ZNVER1-SSE-NEXT:    pmovsxwq (%rdi), %xmm0 # sched: [8:0.50]
   3964 ; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
   3965 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3966 ;
   3967 ; ZNVER1-LABEL: test_pmovsxwq:
   3968 ; ZNVER1:       # %bb.0:
   3969 ; ZNVER1-NEXT:    vpmovsxwq (%rdi), %xmm1 # sched: [8:0.50]
   3970 ; ZNVER1-NEXT:    vpmovsxwq %xmm0, %xmm0 # sched: [1:0.25]
   3971 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   3972 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; %1/%2: elements 0 and 1 of %a0, sign-extended to i64.
          ; %3/%4: two i16 values loaded from %a1 (align 1), sign-extended to i64.
   3973   %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
   3974   %2 = sext <2 x i16> %1 to <2 x i64>
   3975   %3 = load <2 x i16>, <2 x i16>* %a1, align 1
   3976   %4 = sext <2 x i16> %3 to <2 x i64>
   3977   %5 = add <2 x i64> %2, %4
   3978   ret <2 x i64> %5
   3979 }
   3980 
   3981 define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) {
        ; Zero-extends eight i8 elements to i16 twice -- once from elements 0-7 of
        ; %a0 and once from an <8 x i8> load through %a1 -- and returns the
        ; <8 x i16> sum. The expected output below contains both a register-source
        ; (xmm0[...]) and a memory-source (mem[...]) (v)pmovzxbw for every CPU
        ; configuration.
        ; The assertions are autogenerated (see the note on the first line of the
        ; file); regenerate them with utils/update_llc_test_checks.py instead of
        ; editing them by hand. The "sched" annotations are printed because the
        ; llc invocations at the top of the file pass -print-schedule.
        ; NOTE(review): the [a:b] pair is presumably latency:reciprocal-throughput
        ; from the selected scheduling model -- confirm.
   3982 ; GENERIC-LABEL: test_pmovzxbw:
   3983 ; GENERIC:       # %bb.0:
   3984 ; GENERIC-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
   3985 ; GENERIC-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
   3986 ; GENERIC-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   3987 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3988 ;
   3989 ; SLM-LABEL: test_pmovzxbw:
   3990 ; SLM:       # %bb.0:
   3991 ; SLM-NEXT:    pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [4:1.00]
   3992 ; SLM-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
   3993 ; SLM-NEXT:    paddw %xmm0, %xmm1 # sched: [1:0.50]
   3994 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   3995 ; SLM-NEXT:    retq # sched: [4:1.00]
   3996 ;
   3997 ; SANDY-SSE-LABEL: test_pmovzxbw:
   3998 ; SANDY-SSE:       # %bb.0:
   3999 ; SANDY-SSE-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
   4000 ; SANDY-SSE-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
   4001 ; SANDY-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   4002 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4003 ;
   4004 ; SANDY-LABEL: test_pmovzxbw:
   4005 ; SANDY:       # %bb.0:
   4006 ; SANDY-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
   4007 ; SANDY-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
   4008 ; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4009 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4010 ;
   4011 ; HASWELL-SSE-LABEL: test_pmovzxbw:
   4012 ; HASWELL-SSE:       # %bb.0:
   4013 ; HASWELL-SSE-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
   4014 ; HASWELL-SSE-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
   4015 ; HASWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   4016 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4017 ;
   4018 ; HASWELL-LABEL: test_pmovzxbw:
   4019 ; HASWELL:       # %bb.0:
   4020 ; HASWELL-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
   4021 ; HASWELL-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
   4022 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4023 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4024 ;
   4025 ; BROADWELL-SSE-LABEL: test_pmovzxbw:
   4026 ; BROADWELL-SSE:       # %bb.0:
   4027 ; BROADWELL-SSE-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
   4028 ; BROADWELL-SSE-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
   4029 ; BROADWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   4030 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4031 ;
   4032 ; BROADWELL-LABEL: test_pmovzxbw:
   4033 ; BROADWELL:       # %bb.0:
   4034 ; BROADWELL-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
   4035 ; BROADWELL-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
   4036 ; BROADWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4037 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4038 ;
   4039 ; SKYLAKE-SSE-LABEL: test_pmovzxbw:
   4040 ; SKYLAKE-SSE:       # %bb.0:
   4041 ; SKYLAKE-SSE-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
   4042 ; SKYLAKE-SSE-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
   4043 ; SKYLAKE-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
   4044 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4045 ;
   4046 ; SKYLAKE-LABEL: test_pmovzxbw:
   4047 ; SKYLAKE:       # %bb.0:
   4048 ; SKYLAKE-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
   4049 ; SKYLAKE-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
   4050 ; SKYLAKE-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   4051 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4052 ;
   4053 ; SKX-SSE-LABEL: test_pmovzxbw:
   4054 ; SKX-SSE:       # %bb.0:
   4055 ; SKX-SSE-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
   4056 ; SKX-SSE-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
   4057 ; SKX-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
   4058 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4059 ;
   4060 ; SKX-LABEL: test_pmovzxbw:
   4061 ; SKX:       # %bb.0:
   4062 ; SKX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
   4063 ; SKX-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
   4064 ; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   4065 ; SKX-NEXT:    retq # sched: [7:1.00]
   4066 ;
   4067 ; BTVER2-SSE-LABEL: test_pmovzxbw:
   4068 ; BTVER2-SSE:       # %bb.0:
   4069 ; BTVER2-SSE-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
   4070 ; BTVER2-SSE-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
   4071 ; BTVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   4072 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4073 ;
   4074 ; BTVER2-LABEL: test_pmovzxbw:
   4075 ; BTVER2:       # %bb.0:
   4076 ; BTVER2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
   4077 ; BTVER2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
   4078 ; BTVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4079 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4080 ;
   4081 ; ZNVER1-SSE-LABEL: test_pmovzxbw:
   4082 ; ZNVER1-SSE:       # %bb.0:
   4083 ; ZNVER1-SSE-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25]
   4084 ; ZNVER1-SSE-NEXT:    pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
   4085 ; ZNVER1-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.25]
   4086 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4087 ;
   4088 ; ZNVER1-LABEL: test_pmovzxbw:
   4089 ; ZNVER1:       # %bb.0:
   4090 ; ZNVER1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
   4091 ; ZNVER1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25]
   4092 ; ZNVER1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   4093 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; %1/%2: elements 0-7 of %a0, zero-extended to i16.
          ; %3/%4: eight i8 values loaded from %a1 (align 1), zero-extended to i16.
   4094   %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   4095   %2 = zext <8 x i8> %1 to <8 x i16>
   4096   %3 = load <8 x i8>, <8 x i8>* %a1, align 1
   4097   %4 = zext <8 x i8> %3 to <8 x i16>
   4098   %5 = add <8 x i16> %2, %4
   4099   ret <8 x i16> %5
   4100 }
   4101 
   4102 define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) {
        ; Zero-extends four i8 elements to i32 twice -- once from elements 0-3 of
        ; %a0 and once from a <4 x i8> load through %a1 -- and returns the
        ; <4 x i32> sum. The expected output below contains both a register-source
        ; (xmm0[...]) and a memory-source (mem[...]) (v)pmovzxbd for every CPU
        ; configuration.
        ; The assertions are autogenerated (see the note on the first line of the
        ; file); regenerate them with utils/update_llc_test_checks.py instead of
        ; editing them by hand. The "sched" annotations are printed because the
        ; llc invocations at the top of the file pass -print-schedule.
        ; NOTE(review): the [a:b] pair is presumably latency:reciprocal-throughput
        ; from the selected scheduling model -- confirm.
   4103 ; GENERIC-LABEL: test_pmovzxbd:
   4104 ; GENERIC:       # %bb.0:
   4105 ; GENERIC-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
   4106 ; GENERIC-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
   4107 ; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   4108 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4109 ;
   4110 ; SLM-LABEL: test_pmovzxbd:
   4111 ; SLM:       # %bb.0:
   4112 ; SLM-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [4:1.00]
   4113 ; SLM-NEXT:    pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
   4114 ; SLM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   4115 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   4116 ; SLM-NEXT:    retq # sched: [4:1.00]
   4117 ;
   4118 ; SANDY-SSE-LABEL: test_pmovzxbd:
   4119 ; SANDY-SSE:       # %bb.0:
   4120 ; SANDY-SSE-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
   4121 ; SANDY-SSE-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
   4122 ; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   4123 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4124 ;
   4125 ; SANDY-LABEL: test_pmovzxbd:
   4126 ; SANDY:       # %bb.0:
   4127 ; SANDY-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
   4128 ; SANDY-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
   4129 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4130 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4131 ;
   4132 ; HASWELL-SSE-LABEL: test_pmovzxbd:
   4133 ; HASWELL-SSE:       # %bb.0:
   4134 ; HASWELL-SSE-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
   4135 ; HASWELL-SSE-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
   4136 ; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   4137 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4138 ;
   4139 ; HASWELL-LABEL: test_pmovzxbd:
   4140 ; HASWELL:       # %bb.0:
   4141 ; HASWELL-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
   4142 ; HASWELL-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
   4143 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4144 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4145 ;
   4146 ; BROADWELL-SSE-LABEL: test_pmovzxbd:
   4147 ; BROADWELL-SSE:       # %bb.0:
   4148 ; BROADWELL-SSE-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
   4149 ; BROADWELL-SSE-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
   4150 ; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   4151 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4152 ;
   4153 ; BROADWELL-LABEL: test_pmovzxbd:
   4154 ; BROADWELL:       # %bb.0:
   4155 ; BROADWELL-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
   4156 ; BROADWELL-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
   4157 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4158 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4159 ;
   4160 ; SKYLAKE-SSE-LABEL: test_pmovzxbd:
   4161 ; SKYLAKE-SSE:       # %bb.0:
   4162 ; SKYLAKE-SSE-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
   4163 ; SKYLAKE-SSE-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
   4164 ; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   4165 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4166 ;
   4167 ; SKYLAKE-LABEL: test_pmovzxbd:
   4168 ; SKYLAKE:       # %bb.0:
   4169 ; SKYLAKE-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
   4170 ; SKYLAKE-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
   4171 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   4172 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4173 ;
   4174 ; SKX-SSE-LABEL: test_pmovzxbd:
   4175 ; SKX-SSE:       # %bb.0:
   4176 ; SKX-SSE-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
   4177 ; SKX-SSE-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
   4178 ; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   4179 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4180 ;
   4181 ; SKX-LABEL: test_pmovzxbd:
   4182 ; SKX:       # %bb.0:
   4183 ; SKX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
   4184 ; SKX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
   4185 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   4186 ; SKX-NEXT:    retq # sched: [7:1.00]
   4187 ;
   4188 ; BTVER2-SSE-LABEL: test_pmovzxbd:
   4189 ; BTVER2-SSE:       # %bb.0:
   4190 ; BTVER2-SSE-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
   4191 ; BTVER2-SSE-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
   4192 ; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   4193 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4194 ;
   4195 ; BTVER2-LABEL: test_pmovzxbd:
   4196 ; BTVER2:       # %bb.0:
   4197 ; BTVER2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
   4198 ; BTVER2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
   4199 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4200 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4201 ;
   4202 ; ZNVER1-SSE-LABEL: test_pmovzxbd:
   4203 ; ZNVER1-SSE:       # %bb.0:
   4204 ; ZNVER1-SSE-NEXT:    pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25]
   4205 ; ZNVER1-SSE-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
   4206 ; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
   4207 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4208 ;
   4209 ; ZNVER1-LABEL: test_pmovzxbd:
   4210 ; ZNVER1:       # %bb.0:
   4211 ; ZNVER1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
   4212 ; ZNVER1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25]
   4213 ; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   4214 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; %1/%2: elements 0-3 of %a0, zero-extended to i32.
          ; %3/%4: four i8 values loaded from %a1 (align 1), zero-extended to i32.
   4215   %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   4216   %2 = zext <4 x i8> %1 to <4 x i32>
   4217   %3 = load <4 x i8>, <4 x i8>* %a1, align 1
   4218   %4 = zext <4 x i8> %3 to <4 x i32>
   4219   %5 = add <4 x i32> %2, %4
   4220   ret <4 x i32> %5
   4221 }
   4222 
   4223 define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) {
        ; Zero-extends two i8 elements to i64 twice -- once from elements 0 and 1
        ; of %a0 and once from a <2 x i8> load through %a1 -- and returns the
        ; <2 x i64> sum. The expected output below contains both a register-source
        ; (xmm0[...]) and a memory-source (mem[...]) (v)pmovzxbq for every CPU
        ; configuration.
        ; The assertions are autogenerated (see the note on the first line of the
        ; file); regenerate them with utils/update_llc_test_checks.py instead of
        ; editing them by hand. The "sched" annotations are printed because the
        ; llc invocations at the top of the file pass -print-schedule.
        ; NOTE(review): the [a:b] pair is presumably latency:reciprocal-throughput
        ; from the selected scheduling model -- confirm.
   4224 ; GENERIC-LABEL: test_pmovzxbq:
   4225 ; GENERIC:       # %bb.0:
   4226 ; GENERIC-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
   4227 ; GENERIC-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
   4228 ; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   4229 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4230 ;
   4231 ; SLM-LABEL: test_pmovzxbq:
   4232 ; SLM:       # %bb.0:
   4233 ; SLM-NEXT:    pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [4:1.00]
   4234 ; SLM-NEXT:    pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
   4235 ; SLM-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   4236 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   4237 ; SLM-NEXT:    retq # sched: [4:1.00]
   4238 ;
   4239 ; SANDY-SSE-LABEL: test_pmovzxbq:
   4240 ; SANDY-SSE:       # %bb.0:
   4241 ; SANDY-SSE-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
   4242 ; SANDY-SSE-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
   4243 ; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   4244 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4245 ;
   4246 ; SANDY-LABEL: test_pmovzxbq:
   4247 ; SANDY:       # %bb.0:
   4248 ; SANDY-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
   4249 ; SANDY-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
   4250 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4251 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4252 ;
   4253 ; HASWELL-SSE-LABEL: test_pmovzxbq:
   4254 ; HASWELL-SSE:       # %bb.0:
   4255 ; HASWELL-SSE-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
   4256 ; HASWELL-SSE-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
   4257 ; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   4258 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4259 ;
   4260 ; HASWELL-LABEL: test_pmovzxbq:
   4261 ; HASWELL:       # %bb.0:
   4262 ; HASWELL-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
   4263 ; HASWELL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
   4264 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4265 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4266 ;
   4267 ; BROADWELL-SSE-LABEL: test_pmovzxbq:
   4268 ; BROADWELL-SSE:       # %bb.0:
   4269 ; BROADWELL-SSE-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
   4270 ; BROADWELL-SSE-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
   4271 ; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   4272 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4273 ;
   4274 ; BROADWELL-LABEL: test_pmovzxbq:
   4275 ; BROADWELL:       # %bb.0:
   4276 ; BROADWELL-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
   4277 ; BROADWELL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
   4278 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4279 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4280 ;
   4281 ; SKYLAKE-SSE-LABEL: test_pmovzxbq:
   4282 ; SKYLAKE-SSE:       # %bb.0:
   4283 ; SKYLAKE-SSE-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
   4284 ; SKYLAKE-SSE-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
   4285 ; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   4286 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4287 ;
   4288 ; SKYLAKE-LABEL: test_pmovzxbq:
   4289 ; SKYLAKE:       # %bb.0:
   4290 ; SKYLAKE-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
   4291 ; SKYLAKE-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
   4292 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   4293 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4294 ;
   4295 ; SKX-SSE-LABEL: test_pmovzxbq:
   4296 ; SKX-SSE:       # %bb.0:
   4297 ; SKX-SSE-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
   4298 ; SKX-SSE-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
   4299 ; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   4300 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4301 ;
   4302 ; SKX-LABEL: test_pmovzxbq:
   4303 ; SKX:       # %bb.0:
   4304 ; SKX-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
   4305 ; SKX-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
   4306 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   4307 ; SKX-NEXT:    retq # sched: [7:1.00]
   4308 ;
   4309 ; BTVER2-SSE-LABEL: test_pmovzxbq:
   4310 ; BTVER2-SSE:       # %bb.0:
   4311 ; BTVER2-SSE-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
   4312 ; BTVER2-SSE-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
   4313 ; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   4314 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4315 ;
   4316 ; BTVER2-LABEL: test_pmovzxbq:
   4317 ; BTVER2:       # %bb.0:
   4318 ; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
   4319 ; BTVER2-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
   4320 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4321 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4322 ;
   4323 ; ZNVER1-SSE-LABEL: test_pmovzxbq:
   4324 ; ZNVER1-SSE:       # %bb.0:
   4325 ; ZNVER1-SSE-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.25]
   4326 ; ZNVER1-SSE-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
   4327 ; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
   4328 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4329 ;
   4330 ; ZNVER1-LABEL: test_pmovzxbq:
   4331 ; ZNVER1:       # %bb.0:
   4332 ; ZNVER1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
   4333 ; ZNVER1-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.25]
   4334 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   4335 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; %1/%2: elements 0 and 1 of %a0, zero-extended to i64.
          ; %3/%4: two i8 values loaded from %a1 (align 1), zero-extended to i64.
   4336   %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
   4337   %2 = zext <2 x i8> %1 to <2 x i64>
   4338   %3 = load <2 x i8>, <2 x i8>* %a1, align 1
   4339   %4 = zext <2 x i8> %3 to <2 x i64>
   4340   %5 = add <2 x i64> %2, %4
   4341   ret <2 x i64> %5
   4342 }
   4343 
   4344 define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) {
   4345 ; GENERIC-LABEL: test_pmovzxdq:
   4346 ; GENERIC:       # %bb.0:
   4347 ; GENERIC-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
   4348 ; GENERIC-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50]
   4349 ; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   4350 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4351 ;
   4352 ; SLM-LABEL: test_pmovzxdq:
   4353 ; SLM:       # %bb.0:
   4354 ; SLM-NEXT:    pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [4:1.00]
   4355 ; SLM-NEXT:    pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
   4356 ; SLM-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   4357 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   4358 ; SLM-NEXT:    retq # sched: [4:1.00]
   4359 ;
   4360 ; SANDY-SSE-LABEL: test_pmovzxdq:
   4361 ; SANDY-SSE:       # %bb.0:
   4362 ; SANDY-SSE-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
   4363 ; SANDY-SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50]
   4364 ; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   4365 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4366 ;
   4367 ; SANDY-LABEL: test_pmovzxdq:
   4368 ; SANDY:       # %bb.0:
   4369 ; SANDY-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
   4370 ; SANDY-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50]
   4371 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4372 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4373 ;
   4374 ; HASWELL-SSE-LABEL: test_pmovzxdq:
   4375 ; HASWELL-SSE:       # %bb.0:
   4376 ; HASWELL-SSE-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
   4377 ; HASWELL-SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
   4378 ; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   4379 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4380 ;
   4381 ; HASWELL-LABEL: test_pmovzxdq:
   4382 ; HASWELL:       # %bb.0:
   4383 ; HASWELL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
   4384 ; HASWELL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
   4385 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4386 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4387 ;
   4388 ; BROADWELL-SSE-LABEL: test_pmovzxdq:
   4389 ; BROADWELL-SSE:       # %bb.0:
   4390 ; BROADWELL-SSE-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
   4391 ; BROADWELL-SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
   4392 ; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   4393 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4394 ;
   4395 ; BROADWELL-LABEL: test_pmovzxdq:
   4396 ; BROADWELL:       # %bb.0:
   4397 ; BROADWELL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
   4398 ; BROADWELL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
   4399 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4400 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4401 ;
   4402 ; SKYLAKE-SSE-LABEL: test_pmovzxdq:
   4403 ; SKYLAKE-SSE:       # %bb.0:
   4404 ; SKYLAKE-SSE-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
   4405 ; SKYLAKE-SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
   4406 ; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   4407 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4408 ;
   4409 ; SKYLAKE-LABEL: test_pmovzxdq:
   4410 ; SKYLAKE:       # %bb.0:
   4411 ; SKYLAKE-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
   4412 ; SKYLAKE-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
   4413 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   4414 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4415 ;
   4416 ; SKX-SSE-LABEL: test_pmovzxdq:
   4417 ; SKX-SSE:       # %bb.0:
   4418 ; SKX-SSE-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
   4419 ; SKX-SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
   4420 ; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   4421 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4422 ;
   4423 ; SKX-LABEL: test_pmovzxdq:
   4424 ; SKX:       # %bb.0:
   4425 ; SKX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
   4426 ; SKX-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
   4427 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   4428 ; SKX-NEXT:    retq # sched: [7:1.00]
   4429 ;
   4430 ; BTVER2-SSE-LABEL: test_pmovzxdq:
   4431 ; BTVER2-SSE:       # %bb.0:
   4432 ; BTVER2-SSE-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
   4433 ; BTVER2-SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
   4434 ; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   4435 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4436 ;
   4437 ; BTVER2-LABEL: test_pmovzxdq:
   4438 ; BTVER2:       # %bb.0:
   4439 ; BTVER2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
   4440 ; BTVER2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
   4441 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4442 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4443 ;
   4444 ; ZNVER1-SSE-LABEL: test_pmovzxdq:
   4445 ; ZNVER1-SSE:       # %bb.0:
   4446 ; ZNVER1-SSE-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.25]
   4447 ; ZNVER1-SSE-NEXT:    pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [8:0.50]
   4448 ; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
   4449 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4450 ;
   4451 ; ZNVER1-LABEL: test_pmovzxdq:
   4452 ; ZNVER1:       # %bb.0:
   4453 ; ZNVER1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [8:0.50]
   4454 ; ZNVER1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.25]
   4455 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   4456 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Scheduling test for the i32 -> i64 zero-extension (v)pmovzxdq.
          ; The expectations above are autogenerated (see the note at the top
          ; of the file): regenerate them with the update script instead of
          ; editing by hand. Every target is expected to emit one
          ; register-source and one memory-source (v)pmovzxdq, then a (v)paddq
          ; joining the two results.
          ; The "sched: [a:b]" suffixes are produced by the -print-schedule
          ; option of the llc invocations in the file header (presumably
          ; latency and reciprocal throughput -- TODO confirm).
   4457   %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1> ; keep lanes 0 and 1 of %a0
   4458   %2 = zext <2 x i32> %1 to <2 x i64> ; register-source extension
   4459   %3 = load <2 x i32>, <2 x i32>* %a1, align 1 ; two i32 elements read from %a1, only byte alignment stated
   4460   %4 = zext <2 x i32> %3 to <2 x i64> ; extension of the loaded value (memory-source form in the expected asm)
   4461   %5 = add <2 x i64> %2, %4 ; the sum consumes both extended values
   4462   ret <2 x i64> %5
   4463 }
   4464 
   4465 define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) {
   4466 ; GENERIC-LABEL: test_pmovzxwd:
   4467 ; GENERIC:       # %bb.0:
   4468 ; GENERIC-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
   4469 ; GENERIC-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
   4470 ; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   4471 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4472 ;
   4473 ; SLM-LABEL: test_pmovzxwd:
   4474 ; SLM:       # %bb.0:
   4475 ; SLM-NEXT:    pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [4:1.00]
   4476 ; SLM-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
   4477 ; SLM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   4478 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   4479 ; SLM-NEXT:    retq # sched: [4:1.00]
   4480 ;
   4481 ; SANDY-SSE-LABEL: test_pmovzxwd:
   4482 ; SANDY-SSE:       # %bb.0:
   4483 ; SANDY-SSE-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
   4484 ; SANDY-SSE-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
   4485 ; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   4486 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4487 ;
   4488 ; SANDY-LABEL: test_pmovzxwd:
   4489 ; SANDY:       # %bb.0:
   4490 ; SANDY-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
   4491 ; SANDY-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
   4492 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4493 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4494 ;
   4495 ; HASWELL-SSE-LABEL: test_pmovzxwd:
   4496 ; HASWELL-SSE:       # %bb.0:
   4497 ; HASWELL-SSE-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
   4498 ; HASWELL-SSE-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
   4499 ; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   4500 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4501 ;
   4502 ; HASWELL-LABEL: test_pmovzxwd:
   4503 ; HASWELL:       # %bb.0:
   4504 ; HASWELL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
   4505 ; HASWELL-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
   4506 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4507 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4508 ;
   4509 ; BROADWELL-SSE-LABEL: test_pmovzxwd:
   4510 ; BROADWELL-SSE:       # %bb.0:
   4511 ; BROADWELL-SSE-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
   4512 ; BROADWELL-SSE-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
   4513 ; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   4514 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4515 ;
   4516 ; BROADWELL-LABEL: test_pmovzxwd:
   4517 ; BROADWELL:       # %bb.0:
   4518 ; BROADWELL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
   4519 ; BROADWELL-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
   4520 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4521 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4522 ;
   4523 ; SKYLAKE-SSE-LABEL: test_pmovzxwd:
   4524 ; SKYLAKE-SSE:       # %bb.0:
   4525 ; SKYLAKE-SSE-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
   4526 ; SKYLAKE-SSE-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
   4527 ; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   4528 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4529 ;
   4530 ; SKYLAKE-LABEL: test_pmovzxwd:
   4531 ; SKYLAKE:       # %bb.0:
   4532 ; SKYLAKE-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
   4533 ; SKYLAKE-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
   4534 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   4535 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4536 ;
   4537 ; SKX-SSE-LABEL: test_pmovzxwd:
   4538 ; SKX-SSE:       # %bb.0:
   4539 ; SKX-SSE-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
   4540 ; SKX-SSE-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
   4541 ; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   4542 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4543 ;
   4544 ; SKX-LABEL: test_pmovzxwd:
   4545 ; SKX:       # %bb.0:
   4546 ; SKX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
   4547 ; SKX-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
   4548 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   4549 ; SKX-NEXT:    retq # sched: [7:1.00]
   4550 ;
   4551 ; BTVER2-SSE-LABEL: test_pmovzxwd:
   4552 ; BTVER2-SSE:       # %bb.0:
   4553 ; BTVER2-SSE-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
   4554 ; BTVER2-SSE-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
   4555 ; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   4556 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4557 ;
   4558 ; BTVER2-LABEL: test_pmovzxwd:
   4559 ; BTVER2:       # %bb.0:
   4560 ; BTVER2-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
   4561 ; BTVER2-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
   4562 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4563 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4564 ;
   4565 ; ZNVER1-SSE-LABEL: test_pmovzxwd:
   4566 ; ZNVER1-SSE:       # %bb.0:
   4567 ; ZNVER1-SSE-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.25]
   4568 ; ZNVER1-SSE-NEXT:    pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
   4569 ; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
   4570 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4571 ;
   4572 ; ZNVER1-LABEL: test_pmovzxwd:
   4573 ; ZNVER1:       # %bb.0:
   4574 ; ZNVER1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
   4575 ; ZNVER1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.25]
   4576 ; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   4577 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Scheduling test for the i16 -> i32 zero-extension (v)pmovzxwd.
          ; The expectations above are autogenerated (see the note at the top
          ; of the file): regenerate them with the update script instead of
          ; editing by hand. Every target is expected to emit one
          ; register-source and one memory-source (v)pmovzxwd, then a (v)paddd
          ; joining the two results.
          ; The "sched: [a:b]" suffixes are produced by the -print-schedule
          ; option of the llc invocations in the file header (presumably
          ; latency and reciprocal throughput -- TODO confirm).
   4578   %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; keep lanes 0..3 of %a0
   4579   %2 = zext <4 x i16> %1 to <4 x i32> ; register-source extension
   4580   %3 = load <4 x i16>, <4 x i16>* %a1, align 1 ; four i16 elements read from %a1, only byte alignment stated
   4581   %4 = zext <4 x i16> %3 to <4 x i32> ; extension of the loaded value (memory-source form in the expected asm)
   4582   %5 = add <4 x i32> %2, %4 ; the sum consumes both extended values
   4583   ret <4 x i32> %5
   4584 }
   4585 
   4586 define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
   4587 ; GENERIC-LABEL: test_pmovzxwq:
   4588 ; GENERIC:       # %bb.0:
   4589 ; GENERIC-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
   4590 ; GENERIC-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
   4591 ; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   4592 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4593 ;
   4594 ; SLM-LABEL: test_pmovzxwq:
   4595 ; SLM:       # %bb.0:
   4596 ; SLM-NEXT:    pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [4:1.00]
   4597 ; SLM-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
   4598 ; SLM-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   4599 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   4600 ; SLM-NEXT:    retq # sched: [4:1.00]
   4601 ;
   4602 ; SANDY-SSE-LABEL: test_pmovzxwq:
   4603 ; SANDY-SSE:       # %bb.0:
   4604 ; SANDY-SSE-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
   4605 ; SANDY-SSE-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
   4606 ; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   4607 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4608 ;
   4609 ; SANDY-LABEL: test_pmovzxwq:
   4610 ; SANDY:       # %bb.0:
   4611 ; SANDY-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
   4612 ; SANDY-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
   4613 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4614 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4615 ;
   4616 ; HASWELL-SSE-LABEL: test_pmovzxwq:
   4617 ; HASWELL-SSE:       # %bb.0:
   4618 ; HASWELL-SSE-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
   4619 ; HASWELL-SSE-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
   4620 ; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   4621 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4622 ;
   4623 ; HASWELL-LABEL: test_pmovzxwq:
   4624 ; HASWELL:       # %bb.0:
   4625 ; HASWELL-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
   4626 ; HASWELL-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
   4627 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4628 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4629 ;
   4630 ; BROADWELL-SSE-LABEL: test_pmovzxwq:
   4631 ; BROADWELL-SSE:       # %bb.0:
   4632 ; BROADWELL-SSE-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
   4633 ; BROADWELL-SSE-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
   4634 ; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   4635 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4636 ;
   4637 ; BROADWELL-LABEL: test_pmovzxwq:
   4638 ; BROADWELL:       # %bb.0:
   4639 ; BROADWELL-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
   4640 ; BROADWELL-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
   4641 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4642 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4643 ;
   4644 ; SKYLAKE-SSE-LABEL: test_pmovzxwq:
   4645 ; SKYLAKE-SSE:       # %bb.0:
   4646 ; SKYLAKE-SSE-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
   4647 ; SKYLAKE-SSE-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
   4648 ; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   4649 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4650 ;
   4651 ; SKYLAKE-LABEL: test_pmovzxwq:
   4652 ; SKYLAKE:       # %bb.0:
   4653 ; SKYLAKE-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
   4654 ; SKYLAKE-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
   4655 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   4656 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4657 ;
   4658 ; SKX-SSE-LABEL: test_pmovzxwq:
   4659 ; SKX-SSE:       # %bb.0:
   4660 ; SKX-SSE-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
   4661 ; SKX-SSE-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
   4662 ; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   4663 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4664 ;
   4665 ; SKX-LABEL: test_pmovzxwq:
   4666 ; SKX:       # %bb.0:
   4667 ; SKX-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
   4668 ; SKX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
   4669 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   4670 ; SKX-NEXT:    retq # sched: [7:1.00]
   4671 ;
   4672 ; BTVER2-SSE-LABEL: test_pmovzxwq:
   4673 ; BTVER2-SSE:       # %bb.0:
   4674 ; BTVER2-SSE-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
   4675 ; BTVER2-SSE-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
   4676 ; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   4677 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4678 ;
   4679 ; BTVER2-LABEL: test_pmovzxwq:
   4680 ; BTVER2:       # %bb.0:
   4681 ; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
   4682 ; BTVER2-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
   4683 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   4684 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4685 ;
   4686 ; ZNVER1-SSE-LABEL: test_pmovzxwq:
   4687 ; ZNVER1-SSE:       # %bb.0:
   4688 ; ZNVER1-SSE-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.25]
   4689 ; ZNVER1-SSE-NEXT:    pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50]
   4690 ; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
   4691 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4692 ;
   4693 ; ZNVER1-LABEL: test_pmovzxwq:
   4694 ; ZNVER1:       # %bb.0:
   4695 ; ZNVER1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50]
   4696 ; ZNVER1-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.25]
   4697 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   4698 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Scheduling test for the i16 -> i64 zero-extension (v)pmovzxwq.
          ; The expectations above are autogenerated (see the note at the top
          ; of the file): regenerate them with the update script instead of
          ; editing by hand. Every target is expected to emit one
          ; register-source and one memory-source (v)pmovzxwq, then a (v)paddq
          ; joining the two results.
          ; The "sched: [a:b]" suffixes are produced by the -print-schedule
          ; option of the llc invocations in the file header (presumably
          ; latency and reciprocal throughput -- TODO confirm).
   4699   %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1> ; keep lanes 0 and 1 of %a0
   4700   %2 = zext <2 x i16> %1 to <2 x i64> ; register-source extension
   4701   %3 = load <2 x i16>, <2 x i16>* %a1, align 1 ; two i16 elements read from %a1, only byte alignment stated
   4702   %4 = zext <2 x i16> %3 to <2 x i64> ; extension of the loaded value (memory-source form in the expected asm)
   4703   %5 = add <2 x i64> %2, %4 ; the sum consumes both extended values
   4704   ret <2 x i64> %5
   4705 }
   4706 
   4707 define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
   4708 ; GENERIC-LABEL: test_pmuldq:
   4709 ; GENERIC:       # %bb.0:
   4710 ; GENERIC-NEXT:    pmuldq %xmm1, %xmm0 # sched: [5:1.00]
   4711 ; GENERIC-NEXT:    pmuldq (%rdi), %xmm0 # sched: [11:1.00]
   4712 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4713 ;
   4714 ; SLM-LABEL: test_pmuldq:
   4715 ; SLM:       # %bb.0:
   4716 ; SLM-NEXT:    pmuldq %xmm1, %xmm0 # sched: [4:1.00]
   4717 ; SLM-NEXT:    pmuldq (%rdi), %xmm0 # sched: [7:1.00]
   4718 ; SLM-NEXT:    retq # sched: [4:1.00]
   4719 ;
   4720 ; SANDY-SSE-LABEL: test_pmuldq:
   4721 ; SANDY-SSE:       # %bb.0:
   4722 ; SANDY-SSE-NEXT:    pmuldq %xmm1, %xmm0 # sched: [5:1.00]
   4723 ; SANDY-SSE-NEXT:    pmuldq (%rdi), %xmm0 # sched: [11:1.00]
   4724 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4725 ;
   4726 ; SANDY-LABEL: test_pmuldq:
   4727 ; SANDY:       # %bb.0:
   4728 ; SANDY-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   4729 ; SANDY-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   4730 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4731 ;
   4732 ; HASWELL-SSE-LABEL: test_pmuldq:
   4733 ; HASWELL-SSE:       # %bb.0:
   4734 ; HASWELL-SSE-NEXT:    pmuldq %xmm1, %xmm0 # sched: [5:1.00]
   4735 ; HASWELL-SSE-NEXT:    pmuldq (%rdi), %xmm0 # sched: [11:1.00]
   4736 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4737 ;
   4738 ; HASWELL-LABEL: test_pmuldq:
   4739 ; HASWELL:       # %bb.0:
   4740 ; HASWELL-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   4741 ; HASWELL-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   4742 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4743 ;
   4744 ; BROADWELL-SSE-LABEL: test_pmuldq:
   4745 ; BROADWELL-SSE:       # %bb.0:
   4746 ; BROADWELL-SSE-NEXT:    pmuldq %xmm1, %xmm0 # sched: [5:1.00]
   4747 ; BROADWELL-SSE-NEXT:    pmuldq (%rdi), %xmm0 # sched: [10:1.00]
   4748 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4749 ;
   4750 ; BROADWELL-LABEL: test_pmuldq:
   4751 ; BROADWELL:       # %bb.0:
   4752 ; BROADWELL-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   4753 ; BROADWELL-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   4754 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4755 ;
   4756 ; SKYLAKE-SSE-LABEL: test_pmuldq:
   4757 ; SKYLAKE-SSE:       # %bb.0:
   4758 ; SKYLAKE-SSE-NEXT:    pmuldq %xmm1, %xmm0 # sched: [4:0.50]
   4759 ; SKYLAKE-SSE-NEXT:    pmuldq (%rdi), %xmm0 # sched: [10:0.50]
   4760 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4761 ;
   4762 ; SKYLAKE-LABEL: test_pmuldq:
   4763 ; SKYLAKE:       # %bb.0:
   4764 ; SKYLAKE-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4765 ; SKYLAKE-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   4766 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4767 ;
   4768 ; SKX-SSE-LABEL: test_pmuldq:
   4769 ; SKX-SSE:       # %bb.0:
   4770 ; SKX-SSE-NEXT:    pmuldq %xmm1, %xmm0 # sched: [4:0.50]
   4771 ; SKX-SSE-NEXT:    pmuldq (%rdi), %xmm0 # sched: [10:0.50]
   4772 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4773 ;
   4774 ; SKX-LABEL: test_pmuldq:
   4775 ; SKX:       # %bb.0:
   4776 ; SKX-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4777 ; SKX-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   4778 ; SKX-NEXT:    retq # sched: [7:1.00]
   4779 ;
   4780 ; BTVER2-SSE-LABEL: test_pmuldq:
   4781 ; BTVER2-SSE:       # %bb.0:
   4782 ; BTVER2-SSE-NEXT:    pmuldq %xmm1, %xmm0 # sched: [2:1.00]
   4783 ; BTVER2-SSE-NEXT:    pmuldq (%rdi), %xmm0 # sched: [7:1.00]
   4784 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4785 ;
   4786 ; BTVER2-LABEL: test_pmuldq:
   4787 ; BTVER2:       # %bb.0:
   4788 ; BTVER2-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   4789 ; BTVER2-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   4790 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4791 ;
   4792 ; ZNVER1-SSE-LABEL: test_pmuldq:
   4793 ; ZNVER1-SSE:       # %bb.0:
   4794 ; ZNVER1-SSE-NEXT:    pmuldq %xmm1, %xmm0 # sched: [4:1.00]
   4795 ; ZNVER1-SSE-NEXT:    pmuldq (%rdi), %xmm0 # sched: [11:1.00]
   4796 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4797 ;
   4798 ; ZNVER1-LABEL: test_pmuldq:
   4799 ; ZNVER1:       # %bb.0:
   4800 ; ZNVER1-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
   4801 ; ZNVER1-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   4802 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Scheduling test for (v)pmuldq, driven through the
          ; llvm.x86.sse41.pmuldq intrinsic. The expectations above are
          ; autogenerated (see the note at the top of the file): regenerate
          ; them with the update script instead of editing by hand. Every
          ; target is expected to emit a register-register (v)pmuldq followed
          ; by a (v)pmuldq whose second source is the memory operand (%rdi).
          ; The "sched: [a:b]" suffixes are produced by the -print-schedule
          ; option of the llc invocations in the file header (presumably
          ; latency and reciprocal throughput -- TODO confirm).
   4803   %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; register-register form
   4804   %2 = bitcast <2 x i64> %1 to <4 x i32> ; reinterpret the product so it matches the intrinsic's operand type
   4805   %3 = load <4 x i32>, <4 x i32> *%a2, align 16 ; 16-byte-aligned vector load from %a2
   4806   %4 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %2, <4 x i32> %3) ; second operand is the loaded value (memory form in the expected asm)
   4807   ret <2 x i64> %4
   4808 }
        ; Intrinsic used by test_pmuldq; declared nounwind readnone.
   4809 declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
   4810 
   4811 define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
   4812 ; GENERIC-LABEL: test_pmulld:
   4813 ; GENERIC:       # %bb.0:
   4814 ; GENERIC-NEXT:    pmulld %xmm1, %xmm0 # sched: [5:1.00]
   4815 ; GENERIC-NEXT:    pmulld (%rdi), %xmm0 # sched: [11:1.00]
   4816 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4817 ;
   4818 ; SLM-LABEL: test_pmulld:
   4819 ; SLM:       # %bb.0:
   4820 ; SLM-NEXT:    pmulld %xmm1, %xmm0 # sched: [4:1.00]
   4821 ; SLM-NEXT:    pmulld (%rdi), %xmm0 # sched: [7:1.00]
   4822 ; SLM-NEXT:    retq # sched: [4:1.00]
   4823 ;
   4824 ; SANDY-SSE-LABEL: test_pmulld:
   4825 ; SANDY-SSE:       # %bb.0:
   4826 ; SANDY-SSE-NEXT:    pmulld %xmm1, %xmm0 # sched: [5:1.00]
   4827 ; SANDY-SSE-NEXT:    pmulld (%rdi), %xmm0 # sched: [11:1.00]
   4828 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4829 ;
   4830 ; SANDY-LABEL: test_pmulld:
   4831 ; SANDY:       # %bb.0:
   4832 ; SANDY-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   4833 ; SANDY-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   4834 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4835 ;
   4836 ; HASWELL-SSE-LABEL: test_pmulld:
   4837 ; HASWELL-SSE:       # %bb.0:
   4838 ; HASWELL-SSE-NEXT:    pmulld %xmm1, %xmm0 # sched: [10:2.00]
   4839 ; HASWELL-SSE-NEXT:    pmulld (%rdi), %xmm0 # sched: [16:2.00]
   4840 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4841 ;
   4842 ; HASWELL-LABEL: test_pmulld:
   4843 ; HASWELL:       # %bb.0:
   4844 ; HASWELL-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
   4845 ; HASWELL-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:2.00]
   4846 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4847 ;
   4848 ; BROADWELL-SSE-LABEL: test_pmulld:
   4849 ; BROADWELL-SSE:       # %bb.0:
   4850 ; BROADWELL-SSE-NEXT:    pmulld %xmm1, %xmm0 # sched: [10:2.00]
   4851 ; BROADWELL-SSE-NEXT:    pmulld (%rdi), %xmm0 # sched: [15:2.00]
   4852 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4853 ;
   4854 ; BROADWELL-LABEL: test_pmulld:
   4855 ; BROADWELL:       # %bb.0:
   4856 ; BROADWELL-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
   4857 ; BROADWELL-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [15:2.00]
   4858 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4859 ;
   4860 ; SKYLAKE-SSE-LABEL: test_pmulld:
   4861 ; SKYLAKE-SSE:       # %bb.0:
   4862 ; SKYLAKE-SSE-NEXT:    pmulld %xmm1, %xmm0 # sched: [10:1.00]
   4863 ; SKYLAKE-SSE-NEXT:    pmulld (%rdi), %xmm0 # sched: [16:1.00]
   4864 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4865 ;
   4866 ; SKYLAKE-LABEL: test_pmulld:
   4867 ; SKYLAKE:       # %bb.0:
   4868 ; SKYLAKE-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00]
   4869 ; SKYLAKE-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
   4870 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4871 ;
   4872 ; SKX-SSE-LABEL: test_pmulld:
   4873 ; SKX-SSE:       # %bb.0:
   4874 ; SKX-SSE-NEXT:    pmulld %xmm1, %xmm0 # sched: [10:1.00]
   4875 ; SKX-SSE-NEXT:    pmulld (%rdi), %xmm0 # sched: [16:1.00]
   4876 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4877 ;
   4878 ; SKX-LABEL: test_pmulld:
   4879 ; SKX:       # %bb.0:
   4880 ; SKX-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00]
   4881 ; SKX-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
   4882 ; SKX-NEXT:    retq # sched: [7:1.00]
   4883 ;
   4884 ; BTVER2-SSE-LABEL: test_pmulld:
   4885 ; BTVER2-SSE:       # %bb.0:
   4886 ; BTVER2-SSE-NEXT:    pmulld %xmm1, %xmm0 # sched: [4:2.00]
   4887 ; BTVER2-SSE-NEXT:    pmulld (%rdi), %xmm0 # sched: [9:2.00]
   4888 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4889 ;
   4890 ; BTVER2-LABEL: test_pmulld:
   4891 ; BTVER2:       # %bb.0:
   4892 ; BTVER2-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
   4893 ; BTVER2-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
   4894 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4895 ;
   4896 ; ZNVER1-SSE-LABEL: test_pmulld:
   4897 ; ZNVER1-SSE:       # %bb.0:
   4898 ; ZNVER1-SSE-NEXT:    pmulld %xmm1, %xmm0 # sched: [4:1.00]
   4899 ; ZNVER1-SSE-NEXT:    pmulld (%rdi), %xmm0 # sched: [11:1.00]
   4900 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4901 ;
   4902 ; ZNVER1-LABEL: test_pmulld:
   4903 ; ZNVER1:       # %bb.0:
   4904 ; ZNVER1-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
   4905 ; ZNVER1-NEXT:    vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   4906 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Scheduling test for (v)pmulld, expressed as plain <4 x i32>
          ; multiplies rather than an intrinsic call. The expectations above
          ; are autogenerated (see the note at the top of the file):
          ; regenerate them with the update script instead of editing by
          ; hand. Every target is expected to emit a register-register
          ; (v)pmulld followed by a (v)pmulld whose second source is the
          ; memory operand (%rdi).
          ; The "sched: [a:b]" suffixes are produced by the -print-schedule
          ; option of the llc invocations in the file header (presumably
          ; latency and reciprocal throughput -- TODO confirm).
   4907   %1 = mul <4 x i32> %a0, %a1 ; register-register form
   4908   %2 = load <4 x i32>, <4 x i32> *%a2, align 16 ; 16-byte-aligned vector load from %a2
   4909   %3 = mul <4 x i32> %1, %2 ; second operand is the loaded value (memory form in the expected asm)
   4910   ret <4 x i32> %3
   4911 }
   4912 
   4913 define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
        ; Scheduling test for the SSE4.1 ptestc intrinsic.
        ; The intrinsic is called twice with %a0 as the first operand: once against
        ; %a1 (passed by value) and once against a vector loaded from %a2, so the
        ; expected output covers both the register-register and the (%rdi) memory
        ; form of ptest / vptest. In every expected sequence each test instruction is
        ; followed by a setb, and the two bytes are combined with andb + movzbl.
        ; The bracketed pair in each trailing sched annotation comes from the
        ; -print-schedule flag on the llc command lines at the top of the file
        ; (presumably latency and reciprocal throughput -- confirm against llc docs).
        ; The assertion lines are autogenerated; regenerate them with
        ; utils/update_llc_test_checks.py rather than editing them by hand.
   4914 ; GENERIC-LABEL: test_ptest:
   4915 ; GENERIC:       # %bb.0:
   4916 ; GENERIC-NEXT:    ptest %xmm1, %xmm0 # sched: [2:1.00]
   4917 ; GENERIC-NEXT:    setb %al # sched: [1:0.50]
   4918 ; GENERIC-NEXT:    ptest (%rdi), %xmm0 # sched: [8:1.00]
   4919 ; GENERIC-NEXT:    setb %cl # sched: [1:0.50]
   4920 ; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
   4921 ; GENERIC-NEXT:    movzbl %cl, %eax # sched: [1:0.33]
   4922 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4923 ;
   4924 ; SLM-LABEL: test_ptest:
   4925 ; SLM:       # %bb.0:
   4926 ; SLM-NEXT:    ptest %xmm1, %xmm0 # sched: [1:0.50]
   4927 ; SLM-NEXT:    setb %al # sched: [1:0.50]
   4928 ; SLM-NEXT:    ptest (%rdi), %xmm0 # sched: [4:1.00]
   4929 ; SLM-NEXT:    setb %cl # sched: [1:0.50]
   4930 ; SLM-NEXT:    andb %al, %cl # sched: [1:0.50]
   4931 ; SLM-NEXT:    movzbl %cl, %eax # sched: [1:0.50]
   4932 ; SLM-NEXT:    retq # sched: [4:1.00]
   4933 ;
   4934 ; SANDY-SSE-LABEL: test_ptest:
   4935 ; SANDY-SSE:       # %bb.0:
   4936 ; SANDY-SSE-NEXT:    ptest %xmm1, %xmm0 # sched: [2:1.00]
   4937 ; SANDY-SSE-NEXT:    setb %al # sched: [1:0.50]
   4938 ; SANDY-SSE-NEXT:    ptest (%rdi), %xmm0 # sched: [8:1.00]
   4939 ; SANDY-SSE-NEXT:    setb %cl # sched: [1:0.50]
   4940 ; SANDY-SSE-NEXT:    andb %al, %cl # sched: [1:0.33]
   4941 ; SANDY-SSE-NEXT:    movzbl %cl, %eax # sched: [1:0.33]
   4942 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4943 ;
   4944 ; SANDY-LABEL: test_ptest:
   4945 ; SANDY:       # %bb.0:
   4946 ; SANDY-NEXT:    vptest %xmm1, %xmm0 # sched: [2:1.00]
   4947 ; SANDY-NEXT:    setb %al # sched: [1:0.50]
   4948 ; SANDY-NEXT:    vptest (%rdi), %xmm0 # sched: [8:1.00]
   4949 ; SANDY-NEXT:    setb %cl # sched: [1:0.50]
   4950 ; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
   4951 ; SANDY-NEXT:    movzbl %cl, %eax # sched: [1:0.33]
   4952 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4953 ;
   4954 ; HASWELL-SSE-LABEL: test_ptest:
   4955 ; HASWELL-SSE:       # %bb.0:
   4956 ; HASWELL-SSE-NEXT:    ptest %xmm1, %xmm0 # sched: [2:1.00]
   4957 ; HASWELL-SSE-NEXT:    setb %al # sched: [1:0.50]
   4958 ; HASWELL-SSE-NEXT:    ptest (%rdi), %xmm0 # sched: [8:1.00]
   4959 ; HASWELL-SSE-NEXT:    setb %cl # sched: [1:0.50]
   4960 ; HASWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
   4961 ; HASWELL-SSE-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
   4962 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4963 ;
   4964 ; HASWELL-LABEL: test_ptest:
   4965 ; HASWELL:       # %bb.0:
   4966 ; HASWELL-NEXT:    vptest %xmm1, %xmm0 # sched: [2:1.00]
   4967 ; HASWELL-NEXT:    setb %al # sched: [1:0.50]
   4968 ; HASWELL-NEXT:    vptest (%rdi), %xmm0 # sched: [8:1.00]
   4969 ; HASWELL-NEXT:    setb %cl # sched: [1:0.50]
   4970 ; HASWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
   4971 ; HASWELL-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
   4972 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4973 ;
   4974 ; BROADWELL-SSE-LABEL: test_ptest:
   4975 ; BROADWELL-SSE:       # %bb.0:
   4976 ; BROADWELL-SSE-NEXT:    ptest %xmm1, %xmm0 # sched: [2:1.00]
   4977 ; BROADWELL-SSE-NEXT:    setb %al # sched: [1:0.50]
   4978 ; BROADWELL-SSE-NEXT:    ptest (%rdi), %xmm0 # sched: [7:1.00]
   4979 ; BROADWELL-SSE-NEXT:    setb %cl # sched: [1:0.50]
   4980 ; BROADWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
   4981 ; BROADWELL-SSE-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
   4982 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4983 ;
   4984 ; BROADWELL-LABEL: test_ptest:
   4985 ; BROADWELL:       # %bb.0:
   4986 ; BROADWELL-NEXT:    vptest %xmm1, %xmm0 # sched: [2:1.00]
   4987 ; BROADWELL-NEXT:    setb %al # sched: [1:0.50]
   4988 ; BROADWELL-NEXT:    vptest (%rdi), %xmm0 # sched: [7:1.00]
   4989 ; BROADWELL-NEXT:    setb %cl # sched: [1:0.50]
   4990 ; BROADWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
   4991 ; BROADWELL-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
   4992 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4993 ;
   4994 ; SKYLAKE-SSE-LABEL: test_ptest:
   4995 ; SKYLAKE-SSE:       # %bb.0:
   4996 ; SKYLAKE-SSE-NEXT:    ptest %xmm1, %xmm0 # sched: [3:1.00]
   4997 ; SKYLAKE-SSE-NEXT:    setb %al # sched: [1:0.50]
   4998 ; SKYLAKE-SSE-NEXT:    ptest (%rdi), %xmm0 # sched: [9:1.00]
   4999 ; SKYLAKE-SSE-NEXT:    setb %cl # sched: [1:0.50]
   5000 ; SKYLAKE-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
   5001 ; SKYLAKE-SSE-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
   5002 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5003 ;
   5004 ; SKYLAKE-LABEL: test_ptest:
   5005 ; SKYLAKE:       # %bb.0:
   5006 ; SKYLAKE-NEXT:    vptest %xmm1, %xmm0 # sched: [3:1.00]
   5007 ; SKYLAKE-NEXT:    setb %al # sched: [1:0.50]
   5008 ; SKYLAKE-NEXT:    vptest (%rdi), %xmm0 # sched: [9:1.00]
   5009 ; SKYLAKE-NEXT:    setb %cl # sched: [1:0.50]
   5010 ; SKYLAKE-NEXT:    andb %al, %cl # sched: [1:0.25]
   5011 ; SKYLAKE-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
   5012 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5013 ;
   5014 ; SKX-SSE-LABEL: test_ptest:
   5015 ; SKX-SSE:       # %bb.0:
   5016 ; SKX-SSE-NEXT:    ptest %xmm1, %xmm0 # sched: [3:1.00]
   5017 ; SKX-SSE-NEXT:    setb %al # sched: [1:0.50]
   5018 ; SKX-SSE-NEXT:    ptest (%rdi), %xmm0 # sched: [9:1.00]
   5019 ; SKX-SSE-NEXT:    setb %cl # sched: [1:0.50]
   5020 ; SKX-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
   5021 ; SKX-SSE-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
   5022 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5023 ;
   5024 ; SKX-LABEL: test_ptest:
   5025 ; SKX:       # %bb.0:
   5026 ; SKX-NEXT:    vptest %xmm1, %xmm0 # sched: [3:1.00]
   5027 ; SKX-NEXT:    setb %al # sched: [1:0.50]
   5028 ; SKX-NEXT:    vptest (%rdi), %xmm0 # sched: [9:1.00]
   5029 ; SKX-NEXT:    setb %cl # sched: [1:0.50]
   5030 ; SKX-NEXT:    andb %al, %cl # sched: [1:0.25]
   5031 ; SKX-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
   5032 ; SKX-NEXT:    retq # sched: [7:1.00]
   5033 ;
   5034 ; BTVER2-SSE-LABEL: test_ptest:
   5035 ; BTVER2-SSE:       # %bb.0:
   5036 ; BTVER2-SSE-NEXT:    ptest %xmm1, %xmm0 # sched: [3:1.00]
   5037 ; BTVER2-SSE-NEXT:    setb %al # sched: [1:0.50]
   5038 ; BTVER2-SSE-NEXT:    ptest (%rdi), %xmm0 # sched: [8:1.00]
   5039 ; BTVER2-SSE-NEXT:    setb %cl # sched: [1:0.50]
   5040 ; BTVER2-SSE-NEXT:    andb %al, %cl # sched: [1:0.50]
   5041 ; BTVER2-SSE-NEXT:    movzbl %cl, %eax # sched: [1:0.50]
   5042 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5043 ;
   5044 ; BTVER2-LABEL: test_ptest:
   5045 ; BTVER2:       # %bb.0:
   5046 ; BTVER2-NEXT:    vptest %xmm1, %xmm0 # sched: [3:1.00]
   5047 ; BTVER2-NEXT:    setb %al # sched: [1:0.50]
   5048 ; BTVER2-NEXT:    vptest (%rdi), %xmm0 # sched: [8:1.00]
   5049 ; BTVER2-NEXT:    setb %cl # sched: [1:0.50]
   5050 ; BTVER2-NEXT:    andb %al, %cl # sched: [1:0.50]
   5051 ; BTVER2-NEXT:    movzbl %cl, %eax # sched: [1:0.50]
   5052 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5053 ;
   5054 ; ZNVER1-SSE-LABEL: test_ptest:
   5055 ; ZNVER1-SSE:       # %bb.0:
   5056 ; ZNVER1-SSE-NEXT:    ptest %xmm1, %xmm0 # sched: [1:1.00]
   5057 ; ZNVER1-SSE-NEXT:    setb %al # sched: [1:0.25]
   5058 ; ZNVER1-SSE-NEXT:    ptest (%rdi), %xmm0 # sched: [8:1.00]
   5059 ; ZNVER1-SSE-NEXT:    setb %cl # sched: [1:0.25]
   5060 ; ZNVER1-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
   5061 ; ZNVER1-SSE-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
   5062 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5063 ;
   5064 ; ZNVER1-LABEL: test_ptest:
   5065 ; ZNVER1:       # %bb.0:
   5066 ; ZNVER1-NEXT:    vptest %xmm1, %xmm0 # sched: [1:1.00]
   5067 ; ZNVER1-NEXT:    setb %al # sched: [1:0.25]
   5068 ; ZNVER1-NEXT:    vptest (%rdi), %xmm0 # sched: [8:1.00]
   5069 ; ZNVER1-NEXT:    setb %cl # sched: [1:0.25]
   5070 ; ZNVER1-NEXT:    andb %al, %cl # sched: [1:0.25]
   5071 ; ZNVER1-NEXT:    movzbl %cl, %eax # sched: [1:0.25]
   5072 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5073   %1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; both operands passed by value
   5074   %2 = load <2 x i64>, <2 x i64> *%a2, align 16 ; 16-byte aligned load feeding the second call
   5075   %3 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %2) ; second operand comes from memory
   5076   %4 = and i32 %1, %3 ; merge both results into the single return value
   5077   ret i32 %4
   5078 }
   5079 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
   5080 
   5081 define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
        ; Scheduling test for the SSE4.1 round.pd intrinsic with immediate 7.
        ; The intrinsic is applied once to %a0 (passed by value) and once to a vector
        ; loaded from %a1, so the expected output contains both the register and the
        ; (%rdi) memory form of roundpd / vroundpd; the two results are summed with
        ; fadd and returned.
        ; The order of the two round instructions is not the same for every CPU
        ; model in the expected output, and some non-VEX sequences end with an extra
        ; movapd into %xmm0 when the sum is produced in %xmm1.
        ; The bracketed pair in each trailing sched annotation comes from the
        ; -print-schedule flag on the llc command lines at the top of the file
        ; (presumably latency and reciprocal throughput -- confirm against llc docs).
        ; The assertion lines are autogenerated; regenerate them with
        ; utils/update_llc_test_checks.py rather than editing them by hand.
   5082 ; GENERIC-LABEL: test_roundpd:
   5083 ; GENERIC:       # %bb.0:
   5084 ; GENERIC-NEXT:    roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
   5085 ; GENERIC-NEXT:    roundpd $7, (%rdi), %xmm0 # sched: [9:1.00]
   5086 ; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   5087 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5088 ;
   5089 ; SLM-LABEL: test_roundpd:
   5090 ; SLM:       # %bb.0:
   5091 ; SLM-NEXT:    roundpd $7, (%rdi), %xmm1 # sched: [6:1.00]
   5092 ; SLM-NEXT:    roundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
   5093 ; SLM-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   5094 ; SLM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
   5095 ; SLM-NEXT:    retq # sched: [4:1.00]
   5096 ;
   5097 ; SANDY-SSE-LABEL: test_roundpd:
   5098 ; SANDY-SSE:       # %bb.0:
   5099 ; SANDY-SSE-NEXT:    roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
   5100 ; SANDY-SSE-NEXT:    roundpd $7, (%rdi), %xmm0 # sched: [9:1.00]
   5101 ; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   5102 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5103 ;
   5104 ; SANDY-LABEL: test_roundpd:
   5105 ; SANDY:       # %bb.0:
   5106 ; SANDY-NEXT:    vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
   5107 ; SANDY-NEXT:    vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00]
   5108 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5109 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5110 ;
   5111 ; HASWELL-SSE-LABEL: test_roundpd:
   5112 ; HASWELL-SSE:       # %bb.0:
   5113 ; HASWELL-SSE-NEXT:    roundpd $7, %xmm0, %xmm1 # sched: [6:0.50]
   5114 ; HASWELL-SSE-NEXT:    roundpd $7, (%rdi), %xmm0 # sched: [12:2.00]
   5115 ; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   5116 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5117 ;
   5118 ; HASWELL-LABEL: test_roundpd:
   5119 ; HASWELL:       # %bb.0:
   5120 ; HASWELL-NEXT:    vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50]
   5121 ; HASWELL-NEXT:    vroundpd $7, (%rdi), %xmm1 # sched: [12:2.00]
   5122 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5123 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5124 ;
   5125 ; BROADWELL-SSE-LABEL: test_roundpd:
   5126 ; BROADWELL-SSE:       # %bb.0:
   5127 ; BROADWELL-SSE-NEXT:    roundpd $7, (%rdi), %xmm1 # sched: [11:2.00]
   5128 ; BROADWELL-SSE-NEXT:    roundpd $7, %xmm0, %xmm0 # sched: [6:0.50]
   5129 ; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   5130 ; BROADWELL-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
   5131 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5132 ;
   5133 ; BROADWELL-LABEL: test_roundpd:
   5134 ; BROADWELL:       # %bb.0:
   5135 ; BROADWELL-NEXT:    vroundpd $7, (%rdi), %xmm1 # sched: [11:2.00]
   5136 ; BROADWELL-NEXT:    vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50]
   5137 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5138 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5139 ;
   5140 ; SKYLAKE-SSE-LABEL: test_roundpd:
   5141 ; SKYLAKE-SSE:       # %bb.0:
   5142 ; SKYLAKE-SSE-NEXT:    roundpd $7, %xmm0, %xmm1 # sched: [8:1.00]
   5143 ; SKYLAKE-SSE-NEXT:    roundpd $7, (%rdi), %xmm0 # sched: [14:1.00]
   5144 ; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   5145 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5146 ;
   5147 ; SKYLAKE-LABEL: test_roundpd:
   5148 ; SKYLAKE:       # %bb.0:
   5149 ; SKYLAKE-NEXT:    vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00]
   5150 ; SKYLAKE-NEXT:    vroundpd $7, (%rdi), %xmm1 # sched: [14:1.00]
   5151 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   5152 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5153 ;
   5154 ; SKX-SSE-LABEL: test_roundpd:
   5155 ; SKX-SSE:       # %bb.0:
   5156 ; SKX-SSE-NEXT:    roundpd $7, %xmm0, %xmm1 # sched: [8:1.00]
   5157 ; SKX-SSE-NEXT:    roundpd $7, (%rdi), %xmm0 # sched: [14:1.00]
   5158 ; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   5159 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5160 ;
   5161 ; SKX-LABEL: test_roundpd:
   5162 ; SKX:       # %bb.0:
   5163 ; SKX-NEXT:    vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00]
   5164 ; SKX-NEXT:    vroundpd $7, (%rdi), %xmm1 # sched: [14:1.00]
   5165 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   5166 ; SKX-NEXT:    retq # sched: [7:1.00]
   5167 ;
   5168 ; BTVER2-SSE-LABEL: test_roundpd:
   5169 ; BTVER2-SSE:       # %bb.0:
   5170 ; BTVER2-SSE-NEXT:    roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
   5171 ; BTVER2-SSE-NEXT:    roundpd $7, (%rdi), %xmm0 # sched: [8:1.00]
   5172 ; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   5173 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5174 ;
   5175 ; BTVER2-LABEL: test_roundpd:
   5176 ; BTVER2:       # %bb.0:
   5177 ; BTVER2-NEXT:    vroundpd $7, (%rdi), %xmm1 # sched: [8:1.00]
   5178 ; BTVER2-NEXT:    vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
   5179 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5180 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5181 ;
   5182 ; ZNVER1-SSE-LABEL: test_roundpd:
   5183 ; ZNVER1-SSE:       # %bb.0:
   5184 ; ZNVER1-SSE-NEXT:    roundpd $7, %xmm0, %xmm1 # sched: [4:1.00]
   5185 ; ZNVER1-SSE-NEXT:    roundpd $7, (%rdi), %xmm0 # sched: [11:1.00]
   5186 ; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   5187 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5188 ;
   5189 ; ZNVER1-LABEL: test_roundpd:
   5190 ; ZNVER1:       # %bb.0:
   5191 ; ZNVER1-NEXT:    vroundpd $7, (%rdi), %xmm1 # sched: [11:1.00]
   5192 ; ZNVER1-NEXT:    vroundpd $7, %xmm0, %xmm0 # sched: [4:1.00]
   5193 ; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5194 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5195   %1 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; source passed by value
   5196   %2 = load <2 x double>, <2 x double> *%a1, align 16 ; 16-byte aligned load feeding the second call
   5197   %3 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %2, i32 7) ; source comes from memory
   5198   %4 = fadd <2 x double> %1, %3 ; sum both results into the single return value
   5199   ret <2 x double> %4
   5200 }
   5201 declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
   5202 
   5203 define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
        ; Scheduling test for the SSE4.1 round.ps intrinsic with immediate 7.
        ; The intrinsic is applied once to %a0 (passed by value) and once to a vector
        ; loaded from %a1, so the expected output contains both the register and the
        ; (%rdi) memory form of roundps / vroundps; the two results are summed with
        ; fadd and returned.
        ; The order of the two round instructions is not the same for every CPU
        ; model in the expected output, and some non-VEX sequences end with an extra
        ; movaps into %xmm0 when the sum is produced in %xmm1.
        ; The bracketed pair in each trailing sched annotation comes from the
        ; -print-schedule flag on the llc command lines at the top of the file
        ; (presumably latency and reciprocal throughput -- confirm against llc docs).
        ; The assertion lines are autogenerated; regenerate them with
        ; utils/update_llc_test_checks.py rather than editing them by hand.
   5204 ; GENERIC-LABEL: test_roundps:
   5205 ; GENERIC:       # %bb.0:
   5206 ; GENERIC-NEXT:    roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
   5207 ; GENERIC-NEXT:    roundps $7, (%rdi), %xmm0 # sched: [9:1.00]
   5208 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5209 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5210 ;
   5211 ; SLM-LABEL: test_roundps:
   5212 ; SLM:       # %bb.0:
   5213 ; SLM-NEXT:    roundps $7, (%rdi), %xmm1 # sched: [6:1.00]
   5214 ; SLM-NEXT:    roundps $7, %xmm0, %xmm0 # sched: [3:1.00]
   5215 ; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   5216 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
   5217 ; SLM-NEXT:    retq # sched: [4:1.00]
   5218 ;
   5219 ; SANDY-SSE-LABEL: test_roundps:
   5220 ; SANDY-SSE:       # %bb.0:
   5221 ; SANDY-SSE-NEXT:    roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
   5222 ; SANDY-SSE-NEXT:    roundps $7, (%rdi), %xmm0 # sched: [9:1.00]
   5223 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5224 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5225 ;
   5226 ; SANDY-LABEL: test_roundps:
   5227 ; SANDY:       # %bb.0:
   5228 ; SANDY-NEXT:    vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
   5229 ; SANDY-NEXT:    vroundps $7, (%rdi), %xmm1 # sched: [9:1.00]
   5230 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5231 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5232 ;
   5233 ; HASWELL-SSE-LABEL: test_roundps:
   5234 ; HASWELL-SSE:       # %bb.0:
   5235 ; HASWELL-SSE-NEXT:    roundps $7, %xmm0, %xmm1 # sched: [6:0.50]
   5236 ; HASWELL-SSE-NEXT:    roundps $7, (%rdi), %xmm0 # sched: [12:2.00]
   5237 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5238 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5239 ;
   5240 ; HASWELL-LABEL: test_roundps:
   5241 ; HASWELL:       # %bb.0:
   5242 ; HASWELL-NEXT:    vroundps $7, %xmm0, %xmm0 # sched: [6:0.50]
   5243 ; HASWELL-NEXT:    vroundps $7, (%rdi), %xmm1 # sched: [12:2.00]
   5244 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5245 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5246 ;
   5247 ; BROADWELL-SSE-LABEL: test_roundps:
   5248 ; BROADWELL-SSE:       # %bb.0:
   5249 ; BROADWELL-SSE-NEXT:    roundps $7, (%rdi), %xmm1 # sched: [11:2.00]
   5250 ; BROADWELL-SSE-NEXT:    roundps $7, %xmm0, %xmm0 # sched: [6:0.50]
   5251 ; BROADWELL-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   5252 ; BROADWELL-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
   5253 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5254 ;
   5255 ; BROADWELL-LABEL: test_roundps:
   5256 ; BROADWELL:       # %bb.0:
   5257 ; BROADWELL-NEXT:    vroundps $7, (%rdi), %xmm1 # sched: [11:2.00]
   5258 ; BROADWELL-NEXT:    vroundps $7, %xmm0, %xmm0 # sched: [6:0.50]
   5259 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5260 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5261 ;
   5262 ; SKYLAKE-SSE-LABEL: test_roundps:
   5263 ; SKYLAKE-SSE:       # %bb.0:
   5264 ; SKYLAKE-SSE-NEXT:    roundps $7, %xmm0, %xmm1 # sched: [8:1.00]
   5265 ; SKYLAKE-SSE-NEXT:    roundps $7, (%rdi), %xmm0 # sched: [14:1.00]
   5266 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   5267 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5268 ;
   5269 ; SKYLAKE-LABEL: test_roundps:
   5270 ; SKYLAKE:       # %bb.0:
   5271 ; SKYLAKE-NEXT:    vroundps $7, %xmm0, %xmm0 # sched: [8:1.00]
   5272 ; SKYLAKE-NEXT:    vroundps $7, (%rdi), %xmm1 # sched: [14:1.00]
   5273 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   5274 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5275 ;
   5276 ; SKX-SSE-LABEL: test_roundps:
   5277 ; SKX-SSE:       # %bb.0:
   5278 ; SKX-SSE-NEXT:    roundps $7, %xmm0, %xmm1 # sched: [8:1.00]
   5279 ; SKX-SSE-NEXT:    roundps $7, (%rdi), %xmm0 # sched: [14:1.00]
   5280 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   5281 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5282 ;
   5283 ; SKX-LABEL: test_roundps:
   5284 ; SKX:       # %bb.0:
   5285 ; SKX-NEXT:    vroundps $7, %xmm0, %xmm0 # sched: [8:1.00]
   5286 ; SKX-NEXT:    vroundps $7, (%rdi), %xmm1 # sched: [14:1.00]
   5287 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   5288 ; SKX-NEXT:    retq # sched: [7:1.00]
   5289 ;
   5290 ; BTVER2-SSE-LABEL: test_roundps:
   5291 ; BTVER2-SSE:       # %bb.0:
   5292 ; BTVER2-SSE-NEXT:    roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
   5293 ; BTVER2-SSE-NEXT:    roundps $7, (%rdi), %xmm0 # sched: [8:1.00]
   5294 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5295 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5296 ;
   5297 ; BTVER2-LABEL: test_roundps:
   5298 ; BTVER2:       # %bb.0:
   5299 ; BTVER2-NEXT:    vroundps $7, (%rdi), %xmm1 # sched: [8:1.00]
   5300 ; BTVER2-NEXT:    vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
   5301 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5302 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5303 ;
   5304 ; ZNVER1-SSE-LABEL: test_roundps:
   5305 ; ZNVER1-SSE:       # %bb.0:
   5306 ; ZNVER1-SSE-NEXT:    roundps $7, %xmm0, %xmm1 # sched: [4:1.00]
   5307 ; ZNVER1-SSE-NEXT:    roundps $7, (%rdi), %xmm0 # sched: [11:1.00]
   5308 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5309 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5310 ;
   5311 ; ZNVER1-LABEL: test_roundps:
   5312 ; ZNVER1:       # %bb.0:
   5313 ; ZNVER1-NEXT:    vroundps $7, (%rdi), %xmm1 # sched: [11:1.00]
   5314 ; ZNVER1-NEXT:    vroundps $7, %xmm0, %xmm0 # sched: [4:1.00]
   5315 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5316 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5317   %1 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; source passed by value
   5318   %2 = load <4 x float>, <4 x float> *%a1, align 16 ; 16-byte aligned load feeding the second call
   5319   %3 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %2, i32 7) ; source comes from memory
   5320   %4 = fadd <4 x float> %1, %3 ; sum both results into the single return value
   5321   ret <4 x float> %4
   5322 }
   5323 declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
   5324 
   5325 define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
        ; Scheduling test for the SSE4.1 round.sd intrinsic with immediate 7.
        ; The intrinsic is called twice with %a0 as the first operand: once with %a1
        ; (passed by value) and once with a vector loaded from %a2, so the expected
        ; output contains both the register and the (%rdi) memory form of
        ; roundsd / vroundsd; the two results are summed with fadd and returned.
        ; Because %a0 feeds both calls, the non-VEX expectations start with a movapd
        ; copy of %xmm0 into %xmm2, while the v-prefixed expectations use the
        ; three-operand form and contain no copy.
        ; The bracketed pair in each trailing sched annotation comes from the
        ; -print-schedule flag on the llc command lines at the top of the file
        ; (presumably latency and reciprocal throughput -- confirm against llc docs).
        ; The assertion lines are autogenerated; regenerate them with
        ; utils/update_llc_test_checks.py rather than editing them by hand.
   5326 ; GENERIC-LABEL: test_roundsd:
   5327 ; GENERIC:       # %bb.0:
   5328 ; GENERIC-NEXT:    movapd %xmm0, %xmm2 # sched: [1:1.00]
   5329 ; GENERIC-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
   5330 ; GENERIC-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [9:1.00]
   5331 ; GENERIC-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
   5332 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5333 ;
   5334 ; SLM-LABEL: test_roundsd:
   5335 ; SLM:       # %bb.0:
   5336 ; SLM-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.50]
   5337 ; SLM-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [6:1.00]
   5338 ; SLM-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
   5339 ; SLM-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
   5340 ; SLM-NEXT:    retq # sched: [4:1.00]
   5341 ;
   5342 ; SANDY-SSE-LABEL: test_roundsd:
   5343 ; SANDY-SSE:       # %bb.0:
   5344 ; SANDY-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:1.00]
   5345 ; SANDY-SSE-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
   5346 ; SANDY-SSE-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [9:1.00]
   5347 ; SANDY-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
   5348 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5349 ;
   5350 ; SANDY-LABEL: test_roundsd:
   5351 ; SANDY:       # %bb.0:
   5352 ; SANDY-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
   5353 ; SANDY-NEXT:    vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   5354 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   5355 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5356 ;
   5357 ; HASWELL-SSE-LABEL: test_roundsd:
   5358 ; HASWELL-SSE:       # %bb.0:
   5359 ; HASWELL-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:1.00]
   5360 ; HASWELL-SSE-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [6:0.50]
   5361 ; HASWELL-SSE-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [12:2.00]
   5362 ; HASWELL-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
   5363 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5364 ;
   5365 ; HASWELL-LABEL: test_roundsd:
   5366 ; HASWELL:       # %bb.0:
   5367 ; HASWELL-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50]
   5368 ; HASWELL-NEXT:    vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
   5369 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   5370 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5371 ;
   5372 ; BROADWELL-SSE-LABEL: test_roundsd:
   5373 ; BROADWELL-SSE:       # %bb.0:
   5374 ; BROADWELL-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:1.00]
   5375 ; BROADWELL-SSE-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [11:2.00]
   5376 ; BROADWELL-SSE-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [6:0.50]
   5377 ; BROADWELL-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
   5378 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5379 ;
   5380 ; BROADWELL-LABEL: test_roundsd:
   5381 ; BROADWELL:       # %bb.0:
   5382 ; BROADWELL-NEXT:    vroundsd $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00]
   5383 ; BROADWELL-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm0 # sched: [6:0.50]
   5384 ; BROADWELL-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 # sched: [3:1.00]
   5385 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5386 ;
   5387 ; SKYLAKE-SSE-LABEL: test_roundsd:
   5388 ; SKYLAKE-SSE:       # %bb.0:
   5389 ; SKYLAKE-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.33]
   5390 ; SKYLAKE-SSE-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [8:1.00]
   5391 ; SKYLAKE-SSE-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [14:1.00]
   5392 ; SKYLAKE-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [4:0.50]
   5393 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5394 ;
   5395 ; SKYLAKE-LABEL: test_roundsd:
   5396 ; SKYLAKE:       # %bb.0:
   5397 ; SKYLAKE-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
   5398 ; SKYLAKE-NEXT:    vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
   5399 ; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
   5400 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5401 ;
   5402 ; SKX-SSE-LABEL: test_roundsd:
   5403 ; SKX-SSE:       # %bb.0:
   5404 ; SKX-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.33]
   5405 ; SKX-SSE-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [8:1.00]
   5406 ; SKX-SSE-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [14:1.00]
   5407 ; SKX-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [4:0.50]
   5408 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5409 ;
   5410 ; SKX-LABEL: test_roundsd:
   5411 ; SKX:       # %bb.0:
   5412 ; SKX-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
   5413 ; SKX-NEXT:    vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
   5414 ; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
   5415 ; SKX-NEXT:    retq # sched: [7:1.00]
   5416 ;
   5417 ; BTVER2-SSE-LABEL: test_roundsd:
   5418 ; BTVER2-SSE:       # %bb.0:
   5419 ; BTVER2-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.50]
   5420 ; BTVER2-SSE-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [8:1.00]
   5421 ; BTVER2-SSE-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
   5422 ; BTVER2-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
   5423 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5424 ;
   5425 ; BTVER2-LABEL: test_roundsd:
   5426 ; BTVER2:       # %bb.0:
   5427 ; BTVER2-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
   5428 ; BTVER2-NEXT:    vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   5429 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   5430 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5431 ;
   5432 ; ZNVER1-SSE-LABEL: test_roundsd:
   5433 ; ZNVER1-SSE:       # %bb.0:
   5434 ; ZNVER1-SSE-NEXT:    movapd %xmm0, %xmm2 # sched: [1:0.25]
   5435 ; ZNVER1-SSE-NEXT:    roundsd $7, (%rdi), %xmm0 # sched: [11:1.00]
   5436 ; ZNVER1-SSE-NEXT:    roundsd $7, %xmm1, %xmm2 # sched: [4:1.00]
   5437 ; ZNVER1-SSE-NEXT:    addpd %xmm2, %xmm0 # sched: [3:1.00]
   5438 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5439 ;
   5440 ; ZNVER1-LABEL: test_roundsd:
   5441 ; ZNVER1:       # %bb.0:
   5442 ; ZNVER1-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00]
   5443 ; ZNVER1-NEXT:    vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   5444 ; ZNVER1-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   5445 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5446   %1 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; second operand passed by value
   5447   %2 = load <2 x double>, <2 x double>* %a2, align 16 ; 16-byte aligned load feeding the second call
   5448   %3 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %2, i32 7) ; second operand comes from memory
   5449   %4 = fadd <2 x double> %1, %3 ; sum both results into the single return value
   5450   ret <2 x double> %4
   5451 }
   5452 declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
   5453 
   5454 define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Scheduling test for the SSE4.1 round.ss intrinsic with immediate 7.
        ; The intrinsic is called twice with %a0 as the first operand: once with %a1
        ; (passed by value) and once with a vector loaded from %a2, so the expected
        ; output contains both the register and the (%rdi) memory form of
        ; roundss / vroundss; the two results are summed with fadd and returned.
        ; Because %a0 feeds both calls, the non-VEX expectations start with a movaps
        ; copy of %xmm0 into %xmm2, while the v-prefixed expectations use the
        ; three-operand form and contain no copy.
        ; The bracketed pair in each trailing sched annotation comes from the
        ; -print-schedule flag on the llc command lines at the top of the file
        ; (presumably latency and reciprocal throughput -- confirm against llc docs).
        ; The assertion lines are autogenerated; regenerate them with
        ; utils/update_llc_test_checks.py rather than editing them by hand.
   5455 ; GENERIC-LABEL: test_roundss:
   5456 ; GENERIC:       # %bb.0:
   5457 ; GENERIC-NEXT:    movaps %xmm0, %xmm2 # sched: [1:1.00]
   5458 ; GENERIC-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
   5459 ; GENERIC-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [9:1.00]
   5460 ; GENERIC-NEXT:    addps %xmm2, %xmm0 # sched: [3:1.00]
   5461 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5462 ;
   5463 ; SLM-LABEL: test_roundss:
   5464 ; SLM:       # %bb.0:
   5465 ; SLM-NEXT:    movaps %xmm0, %xmm2 # sched: [1:0.50]
   5466 ; SLM-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [6:1.00]
   5467 ; SLM-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
   5468 ; SLM-NEXT:    addps %xmm2, %xmm0 # sched: [3:1.00]
   5469 ; SLM-NEXT:    retq # sched: [4:1.00]
   5470 ;
   5471 ; SANDY-SSE-LABEL: test_roundss:
   5472 ; SANDY-SSE:       # %bb.0:
   5473 ; SANDY-SSE-NEXT:    movaps %xmm0, %xmm2 # sched: [1:1.00]
   5474 ; SANDY-SSE-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
   5475 ; SANDY-SSE-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [9:1.00]
   5476 ; SANDY-SSE-NEXT:    addps %xmm2, %xmm0 # sched: [3:1.00]
   5477 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5478 ;
   5479 ; SANDY-LABEL: test_roundss:
   5480 ; SANDY:       # %bb.0:
   5481 ; SANDY-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
   5482 ; SANDY-NEXT:    vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   5483 ; SANDY-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   5484 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5485 ;
   5486 ; HASWELL-SSE-LABEL: test_roundss:
   5487 ; HASWELL-SSE:       # %bb.0:
   5488 ; HASWELL-SSE-NEXT:    movaps %xmm0, %xmm2 # sched: [1:1.00]
   5489 ; HASWELL-SSE-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [6:0.50]
   5490 ; HASWELL-SSE-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [12:2.00]
   5491 ; HASWELL-SSE-NEXT:    addps %xmm2, %xmm0 # sched: [3:1.00]
   5492 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5493 ;
   5494 ; HASWELL-LABEL: test_roundss:
   5495 ; HASWELL:       # %bb.0:
   5496 ; HASWELL-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50]
   5497 ; HASWELL-NEXT:    vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
   5498 ; HASWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   5499 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5500 ;
   5501 ; BROADWELL-SSE-LABEL: test_roundss:
   5502 ; BROADWELL-SSE:       # %bb.0:
   5503 ; BROADWELL-SSE-NEXT:    movaps %xmm0, %xmm2 # sched: [1:1.00]
   5504 ; BROADWELL-SSE-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [11:2.00]
   5505 ; BROADWELL-SSE-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [6:0.50]
   5506 ; BROADWELL-SSE-NEXT:    addps %xmm2, %xmm0 # sched: [3:1.00]
   5507 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5508 ;
   5509 ; BROADWELL-LABEL: test_roundss:
   5510 ; BROADWELL:       # %bb.0:
   5511 ; BROADWELL-NEXT:    vroundss $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00]
   5512 ; BROADWELL-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm0 # sched: [6:0.50]
   5513 ; BROADWELL-NEXT:    vaddps %xmm2, %xmm0, %xmm0 # sched: [3:1.00]
   5514 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5515 ;
   5516 ; SKYLAKE-SSE-LABEL: test_roundss:
   5517 ; SKYLAKE-SSE:       # %bb.0:
   5518 ; SKYLAKE-SSE-NEXT:    movaps %xmm0, %xmm2 # sched: [1:0.33]
   5519 ; SKYLAKE-SSE-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [8:1.00]
   5520 ; SKYLAKE-SSE-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [14:1.00]
   5521 ; SKYLAKE-SSE-NEXT:    addps %xmm2, %xmm0 # sched: [4:0.50]
   5522 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5523 ;
   5524 ; SKYLAKE-LABEL: test_roundss:
   5525 ; SKYLAKE:       # %bb.0:
   5526 ; SKYLAKE-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
   5527 ; SKYLAKE-NEXT:    vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
   5528 ; SKYLAKE-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
   5529 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5530 ;
   5531 ; SKX-SSE-LABEL: test_roundss:
   5532 ; SKX-SSE:       # %bb.0:
   5533 ; SKX-SSE-NEXT:    movaps %xmm0, %xmm2 # sched: [1:0.33]
   5534 ; SKX-SSE-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [8:1.00]
   5535 ; SKX-SSE-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [14:1.00]
   5536 ; SKX-SSE-NEXT:    addps %xmm2, %xmm0 # sched: [4:0.50]
   5537 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5538 ;
   5539 ; SKX-LABEL: test_roundss:
   5540 ; SKX:       # %bb.0:
   5541 ; SKX-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
   5542 ; SKX-NEXT:    vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
   5543 ; SKX-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
   5544 ; SKX-NEXT:    retq # sched: [7:1.00]
   5545 ;
   5546 ; BTVER2-SSE-LABEL: test_roundss:
   5547 ; BTVER2-SSE:       # %bb.0:
   5548 ; BTVER2-SSE-NEXT:    movaps %xmm0, %xmm2 # sched: [1:0.50]
   5549 ; BTVER2-SSE-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [8:1.00]
   5550 ; BTVER2-SSE-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
   5551 ; BTVER2-SSE-NEXT:    addps %xmm2, %xmm0 # sched: [3:1.00]
   5552 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5553 ;
   5554 ; BTVER2-LABEL: test_roundss:
   5555 ; BTVER2:       # %bb.0:
   5556 ; BTVER2-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
   5557 ; BTVER2-NEXT:    vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   5558 ; BTVER2-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   5559 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5560 ;
   5561 ; ZNVER1-SSE-LABEL: test_roundss:
   5562 ; ZNVER1-SSE:       # %bb.0:
   5563 ; ZNVER1-SSE-NEXT:    movaps %xmm0, %xmm2 # sched: [1:0.25]
   5564 ; ZNVER1-SSE-NEXT:    roundss $7, (%rdi), %xmm0 # sched: [11:1.00]
   5565 ; ZNVER1-SSE-NEXT:    roundss $7, %xmm1, %xmm2 # sched: [4:1.00]
   5566 ; ZNVER1-SSE-NEXT:    addps %xmm2, %xmm0 # sched: [3:1.00]
   5567 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5568 ;
   5569 ; ZNVER1-LABEL: test_roundss:
   5570 ; ZNVER1:       # %bb.0:
   5571 ; ZNVER1-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00]
   5572 ; ZNVER1-NEXT:    vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   5573 ; ZNVER1-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   5574 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5575   %1 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; second operand passed by value
   5576   %2 = load <4 x float>, <4 x float> *%a2, align 16 ; 16-byte aligned load feeding the second call
   5577   %3 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %2, i32 7) ; second operand comes from memory
   5578   %4 = fadd <4 x float> %1, %3 ; sum both results into the single return value
   5579   ret <4 x float> %4
   5580 }
   5581 declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
   5582