Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
      9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
     10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
     11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
     12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
     13 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
     14 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
     15 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
     16 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
     17 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
     18 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
     19 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
     20 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
     21 
     22 define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
     23 ; GENERIC-LABEL: test_addpd:
     24 ; GENERIC:       # %bb.0:
     25 ; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
     26 ; GENERIC-NEXT:    addpd (%rdi), %xmm0 # sched: [9:1.00]
     27 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     28 ;
     29 ; ATOM-LABEL: test_addpd:
     30 ; ATOM:       # %bb.0:
     31 ; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
     32 ; ATOM-NEXT:    addpd (%rdi), %xmm0 # sched: [7:3.50]
     33 ; ATOM-NEXT:    retq # sched: [79:39.50]
     34 ;
     35 ; SLM-LABEL: test_addpd:
     36 ; SLM:       # %bb.0:
     37 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
     38 ; SLM-NEXT:    addpd (%rdi), %xmm0 # sched: [6:1.00]
     39 ; SLM-NEXT:    retq # sched: [4:1.00]
     40 ;
     41 ; SANDY-SSE-LABEL: test_addpd:
     42 ; SANDY-SSE:       # %bb.0:
     43 ; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
     44 ; SANDY-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [9:1.00]
     45 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
     46 ;
     47 ; SANDY-LABEL: test_addpd:
     48 ; SANDY:       # %bb.0:
     49 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
     50 ; SANDY-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
     51 ; SANDY-NEXT:    retq # sched: [1:1.00]
     52 ;
     53 ; HASWELL-SSE-LABEL: test_addpd:
     54 ; HASWELL-SSE:       # %bb.0:
     55 ; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
     56 ; HASWELL-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [9:1.00]
     57 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
     58 ;
     59 ; HASWELL-LABEL: test_addpd:
     60 ; HASWELL:       # %bb.0:
     61 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
     62 ; HASWELL-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
     63 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     64 ;
     65 ; BROADWELL-SSE-LABEL: test_addpd:
     66 ; BROADWELL-SSE:       # %bb.0:
     67 ; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
     68 ; BROADWELL-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [8:1.00]
     69 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
     70 ;
     71 ; BROADWELL-LABEL: test_addpd:
     72 ; BROADWELL:       # %bb.0:
     73 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
     74 ; BROADWELL-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
     75 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     76 ;
     77 ; SKYLAKE-SSE-LABEL: test_addpd:
     78 ; SKYLAKE-SSE:       # %bb.0:
     79 ; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
     80 ; SKYLAKE-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [10:0.50]
     81 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
     82 ;
     83 ; SKYLAKE-LABEL: test_addpd:
     84 ; SKYLAKE:       # %bb.0:
     85 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
     86 ; SKYLAKE-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
     87 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
     88 ;
     89 ; SKX-SSE-LABEL: test_addpd:
     90 ; SKX-SSE:       # %bb.0:
     91 ; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
     92 ; SKX-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [10:0.50]
     93 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
     94 ;
     95 ; SKX-LABEL: test_addpd:
     96 ; SKX:       # %bb.0:
     97 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
     98 ; SKX-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
     99 ; SKX-NEXT:    retq # sched: [7:1.00]
    100 ;
    101 ; BTVER2-SSE-LABEL: test_addpd:
    102 ; BTVER2-SSE:       # %bb.0:
    103 ; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    104 ; BTVER2-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [8:1.00]
    105 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    106 ;
    107 ; BTVER2-LABEL: test_addpd:
    108 ; BTVER2:       # %bb.0:
    109 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    110 ; BTVER2-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    111 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    112 ;
    113 ; ZNVER1-SSE-LABEL: test_addpd:
    114 ; ZNVER1-SSE:       # %bb.0:
    115 ; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    116 ; ZNVER1-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [10:1.00]
    117 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    118 ;
    119 ; ZNVER1-LABEL: test_addpd:
    120 ; ZNVER1:       # %bb.0:
    121 ; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    122 ; ZNVER1-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
    123 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; IR under test for packed-double addition. The first fadd takes both
        ; operands from the register arguments %a0 and %a1; the second fadd takes
        ; its right-hand operand from a 16-byte-aligned load through %a2. The
        ; assertions above expect this to appear as one register-form and one
        ; memory-form addpd (vaddpd under the prefixes whose RUN line passes
        ; -mattr=-avx2), each followed by its "# sched" annotation.
        ; NOTE(review): the bracketed pair in "# sched" is presumably
        ; latency and reciprocal throughput from -print-schedule - confirm.
    124   %1 = fadd <2 x double> %a0, %a1
    125   %2 = load <2 x double>, <2 x double> *%a2, align 16
    126   %3 = fadd <2 x double> %1, %2
    127   ret <2 x double> %3
    128 }
    129 
    130 define double @test_addsd(double %a0, double %a1, double *%a2) {
    131 ; GENERIC-LABEL: test_addsd:
    132 ; GENERIC:       # %bb.0:
    133 ; GENERIC-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
    134 ; GENERIC-NEXT:    addsd (%rdi), %xmm0 # sched: [9:1.00]
    135 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    136 ;
    137 ; ATOM-LABEL: test_addsd:
    138 ; ATOM:       # %bb.0:
    139 ; ATOM-NEXT:    addsd %xmm1, %xmm0 # sched: [5:5.00]
    140 ; ATOM-NEXT:    addsd (%rdi), %xmm0 # sched: [5:5.00]
    141 ; ATOM-NEXT:    retq # sched: [79:39.50]
    142 ;
    143 ; SLM-LABEL: test_addsd:
    144 ; SLM:       # %bb.0:
    145 ; SLM-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
    146 ; SLM-NEXT:    addsd (%rdi), %xmm0 # sched: [6:1.00]
    147 ; SLM-NEXT:    retq # sched: [4:1.00]
    148 ;
    149 ; SANDY-SSE-LABEL: test_addsd:
    150 ; SANDY-SSE:       # %bb.0:
    151 ; SANDY-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
    152 ; SANDY-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [9:1.00]
    153 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    154 ;
    155 ; SANDY-LABEL: test_addsd:
    156 ; SANDY:       # %bb.0:
    157 ; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    158 ; SANDY-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
    159 ; SANDY-NEXT:    retq # sched: [1:1.00]
    160 ;
    161 ; HASWELL-SSE-LABEL: test_addsd:
    162 ; HASWELL-SSE:       # %bb.0:
    163 ; HASWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
    164 ; HASWELL-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [8:1.00]
    165 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    166 ;
    167 ; HASWELL-LABEL: test_addsd:
    168 ; HASWELL:       # %bb.0:
    169 ; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    170 ; HASWELL-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    171 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    172 ;
    173 ; BROADWELL-SSE-LABEL: test_addsd:
    174 ; BROADWELL-SSE:       # %bb.0:
    175 ; BROADWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
    176 ; BROADWELL-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [8:1.00]
    177 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    178 ;
    179 ; BROADWELL-LABEL: test_addsd:
    180 ; BROADWELL:       # %bb.0:
    181 ; BROADWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    182 ; BROADWELL-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    183 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    184 ;
    185 ; SKYLAKE-SSE-LABEL: test_addsd:
    186 ; SKYLAKE-SSE:       # %bb.0:
    187 ; SKYLAKE-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
    188 ; SKYLAKE-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [9:0.50]
    189 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    190 ;
    191 ; SKYLAKE-LABEL: test_addsd:
    192 ; SKYLAKE:       # %bb.0:
    193 ; SKYLAKE-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    194 ; SKYLAKE-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
    195 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    196 ;
    197 ; SKX-SSE-LABEL: test_addsd:
    198 ; SKX-SSE:       # %bb.0:
    199 ; SKX-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
    200 ; SKX-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [9:0.50]
    201 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    202 ;
    203 ; SKX-LABEL: test_addsd:
    204 ; SKX:       # %bb.0:
    205 ; SKX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    206 ; SKX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
    207 ; SKX-NEXT:    retq # sched: [7:1.00]
    208 ;
    209 ; BTVER2-SSE-LABEL: test_addsd:
    210 ; BTVER2-SSE:       # %bb.0:
    211 ; BTVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
    212 ; BTVER2-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [8:1.00]
    213 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    214 ;
    215 ; BTVER2-LABEL: test_addsd:
    216 ; BTVER2:       # %bb.0:
    217 ; BTVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    218 ; BTVER2-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    219 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    220 ;
    221 ; ZNVER1-SSE-LABEL: test_addsd:
    222 ; ZNVER1-SSE:       # %bb.0:
    223 ; ZNVER1-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
    224 ; ZNVER1-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [10:1.00]
    225 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    226 ;
    227 ; ZNVER1-LABEL: test_addsd:
    228 ; ZNVER1:       # %bb.0:
    229 ; ZNVER1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    230 ; ZNVER1-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
    231 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; IR under test for scalar-double addition. The first fadd adds the two
        ; register arguments %a0 and %a1; the second fadd adds a double loaded
        ; through %a2 with 8-byte alignment. The assertions above expect one
        ; register-form and one memory-form addsd (vaddsd under the prefixes
        ; whose RUN line passes -mattr=-avx2) before the return.
    232   %1 = fadd double %a0, %a1
    233   %2 = load double, double *%a2, align 8
    234   %3 = fadd double %1, %2
    235   ret double %3
    236 }
    237 
    238 define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
    239 ; GENERIC-LABEL: test_andpd:
    240 ; GENERIC:       # %bb.0:
    241 ; GENERIC-NEXT:    andpd %xmm1, %xmm0 # sched: [1:1.00]
    242 ; GENERIC-NEXT:    andpd (%rdi), %xmm0 # sched: [7:1.00]
    243 ; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    244 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    245 ;
    246 ; ATOM-LABEL: test_andpd:
    247 ; ATOM:       # %bb.0:
    248 ; ATOM-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.50]
    249 ; ATOM-NEXT:    andpd (%rdi), %xmm0 # sched: [1:1.00]
    250 ; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
    251 ; ATOM-NEXT:    retq # sched: [79:39.50]
    252 ;
    253 ; SLM-LABEL: test_andpd:
    254 ; SLM:       # %bb.0:
    255 ; SLM-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.50]
    256 ; SLM-NEXT:    andpd (%rdi), %xmm0 # sched: [4:1.00]
    257 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    258 ; SLM-NEXT:    retq # sched: [4:1.00]
    259 ;
    260 ; SANDY-SSE-LABEL: test_andpd:
    261 ; SANDY-SSE:       # %bb.0:
    262 ; SANDY-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:1.00]
    263 ; SANDY-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:1.00]
    264 ; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    265 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    266 ;
    267 ; SANDY-LABEL: test_andpd:
    268 ; SANDY:       # %bb.0:
    269 ; SANDY-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
    270 ; SANDY-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
    271 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    272 ; SANDY-NEXT:    retq # sched: [1:1.00]
    273 ;
    274 ; HASWELL-SSE-LABEL: test_andpd:
    275 ; HASWELL-SSE:       # %bb.0:
    276 ; HASWELL-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:1.00]
    277 ; HASWELL-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:1.00]
    278 ; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    279 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    280 ;
    281 ; HASWELL-LABEL: test_andpd:
    282 ; HASWELL:       # %bb.0:
    283 ; HASWELL-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
    284 ; HASWELL-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
    285 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    286 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    287 ;
    288 ; BROADWELL-SSE-LABEL: test_andpd:
    289 ; BROADWELL-SSE:       # %bb.0:
    290 ; BROADWELL-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:1.00]
    291 ; BROADWELL-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [6:1.00]
    292 ; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    293 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    294 ;
    295 ; BROADWELL-LABEL: test_andpd:
    296 ; BROADWELL:       # %bb.0:
    297 ; BROADWELL-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
    298 ; BROADWELL-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
    299 ; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    300 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    301 ;
    302 ; SKYLAKE-SSE-LABEL: test_andpd:
    303 ; SKYLAKE-SSE:       # %bb.0:
    304 ; SKYLAKE-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.33]
    305 ; SKYLAKE-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:0.50]
    306 ; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
    307 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    308 ;
    309 ; SKYLAKE-LABEL: test_andpd:
    310 ; SKYLAKE:       # %bb.0:
    311 ; SKYLAKE-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    312 ; SKYLAKE-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
    313 ; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
    314 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    315 ;
    316 ; SKX-SSE-LABEL: test_andpd:
    317 ; SKX-SSE:       # %bb.0:
    318 ; SKX-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.33]
    319 ; SKX-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:0.50]
    320 ; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
    321 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    322 ;
    323 ; SKX-LABEL: test_andpd:
    324 ; SKX:       # %bb.0:
    325 ; SKX-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    326 ; SKX-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
    327 ; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
    328 ; SKX-NEXT:    retq # sched: [7:1.00]
    329 ;
    330 ; BTVER2-SSE-LABEL: test_andpd:
    331 ; BTVER2-SSE:       # %bb.0:
    332 ; BTVER2-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.50]
    333 ; BTVER2-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [6:1.00]
    334 ; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    335 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    336 ;
    337 ; BTVER2-LABEL: test_andpd:
    338 ; BTVER2:       # %bb.0:
    339 ; BTVER2-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
    340 ; BTVER2-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
    341 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    342 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    343 ;
    344 ; ZNVER1-SSE-LABEL: test_andpd:
    345 ; ZNVER1-SSE:       # %bb.0:
    346 ; ZNVER1-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.25]
    347 ; ZNVER1-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [8:0.50]
    348 ; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    349 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    350 ;
    351 ; ZNVER1-LABEL: test_andpd:
    352 ; ZNVER1:       # %bb.0:
    353 ; ZNVER1-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
    354 ; ZNVER1-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
    355 ; ZNVER1-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    356 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; IR under test for bitwise AND on packed doubles. %a0 and %a1 are bitcast
        ; to <4 x i32> and ANDed; that result is ANDed with a 16-byte-aligned
        ; load through %a2 (also bitcast to <4 x i32>); the final value is bitcast
        ; back to <2 x double> and added to %a1. The assertions above expect a
        ; register-form and a memory-form andpd/vandpd followed by addpd/vaddpd.
        ; NOTE(review): the trailing fadd presumably keeps the logic ops in the
        ; double-precision FP domain so the "pd" encodings are selected - confirm.
    357   %1 = bitcast <2 x double> %a0 to <4 x i32>
    358   %2 = bitcast <2 x double> %a1 to <4 x i32>
    359   %3 = and <4 x i32> %1, %2
    360   %4 = load <2 x double>, <2 x double> *%a2, align 16
    361   %5 = bitcast <2 x double> %4 to <4 x i32>
    362   %6 = and <4 x i32> %3, %5
    363   %7 = bitcast <4 x i32> %6 to <2 x double>
    364   %8 = fadd <2 x double> %a1, %7
    365   ret <2 x double> %8
    366 }
    367 
    368 define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
    369 ; GENERIC-LABEL: test_andnotpd:
    370 ; GENERIC:       # %bb.0:
    371 ; GENERIC-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:1.00]
    372 ; GENERIC-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:1.00]
    373 ; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    374 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    375 ;
    376 ; ATOM-LABEL: test_andnotpd:
    377 ; ATOM:       # %bb.0:
    378 ; ATOM-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.50]
    379 ; ATOM-NEXT:    andnpd (%rdi), %xmm0 # sched: [1:1.00]
    380 ; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
    381 ; ATOM-NEXT:    retq # sched: [79:39.50]
    382 ;
    383 ; SLM-LABEL: test_andnotpd:
    384 ; SLM:       # %bb.0:
    385 ; SLM-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.50]
    386 ; SLM-NEXT:    andnpd (%rdi), %xmm0 # sched: [4:1.00]
    387 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    388 ; SLM-NEXT:    retq # sched: [4:1.00]
    389 ;
    390 ; SANDY-SSE-LABEL: test_andnotpd:
    391 ; SANDY-SSE:       # %bb.0:
    392 ; SANDY-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:1.00]
    393 ; SANDY-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:1.00]
    394 ; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    395 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    396 ;
    397 ; SANDY-LABEL: test_andnotpd:
    398 ; SANDY:       # %bb.0:
    399 ; SANDY-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
    400 ; SANDY-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
    401 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    402 ; SANDY-NEXT:    retq # sched: [1:1.00]
    403 ;
    404 ; HASWELL-SSE-LABEL: test_andnotpd:
    405 ; HASWELL-SSE:       # %bb.0:
    406 ; HASWELL-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:1.00]
    407 ; HASWELL-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:1.00]
    408 ; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    409 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    410 ;
    411 ; HASWELL-LABEL: test_andnotpd:
    412 ; HASWELL:       # %bb.0:
    413 ; HASWELL-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
    414 ; HASWELL-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
    415 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    416 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    417 ;
    418 ; BROADWELL-SSE-LABEL: test_andnotpd:
    419 ; BROADWELL-SSE:       # %bb.0:
    420 ; BROADWELL-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:1.00]
    421 ; BROADWELL-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [6:1.00]
    422 ; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    423 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    424 ;
    425 ; BROADWELL-LABEL: test_andnotpd:
    426 ; BROADWELL:       # %bb.0:
    427 ; BROADWELL-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
    428 ; BROADWELL-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
    429 ; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    430 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    431 ;
    432 ; SKYLAKE-SSE-LABEL: test_andnotpd:
    433 ; SKYLAKE-SSE:       # %bb.0:
    434 ; SKYLAKE-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.33]
    435 ; SKYLAKE-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:0.50]
    436 ; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
    437 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    438 ;
    439 ; SKYLAKE-LABEL: test_andnotpd:
    440 ; SKYLAKE:       # %bb.0:
    441 ; SKYLAKE-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    442 ; SKYLAKE-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
    443 ; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
    444 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    445 ;
    446 ; SKX-SSE-LABEL: test_andnotpd:
    447 ; SKX-SSE:       # %bb.0:
    448 ; SKX-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.33]
    449 ; SKX-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:0.50]
    450 ; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
    451 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    452 ;
    453 ; SKX-LABEL: test_andnotpd:
    454 ; SKX:       # %bb.0:
    455 ; SKX-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    456 ; SKX-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
    457 ; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
    458 ; SKX-NEXT:    retq # sched: [7:1.00]
    459 ;
    460 ; BTVER2-SSE-LABEL: test_andnotpd:
    461 ; BTVER2-SSE:       # %bb.0:
    462 ; BTVER2-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.50]
    463 ; BTVER2-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [6:1.00]
    464 ; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    465 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    466 ;
    467 ; BTVER2-LABEL: test_andnotpd:
    468 ; BTVER2:       # %bb.0:
    469 ; BTVER2-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
    470 ; BTVER2-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
    471 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    472 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    473 ;
    474 ; ZNVER1-SSE-LABEL: test_andnotpd:
    475 ; ZNVER1-SSE:       # %bb.0:
    476 ; ZNVER1-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.25]
    477 ; ZNVER1-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [8:0.50]
    478 ; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
    479 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    480 ;
    481 ; ZNVER1-LABEL: test_andnotpd:
    482 ; ZNVER1:       # %bb.0:
    483 ; ZNVER1-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
    484 ; ZNVER1-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
    485 ; ZNVER1-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    486 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; IR under test for AND-NOT on packed doubles, written as xor-with-all-ones
        ; followed by and. First (~%a0 & %a1) is formed from the bitcast register
        ; arguments; then that result is itself inverted and ANDed with a
        ; 16-byte-aligned load through %a2. The value is bitcast back to
        ; <2 x double> and added to %a1. The assertions above expect a
        ; register-form and a memory-form andnpd/vandnpd followed by addpd/vaddpd.
        ; NOTE(review): the trailing fadd presumably keeps the logic ops in the
        ; double-precision FP domain so the "pd" encodings are selected - confirm.
    487   %1 = bitcast <2 x double> %a0 to <4 x i32>
    488   %2 = bitcast <2 x double> %a1 to <4 x i32>
    489   %3 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
    490   %4 = and <4 x i32> %3, %2
    491   %5 = load <2 x double>, <2 x double> *%a2, align 16
    492   %6 = bitcast <2 x double> %5 to <4 x i32>
    493   %7 = xor <4 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1>
    494   %8 = and <4 x i32> %6, %7
    495   %9 = bitcast <4 x i32> %8 to <2 x double>
    496   %10 = fadd <2 x double> %a1, %9
    497   ret <2 x double> %10
    498 }
    499 
    500 define void @test_clflush(i8* %p){
    501 ; GENERIC-LABEL: test_clflush:
    502 ; GENERIC:       # %bb.0:
    503 ; GENERIC-NEXT:    clflush (%rdi) # sched: [5:1.00]
    504 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    505 ;
    506 ; ATOM-LABEL: test_clflush:
    507 ; ATOM:       # %bb.0:
    508 ; ATOM-NEXT:    clflush (%rdi) # sched: [1:1.00]
    509 ; ATOM-NEXT:    nop # sched: [1:0.50]
    510 ; ATOM-NEXT:    nop # sched: [1:0.50]
    511 ; ATOM-NEXT:    nop # sched: [1:0.50]
    512 ; ATOM-NEXT:    nop # sched: [1:0.50]
    513 ; ATOM-NEXT:    nop # sched: [1:0.50]
    514 ; ATOM-NEXT:    nop # sched: [1:0.50]
    515 ; ATOM-NEXT:    retq # sched: [79:39.50]
    516 ;
    517 ; SLM-LABEL: test_clflush:
    518 ; SLM:       # %bb.0:
    519 ; SLM-NEXT:    clflush (%rdi) # sched: [3:1.00]
    520 ; SLM-NEXT:    retq # sched: [4:1.00]
    521 ;
    522 ; SANDY-SSE-LABEL: test_clflush:
    523 ; SANDY-SSE:       # %bb.0:
    524 ; SANDY-SSE-NEXT:    clflush (%rdi) # sched: [5:1.00]
    525 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    526 ;
    527 ; SANDY-LABEL: test_clflush:
    528 ; SANDY:       # %bb.0:
    529 ; SANDY-NEXT:    clflush (%rdi) # sched: [5:1.00]
    530 ; SANDY-NEXT:    retq # sched: [1:1.00]
    531 ;
    532 ; HASWELL-SSE-LABEL: test_clflush:
    533 ; HASWELL-SSE:       # %bb.0:
    534 ; HASWELL-SSE-NEXT:    clflush (%rdi) # sched: [2:1.00]
    535 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    536 ;
    537 ; HASWELL-LABEL: test_clflush:
    538 ; HASWELL:       # %bb.0:
    539 ; HASWELL-NEXT:    clflush (%rdi) # sched: [2:1.00]
    540 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    541 ;
    542 ; BROADWELL-SSE-LABEL: test_clflush:
    543 ; BROADWELL-SSE:       # %bb.0:
    544 ; BROADWELL-SSE-NEXT:    clflush (%rdi) # sched: [2:1.00]
    545 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    546 ;
    547 ; BROADWELL-LABEL: test_clflush:
    548 ; BROADWELL:       # %bb.0:
    549 ; BROADWELL-NEXT:    clflush (%rdi) # sched: [2:1.00]
    550 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    551 ;
    552 ; SKYLAKE-SSE-LABEL: test_clflush:
    553 ; SKYLAKE-SSE:       # %bb.0:
    554 ; SKYLAKE-SSE-NEXT:    clflush (%rdi) # sched: [2:1.00]
    555 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    556 ;
    557 ; SKYLAKE-LABEL: test_clflush:
    558 ; SKYLAKE:       # %bb.0:
    559 ; SKYLAKE-NEXT:    clflush (%rdi) # sched: [2:1.00]
    560 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    561 ;
    562 ; SKX-SSE-LABEL: test_clflush:
    563 ; SKX-SSE:       # %bb.0:
    564 ; SKX-SSE-NEXT:    clflush (%rdi) # sched: [2:1.00]
    565 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    566 ;
    567 ; SKX-LABEL: test_clflush:
    568 ; SKX:       # %bb.0:
    569 ; SKX-NEXT:    clflush (%rdi) # sched: [2:1.00]
    570 ; SKX-NEXT:    retq # sched: [7:1.00]
    571 ;
    572 ; BTVER2-SSE-LABEL: test_clflush:
    573 ; BTVER2-SSE:       # %bb.0:
    574 ; BTVER2-SSE-NEXT:    clflush (%rdi) # sched: [5:1.00]
    575 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    576 ;
    577 ; BTVER2-LABEL: test_clflush:
    578 ; BTVER2:       # %bb.0:
    579 ; BTVER2-NEXT:    clflush (%rdi) # sched: [8:0.50]
    580 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    581 ;
    582 ; ZNVER1-SSE-LABEL: test_clflush:
    583 ; ZNVER1-SSE:       # %bb.0:
    584 ; ZNVER1-SSE-NEXT:    clflush (%rdi) # sched: [8:0.50]
    585 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    586 ;
    587 ; ZNVER1-LABEL: test_clflush:
    588 ; ZNVER1:       # %bb.0:
    589 ; ZNVER1-NEXT:    clflush (%rdi) # sched: [8:0.50]
    590 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; IR under test: a single tail call to the llvm.x86.sse2.clflush intrinsic
        ; on the pointer argument %p, then a void return. Every assertion block
        ; above expects one "clflush (%rdi)" before retq. The ATOM block alone
        ; also expects six nop instructions between the clflush and the retq.
    591   tail call void @llvm.x86.sse2.clflush(i8* %p)
    592   ret void
    593 }
        ; Declaration of the intrinsic called by @test_clflush above.
    594 declare void @llvm.x86.sse2.clflush(i8*) nounwind
    595 
    596 define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
    597 ; GENERIC-LABEL: test_cmppd:
    598 ; GENERIC:       # %bb.0:
    599 ; GENERIC-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
    600 ; GENERIC-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
    601 ; GENERIC-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
    602 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    603 ;
    604 ; ATOM-LABEL: test_cmppd:
    605 ; ATOM:       # %bb.0:
    606 ; ATOM-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [6:3.00]
    607 ; ATOM-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [7:3.50]
    608 ; ATOM-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
    609 ; ATOM-NEXT:    retq # sched: [79:39.50]
    610 ;
    611 ; SLM-LABEL: test_cmppd:
    612 ; SLM:       # %bb.0:
    613 ; SLM-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
    614 ; SLM-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [6:1.00]
    615 ; SLM-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
    616 ; SLM-NEXT:    retq # sched: [4:1.00]
    617 ;
    618 ; SANDY-SSE-LABEL: test_cmppd:
    619 ; SANDY-SSE:       # %bb.0:
    620 ; SANDY-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
    621 ; SANDY-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
    622 ; SANDY-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
    623 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    624 ;
    625 ; SANDY-LABEL: test_cmppd:
    626 ; SANDY:       # %bb.0:
    627 ; SANDY-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
    628 ; SANDY-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
    629 ; SANDY-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
    630 ; SANDY-NEXT:    retq # sched: [1:1.00]
    631 ;
    632 ; HASWELL-SSE-LABEL: test_cmppd:
    633 ; HASWELL-SSE:       # %bb.0:
    634 ; HASWELL-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
    635 ; HASWELL-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
    636 ; HASWELL-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
    637 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    638 ;
    639 ; HASWELL-LABEL: test_cmppd:
    640 ; HASWELL:       # %bb.0:
    641 ; HASWELL-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
    642 ; HASWELL-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
    643 ; HASWELL-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
    644 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    645 ;
    646 ; BROADWELL-SSE-LABEL: test_cmppd:
    647 ; BROADWELL-SSE:       # %bb.0:
    648 ; BROADWELL-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
    649 ; BROADWELL-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [8:1.00]
    650 ; BROADWELL-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
    651 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    652 ;
    653 ; BROADWELL-LABEL: test_cmppd:
    654 ; BROADWELL:       # %bb.0:
    655 ; BROADWELL-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
    656 ; BROADWELL-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    657 ; BROADWELL-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
    658 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    659 ;
    660 ; SKYLAKE-SSE-LABEL: test_cmppd:
    661 ; SKYLAKE-SSE:       # %bb.0:
    662 ; SKYLAKE-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [4:0.50]
    663 ; SKYLAKE-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
    664 ; SKYLAKE-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.33]
    665 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    666 ;
    667 ; SKYLAKE-LABEL: test_cmppd:
    668 ; SKYLAKE:       # %bb.0:
    669 ; SKYLAKE-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
    670 ; SKYLAKE-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    671 ; SKYLAKE-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
    672 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    673 ;
    674 ; SKX-SSE-LABEL: test_cmppd:
    675 ; SKX-SSE:       # %bb.0:
    676 ; SKX-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [4:0.50]
    677 ; SKX-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
    678 ; SKX-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.33]
    679 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    680 ;
    681 ; SKX-LABEL: test_cmppd:
    682 ; SKX:       # %bb.0:
    683 ; SKX-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
    684 ; SKX-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    685 ; SKX-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
    686 ; SKX-NEXT:    retq # sched: [7:1.00]
    687 ;
    688 ; BTVER2-SSE-LABEL: test_cmppd:
    689 ; BTVER2-SSE:       # %bb.0:
    690 ; BTVER2-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [2:1.00]
    691 ; BTVER2-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [7:1.00]
    692 ; BTVER2-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
    693 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    694 ;
    695 ; BTVER2-LABEL: test_cmppd:
    696 ; BTVER2:       # %bb.0:
    697 ; BTVER2-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
    698 ; BTVER2-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
    699 ; BTVER2-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
    700 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    701 ;
    702 ; ZNVER1-SSE-LABEL: test_cmppd:
    703 ; ZNVER1-SSE:       # %bb.0:
    704 ; ZNVER1-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
    705 ; ZNVER1-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [10:1.00]
    706 ; ZNVER1-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.25]
    707 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    708 ;
    709 ; ZNVER1-LABEL: test_cmppd:
    710 ; ZNVER1:       # %bb.0:
    711 ; ZNVER1-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
    712 ; ZNVER1-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
    713 ; ZNVER1-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
    714 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    715   %1 = fcmp oeq <2 x double> %a0, %a1
    716   %2 = load <2 x double>, <2 x double> *%a2, align 16
    717   %3 = fcmp oeq <2 x double> %a0, %2
    718   %4 = or <2 x i1> %1, %3
    719   %5 = sext <2 x i1> %4 to <2 x i64>
    720   %6 = bitcast <2 x i64> %5 to <2 x double>
    721   ret <2 x double> %6
    722 }
    723 
    724 define double @test_cmpsd(double %a0, double %a1, double *%a2) {
        ; Scheduling test for the scalar double-precision compare (cmpeqsd / vcmpeqsd).
        ; The expectation lines that follow are autogenerated (see the note on the
        ; first line of this file); they pin, for every CPU configuration exercised
        ; by this file, the emitted instructions together with their "# sched"
        ; annotation. Regenerate them with the script instead of editing by hand.
        ; The compare is issued twice so that both the register-register form and
        ; the form with a memory operand, shown below as (%rdi), are covered.
    725 ; GENERIC-LABEL: test_cmpsd:
    726 ; GENERIC:       # %bb.0:
    727 ; GENERIC-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
    728 ; GENERIC-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [9:1.00]
    729 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    730 ;
    731 ; ATOM-LABEL: test_cmpsd:
    732 ; ATOM:       # %bb.0:
    733 ; ATOM-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [5:5.00]
    734 ; ATOM-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [5:5.00]
    735 ; ATOM-NEXT:    retq # sched: [79:39.50]
    736 ;
    737 ; SLM-LABEL: test_cmpsd:
    738 ; SLM:       # %bb.0:
    739 ; SLM-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
    740 ; SLM-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [6:1.00]
    741 ; SLM-NEXT:    retq # sched: [4:1.00]
    742 ;
    743 ; SANDY-SSE-LABEL: test_cmpsd:
    744 ; SANDY-SSE:       # %bb.0:
    745 ; SANDY-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
    746 ; SANDY-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [9:1.00]
    747 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    748 ;
    749 ; SANDY-LABEL: test_cmpsd:
    750 ; SANDY:       # %bb.0:
    751 ; SANDY-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    752 ; SANDY-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
    753 ; SANDY-NEXT:    retq # sched: [1:1.00]
    754 ;
    755 ; HASWELL-SSE-LABEL: test_cmpsd:
    756 ; HASWELL-SSE:       # %bb.0:
    757 ; HASWELL-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
    758 ; HASWELL-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [8:1.00]
    759 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    760 ;
    761 ; HASWELL-LABEL: test_cmpsd:
    762 ; HASWELL:       # %bb.0:
    763 ; HASWELL-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    764 ; HASWELL-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    765 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    766 ;
    767 ; BROADWELL-SSE-LABEL: test_cmpsd:
    768 ; BROADWELL-SSE:       # %bb.0:
    769 ; BROADWELL-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
    770 ; BROADWELL-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [8:1.00]
    771 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    772 ;
    773 ; BROADWELL-LABEL: test_cmpsd:
    774 ; BROADWELL:       # %bb.0:
    775 ; BROADWELL-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    776 ; BROADWELL-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    777 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    778 ;
    779 ; SKYLAKE-SSE-LABEL: test_cmpsd:
    780 ; SKYLAKE-SSE:       # %bb.0:
    781 ; SKYLAKE-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [4:0.50]
    782 ; SKYLAKE-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
    783 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    784 ;
    785 ; SKYLAKE-LABEL: test_cmpsd:
    786 ; SKYLAKE:       # %bb.0:
    787 ; SKYLAKE-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    788 ; SKYLAKE-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
    789 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    790 ;
    791 ; SKX-SSE-LABEL: test_cmpsd:
    792 ; SKX-SSE:       # %bb.0:
    793 ; SKX-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [4:0.50]
    794 ; SKX-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
    795 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    796 ;
    797 ; SKX-LABEL: test_cmpsd:
    798 ; SKX:       # %bb.0:
    799 ; SKX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    800 ; SKX-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
    801 ; SKX-NEXT:    retq # sched: [7:1.00]
    802 ;
    803 ; BTVER2-SSE-LABEL: test_cmpsd:
    804 ; BTVER2-SSE:       # %bb.0:
    805 ; BTVER2-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [2:1.00]
    806 ; BTVER2-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [7:1.00]
    807 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    808 ;
    809 ; BTVER2-LABEL: test_cmpsd:
    810 ; BTVER2:       # %bb.0:
    811 ; BTVER2-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
    812 ; BTVER2-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
    813 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    814 ;
    815 ; ZNVER1-SSE-LABEL: test_cmpsd:
    816 ; ZNVER1-SSE:       # %bb.0:
    817 ; ZNVER1-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
    818 ; ZNVER1-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [10:1.00]
    819 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    820 ;
    821 ; ZNVER1-LABEL: test_cmpsd:
    822 ; ZNVER1:       # %bb.0:
    823 ; ZNVER1-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    824 ; ZNVER1-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
    825 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Put each scalar argument into lane 0 of an otherwise-undef vector, since
        ; the intrinsic takes <2 x double> operands.
    826   %1 = insertelement <2 x double> undef, double %a0, i32 0
    827   %2 = insertelement <2 x double> undef, double %a1, i32 0
        ; First compare, register operands. The i8 0 immediate is the predicate that
        ; the expectations above print as cmpeqsd / vcmpeqsd.
    828   %3 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %1, <2 x double> %2, i8 0)
        ; Second compare: the result of the first one against the double loaded
        ; from %a2, again placed in lane 0 of an undef vector.
    829   %4 = load double, double *%a2, align 8
    830   %5 = insertelement <2 x double> undef, double %4, i32 0
    831   %6 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %3, <2 x double> %5, i8 0)
        ; Only lane 0 of the final compare result is returned.
    832   %7 = extractelement <2 x double> %6, i32 0
    833   ret double %7
    834 }
        ; Intrinsic used by the function above; it takes two <2 x double> operands
        ; and an i8 immediate and yields a <2 x double>.
    835 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
    836 
    837 define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
        ; Scheduling test for comisd / vcomisd.
        ; The expectation lines that follow are autogenerated (see the note on the
        ; first line of this file); they pin, for every CPU configuration exercised
        ; by this file, the emitted instructions together with their "# sched"
        ; annotation. Regenerate them with the script instead of editing by hand.
        ; Each expected sequence contains the compare twice, once with register
        ; operands and once with a memory operand, (%rdi); each compare is followed
        ; by setnp/sete/andb, and the two partial results are combined with orb and
        ; zero-extended with movzbl.
    838 ; GENERIC-LABEL: test_comisd:
    839 ; GENERIC:       # %bb.0:
    840 ; GENERIC-NEXT:    comisd %xmm1, %xmm0 # sched: [2:1.00]
    841 ; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
    842 ; GENERIC-NEXT:    sete %cl # sched: [1:0.50]
    843 ; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
    844 ; GENERIC-NEXT:    comisd (%rdi), %xmm0 # sched: [8:1.00]
    845 ; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
    846 ; GENERIC-NEXT:    sete %dl # sched: [1:0.50]
    847 ; GENERIC-NEXT:    andb %al, %dl # sched: [1:0.33]
    848 ; GENERIC-NEXT:    orb %cl, %dl # sched: [1:0.33]
    849 ; GENERIC-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
    850 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    851 ;
    852 ; ATOM-LABEL: test_comisd:
    853 ; ATOM:       # %bb.0:
    854 ; ATOM-NEXT:    comisd %xmm1, %xmm0 # sched: [9:4.50]
    855 ; ATOM-NEXT:    setnp %al # sched: [1:0.50]
    856 ; ATOM-NEXT:    sete %cl # sched: [1:0.50]
    857 ; ATOM-NEXT:    andb %al, %cl # sched: [1:0.50]
    858 ; ATOM-NEXT:    comisd (%rdi), %xmm0 # sched: [10:5.00]
    859 ; ATOM-NEXT:    setnp %al # sched: [1:0.50]
    860 ; ATOM-NEXT:    sete %dl # sched: [1:0.50]
    861 ; ATOM-NEXT:    andb %al, %dl # sched: [1:0.50]
    862 ; ATOM-NEXT:    orb %cl, %dl # sched: [1:0.50]
    863 ; ATOM-NEXT:    movzbl %dl, %eax # sched: [1:1.00]
    864 ; ATOM-NEXT:    retq # sched: [79:39.50]
    865 ;
    866 ; SLM-LABEL: test_comisd:
    867 ; SLM:       # %bb.0:
    868 ; SLM-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
    869 ; SLM-NEXT:    setnp %al # sched: [1:0.50]
    870 ; SLM-NEXT:    sete %cl # sched: [1:0.50]
    871 ; SLM-NEXT:    andb %al, %cl # sched: [1:0.50]
    872 ; SLM-NEXT:    comisd (%rdi), %xmm0 # sched: [6:1.00]
    873 ; SLM-NEXT:    setnp %al # sched: [1:0.50]
    874 ; SLM-NEXT:    sete %dl # sched: [1:0.50]
    875 ; SLM-NEXT:    andb %al, %dl # sched: [1:0.50]
    876 ; SLM-NEXT:    orb %cl, %dl # sched: [1:0.50]
    877 ; SLM-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
    878 ; SLM-NEXT:    retq # sched: [4:1.00]
    879 ;
    880 ; SANDY-SSE-LABEL: test_comisd:
    881 ; SANDY-SSE:       # %bb.0:
    882 ; SANDY-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [2:1.00]
    883 ; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
    884 ; SANDY-SSE-NEXT:    sete %cl # sched: [1:0.50]
    885 ; SANDY-SSE-NEXT:    andb %al, %cl # sched: [1:0.33]
    886 ; SANDY-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [8:1.00]
    887 ; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
    888 ; SANDY-SSE-NEXT:    sete %dl # sched: [1:0.50]
    889 ; SANDY-SSE-NEXT:    andb %al, %dl # sched: [1:0.33]
    890 ; SANDY-SSE-NEXT:    orb %cl, %dl # sched: [1:0.33]
    891 ; SANDY-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
    892 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    893 ;
    894 ; SANDY-LABEL: test_comisd:
    895 ; SANDY:       # %bb.0:
    896 ; SANDY-NEXT:    vcomisd %xmm1, %xmm0 # sched: [2:1.00]
    897 ; SANDY-NEXT:    setnp %al # sched: [1:0.50]
    898 ; SANDY-NEXT:    sete %cl # sched: [1:0.50]
    899 ; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
    900 ; SANDY-NEXT:    vcomisd (%rdi), %xmm0 # sched: [8:1.00]
    901 ; SANDY-NEXT:    setnp %al # sched: [1:0.50]
    902 ; SANDY-NEXT:    sete %dl # sched: [1:0.50]
    903 ; SANDY-NEXT:    andb %al, %dl # sched: [1:0.33]
    904 ; SANDY-NEXT:    orb %cl, %dl # sched: [1:0.33]
    905 ; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
    906 ; SANDY-NEXT:    retq # sched: [1:1.00]
    907 ;
    908 ; HASWELL-SSE-LABEL: test_comisd:
    909 ; HASWELL-SSE:       # %bb.0:
    910 ; HASWELL-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
    911 ; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
    912 ; HASWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
    913 ; HASWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
    914 ; HASWELL-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [8:1.00]
    915 ; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
    916 ; HASWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
    917 ; HASWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
    918 ; HASWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
    919 ; HASWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
    920 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    921 ;
    922 ; HASWELL-LABEL: test_comisd:
    923 ; HASWELL:       # %bb.0:
    924 ; HASWELL-NEXT:    vcomisd %xmm1, %xmm0 # sched: [3:1.00]
    925 ; HASWELL-NEXT:    setnp %al # sched: [1:0.50]
    926 ; HASWELL-NEXT:    sete %cl # sched: [1:0.50]
    927 ; HASWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
    928 ; HASWELL-NEXT:    vcomisd (%rdi), %xmm0 # sched: [8:1.00]
    929 ; HASWELL-NEXT:    setnp %al # sched: [1:0.50]
    930 ; HASWELL-NEXT:    sete %dl # sched: [1:0.50]
    931 ; HASWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
    932 ; HASWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
    933 ; HASWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
    934 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    935 ;
    936 ; BROADWELL-SSE-LABEL: test_comisd:
    937 ; BROADWELL-SSE:       # %bb.0:
    938 ; BROADWELL-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
    939 ; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
    940 ; BROADWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
    941 ; BROADWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
    942 ; BROADWELL-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [8:1.00]
    943 ; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
    944 ; BROADWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
    945 ; BROADWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
    946 ; BROADWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
    947 ; BROADWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
    948 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    949 ;
    950 ; BROADWELL-LABEL: test_comisd:
    951 ; BROADWELL:       # %bb.0:
    952 ; BROADWELL-NEXT:    vcomisd %xmm1, %xmm0 # sched: [3:1.00]
    953 ; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
    954 ; BROADWELL-NEXT:    sete %cl # sched: [1:0.50]
    955 ; BROADWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
    956 ; BROADWELL-NEXT:    vcomisd (%rdi), %xmm0 # sched: [8:1.00]
    957 ; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
    958 ; BROADWELL-NEXT:    sete %dl # sched: [1:0.50]
    959 ; BROADWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
    960 ; BROADWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
    961 ; BROADWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
    962 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    963 ;
    964 ; SKYLAKE-SSE-LABEL: test_comisd:
    965 ; SKYLAKE-SSE:       # %bb.0:
    966 ; SKYLAKE-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [2:1.00]
    967 ; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
    968 ; SKYLAKE-SSE-NEXT:    sete %cl # sched: [1:0.50]
    969 ; SKYLAKE-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
    970 ; SKYLAKE-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [7:1.00]
    971 ; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
    972 ; SKYLAKE-SSE-NEXT:    sete %dl # sched: [1:0.50]
    973 ; SKYLAKE-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
    974 ; SKYLAKE-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
    975 ; SKYLAKE-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
    976 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    977 ;
    978 ; SKYLAKE-LABEL: test_comisd:
    979 ; SKYLAKE:       # %bb.0:
    980 ; SKYLAKE-NEXT:    vcomisd %xmm1, %xmm0 # sched: [2:1.00]
    981 ; SKYLAKE-NEXT:    setnp %al # sched: [1:0.50]
    982 ; SKYLAKE-NEXT:    sete %cl # sched: [1:0.50]
    983 ; SKYLAKE-NEXT:    andb %al, %cl # sched: [1:0.25]
    984 ; SKYLAKE-NEXT:    vcomisd (%rdi), %xmm0 # sched: [7:1.00]
    985 ; SKYLAKE-NEXT:    setnp %al # sched: [1:0.50]
    986 ; SKYLAKE-NEXT:    sete %dl # sched: [1:0.50]
    987 ; SKYLAKE-NEXT:    andb %al, %dl # sched: [1:0.25]
    988 ; SKYLAKE-NEXT:    orb %cl, %dl # sched: [1:0.25]
    989 ; SKYLAKE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
    990 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    991 ;
    992 ; SKX-SSE-LABEL: test_comisd:
    993 ; SKX-SSE:       # %bb.0:
    994 ; SKX-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [2:1.00]
    995 ; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
    996 ; SKX-SSE-NEXT:    sete %cl # sched: [1:0.50]
    997 ; SKX-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
    998 ; SKX-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [7:1.00]
    999 ; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
   1000 ; SKX-SSE-NEXT:    sete %dl # sched: [1:0.50]
   1001 ; SKX-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
   1002 ; SKX-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
   1003 ; SKX-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   1004 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1005 ;
   1006 ; SKX-LABEL: test_comisd:
   1007 ; SKX:       # %bb.0:
   1008 ; SKX-NEXT:    vcomisd %xmm1, %xmm0 # sched: [2:1.00]
   1009 ; SKX-NEXT:    setnp %al # sched: [1:0.50]
   1010 ; SKX-NEXT:    sete %cl # sched: [1:0.50]
   1011 ; SKX-NEXT:    andb %al, %cl # sched: [1:0.25]
   1012 ; SKX-NEXT:    vcomisd (%rdi), %xmm0 # sched: [7:1.00]
   1013 ; SKX-NEXT:    setnp %al # sched: [1:0.50]
   1014 ; SKX-NEXT:    sete %dl # sched: [1:0.50]
   1015 ; SKX-NEXT:    andb %al, %dl # sched: [1:0.25]
   1016 ; SKX-NEXT:    orb %cl, %dl # sched: [1:0.25]
   1017 ; SKX-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   1018 ; SKX-NEXT:    retq # sched: [7:1.00]
   1019 ;
   1020 ; BTVER2-SSE-LABEL: test_comisd:
   1021 ; BTVER2-SSE:       # %bb.0:
   1022 ; BTVER2-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
   1023 ; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
   1024 ; BTVER2-SSE-NEXT:    sete %cl # sched: [1:0.50]
   1025 ; BTVER2-SSE-NEXT:    andb %al, %cl # sched: [1:0.50]
   1026 ; BTVER2-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [8:1.00]
   1027 ; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
   1028 ; BTVER2-SSE-NEXT:    sete %dl # sched: [1:0.50]
   1029 ; BTVER2-SSE-NEXT:    andb %al, %dl # sched: [1:0.50]
   1030 ; BTVER2-SSE-NEXT:    orb %cl, %dl # sched: [1:0.50]
   1031 ; BTVER2-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
   1032 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1033 ;
   1034 ; BTVER2-LABEL: test_comisd:
   1035 ; BTVER2:       # %bb.0:
   1036 ; BTVER2-NEXT:    vcomisd %xmm1, %xmm0 # sched: [3:1.00]
   1037 ; BTVER2-NEXT:    setnp %al # sched: [1:0.50]
   1038 ; BTVER2-NEXT:    sete %cl # sched: [1:0.50]
   1039 ; BTVER2-NEXT:    andb %al, %cl # sched: [1:0.50]
   1040 ; BTVER2-NEXT:    vcomisd (%rdi), %xmm0 # sched: [8:1.00]
   1041 ; BTVER2-NEXT:    setnp %al # sched: [1:0.50]
   1042 ; BTVER2-NEXT:    sete %dl # sched: [1:0.50]
   1043 ; BTVER2-NEXT:    andb %al, %dl # sched: [1:0.50]
   1044 ; BTVER2-NEXT:    orb %cl, %dl # sched: [1:0.50]
   1045 ; BTVER2-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
   1046 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1047 ;
   1048 ; ZNVER1-SSE-LABEL: test_comisd:
   1049 ; ZNVER1-SSE:       # %bb.0:
   1050 ; ZNVER1-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
   1051 ; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
   1052 ; ZNVER1-SSE-NEXT:    sete %cl # sched: [1:0.25]
   1053 ; ZNVER1-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
   1054 ; ZNVER1-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [10:1.00]
   1055 ; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
   1056 ; ZNVER1-SSE-NEXT:    sete %dl # sched: [1:0.25]
   1057 ; ZNVER1-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
   1058 ; ZNVER1-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
   1059 ; ZNVER1-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   1060 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1061 ;
   1062 ; ZNVER1-LABEL: test_comisd:
   1063 ; ZNVER1:       # %bb.0:
   1064 ; ZNVER1-NEXT:    vcomisd %xmm1, %xmm0 # sched: [3:1.00]
   1065 ; ZNVER1-NEXT:    setnp %al # sched: [1:0.25]
   1066 ; ZNVER1-NEXT:    sete %cl # sched: [1:0.25]
   1067 ; ZNVER1-NEXT:    andb %al, %cl # sched: [1:0.25]
   1068 ; ZNVER1-NEXT:    vcomisd (%rdi), %xmm0 # sched: [10:1.00]
   1069 ; ZNVER1-NEXT:    setnp %al # sched: [1:0.25]
   1070 ; ZNVER1-NEXT:    sete %dl # sched: [1:0.25]
   1071 ; ZNVER1-NEXT:    andb %al, %dl # sched: [1:0.25]
   1072 ; ZNVER1-NEXT:    orb %cl, %dl # sched: [1:0.25]
   1073 ; ZNVER1-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   1074 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; First comparison: both operands are the vector arguments.
   1075   %1 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
        ; Second comparison: %a0 against the vector loaded from %a2.
        ; NOTE(review): this <2 x double> load is declared with align 8, yet the
        ; expectations above still show it as the memory operand of the compare,
        ; presumably because the instruction only reads the low double; confirm
        ; before changing the alignment.
   1076   %2 = load <2 x double>, <2 x double> *%a2, align 8
   1077   %3 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %2)
        ; The returned value is the bitwise OR of the two i32 comparison results.
   1078   %4 = or i32 %1, %3
   1079   ret i32 %4
   1080 }
        ; Intrinsic used by the function above; it takes two <2 x double> operands
        ; and yields an i32.
   1081 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
   1082 
   1083 define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
        ; Scheduling test for cvtdq2pd / vcvtdq2pd.
        ; The expectation lines that follow are autogenerated (see the note on the
        ; first line of this file); they pin, for every CPU configuration exercised
        ; by this file, the emitted instructions together with their "# sched"
        ; annotation. Regenerate them with the script instead of editing by hand.
        ; Both the register form and the memory-operand form, (%rdi), of the
        ; conversion appear in every expected sequence, followed by an addpd/vaddpd.
        ; Instruction order is not the same for every configuration: some sequences
        ; below start with the memory form, and two of them (the Atom one and the
        ; Broadwell SSE one) end with an extra movapd copy into %xmm0.
   1084 ; GENERIC-LABEL: test_cvtdq2pd:
   1085 ; GENERIC:       # %bb.0:
   1086 ; GENERIC-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
   1087 ; GENERIC-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
   1088 ; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   1089 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1090 ;
   1091 ; ATOM-LABEL: test_cvtdq2pd:
   1092 ; ATOM:       # %bb.0:
   1093 ; ATOM-NEXT:    cvtdq2pd (%rdi), %xmm1 # sched: [8:4.00]
   1094 ; ATOM-NEXT:    cvtdq2pd %xmm0, %xmm0 # sched: [7:3.50]
   1095 ; ATOM-NEXT:    addpd %xmm0, %xmm1 # sched: [6:3.00]
   1096 ; ATOM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
   1097 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1098 ;
   1099 ; SLM-LABEL: test_cvtdq2pd:
   1100 ; SLM:       # %bb.0:
   1101 ; SLM-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50]
   1102 ; SLM-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [7:1.00]
   1103 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   1104 ; SLM-NEXT:    retq # sched: [4:1.00]
   1105 ;
   1106 ; SANDY-SSE-LABEL: test_cvtdq2pd:
   1107 ; SANDY-SSE:       # %bb.0:
   1108 ; SANDY-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
   1109 ; SANDY-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
   1110 ; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   1111 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1112 ;
   1113 ; SANDY-LABEL: test_cvtdq2pd:
   1114 ; SANDY:       # %bb.0:
   1115 ; SANDY-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
   1116 ; SANDY-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00]
   1117 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1118 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1119 ;
   1120 ; HASWELL-SSE-LABEL: test_cvtdq2pd:
   1121 ; HASWELL-SSE:       # %bb.0:
   1122 ; HASWELL-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
   1123 ; HASWELL-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
   1124 ; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   1125 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1126 ;
   1127 ; HASWELL-LABEL: test_cvtdq2pd:
   1128 ; HASWELL:       # %bb.0:
   1129 ; HASWELL-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
   1130 ; HASWELL-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00]
   1131 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1132 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1133 ;
   1134 ; BROADWELL-SSE-LABEL: test_cvtdq2pd:
   1135 ; BROADWELL-SSE:       # %bb.0:
   1136 ; BROADWELL-SSE-NEXT:    cvtdq2pd (%rdi), %xmm1 # sched: [9:1.00]
   1137 ; BROADWELL-SSE-NEXT:    cvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
   1138 ; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   1139 ; BROADWELL-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
   1140 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1141 ;
   1142 ; BROADWELL-LABEL: test_cvtdq2pd:
   1143 ; BROADWELL:       # %bb.0:
   1144 ; BROADWELL-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [9:1.00]
   1145 ; BROADWELL-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
   1146 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1147 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1148 ;
   1149 ; SKYLAKE-SSE-LABEL: test_cvtdq2pd:
   1150 ; SKYLAKE-SSE:       # %bb.0:
   1151 ; SKYLAKE-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
   1152 ; SKYLAKE-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00]
   1153 ; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   1154 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1155 ;
   1156 ; SKYLAKE-LABEL: test_cvtdq2pd:
   1157 ; SKYLAKE:       # %bb.0:
   1158 ; SKYLAKE-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
   1159 ; SKYLAKE-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00]
   1160 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1161 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1162 ;
   1163 ; SKX-SSE-LABEL: test_cvtdq2pd:
   1164 ; SKX-SSE:       # %bb.0:
   1165 ; SKX-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50]
   1166 ; SKX-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00]
   1167 ; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   1168 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1169 ;
   1170 ; SKX-LABEL: test_cvtdq2pd:
   1171 ; SKX:       # %bb.0:
   1172 ; SKX-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50]
   1173 ; SKX-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00]
   1174 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1175 ; SKX-NEXT:    retq # sched: [7:1.00]
   1176 ;
   1177 ; BTVER2-SSE-LABEL: test_cvtdq2pd:
   1178 ; BTVER2-SSE:       # %bb.0:
   1179 ; BTVER2-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [3:1.00]
   1180 ; BTVER2-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [8:1.00]
   1181 ; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   1182 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1183 ;
   1184 ; BTVER2-LABEL: test_cvtdq2pd:
   1185 ; BTVER2:       # %bb.0:
   1186 ; BTVER2-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00]
   1187 ; BTVER2-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [3:1.00]
   1188 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1189 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1190 ;
   1191 ; ZNVER1-SSE-LABEL: test_cvtdq2pd:
   1192 ; ZNVER1-SSE:       # %bb.0:
   1193 ; ZNVER1-SSE-NEXT:    cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
   1194 ; ZNVER1-SSE-NEXT:    cvtdq2pd (%rdi), %xmm0 # sched: [12:1.00]
   1195 ; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   1196 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1197 ;
   1198 ; ZNVER1-LABEL: test_cvtdq2pd:
   1199 ; ZNVER1:       # %bb.0:
   1200 ; ZNVER1-NEXT:    vcvtdq2pd (%rdi), %xmm1 # sched: [12:1.00]
   1201 ; ZNVER1-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
   1202 ; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1203 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Take elements 0 and 1 of the register argument and convert them from
        ; signed i32 to double.
   1204   %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
   1205   %2 = sitofp <2 x i32> %1 to <2 x double>
        ; Same extraction and conversion for the <4 x i32> loaded from %a1
        ; (16-byte aligned load).
   1206   %3 = load <4 x i32>, <4 x i32>*%a1, align 16
   1207   %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
   1208   %5 = sitofp <2 x i32> %4 to <2 x double>
        ; The return value is the sum of the two converted vectors.
   1209   %6 = fadd <2 x double> %2, %5
   1210   ret <2 x double> %6
   1211 }
   1212 
   1213 define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) {
        ; Scheduling test for cvtdq2ps / vcvtdq2ps.
        ; The expectation lines that follow are autogenerated (see the note on the
        ; first line of this file); they pin, for every CPU configuration exercised
        ; by this file, the emitted instructions together with their "# sched"
        ; annotation. Regenerate them with the script instead of editing by hand.
        ; Both the register form and the memory-operand form, (%rdi), of the
        ; conversion appear in every expected sequence, followed by an addps/vaddps.
        ; Instruction order is not the same for every configuration: some sequences
        ; below start with the memory form, and the Atom one ends with an extra
        ; movaps copy into %xmm0.
   1214 ; GENERIC-LABEL: test_cvtdq2ps:
   1215 ; GENERIC:       # %bb.0:
   1216 ; GENERIC-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
   1217 ; GENERIC-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
   1218 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1219 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1220 ;
   1221 ; ATOM-LABEL: test_cvtdq2ps:
   1222 ; ATOM:       # %bb.0:
   1223 ; ATOM-NEXT:    cvtdq2ps (%rdi), %xmm1 # sched: [7:3.50]
   1224 ; ATOM-NEXT:    cvtdq2ps %xmm0, %xmm0 # sched: [6:3.00]
   1225 ; ATOM-NEXT:    addps %xmm0, %xmm1 # sched: [5:5.00]
   1226 ; ATOM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
   1227 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1228 ;
   1229 ; SLM-LABEL: test_cvtdq2ps:
   1230 ; SLM:       # %bb.0:
   1231 ; SLM-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
   1232 ; SLM-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [7:1.00]
   1233 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1234 ; SLM-NEXT:    retq # sched: [4:1.00]
   1235 ;
   1236 ; SANDY-SSE-LABEL: test_cvtdq2ps:
   1237 ; SANDY-SSE:       # %bb.0:
   1238 ; SANDY-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
   1239 ; SANDY-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
   1240 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1241 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1242 ;
   1243 ; SANDY-LABEL: test_cvtdq2ps:
   1244 ; SANDY:       # %bb.0:
   1245 ; SANDY-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
   1246 ; SANDY-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00]
   1247 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1248 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1249 ;
   1250 ; HASWELL-SSE-LABEL: test_cvtdq2ps:
   1251 ; HASWELL-SSE:       # %bb.0:
   1252 ; HASWELL-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
   1253 ; HASWELL-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
   1254 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1255 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1256 ;
   1257 ; HASWELL-LABEL: test_cvtdq2ps:
   1258 ; HASWELL:       # %bb.0:
   1259 ; HASWELL-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
   1260 ; HASWELL-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00]
   1261 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1262 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1263 ;
   1264 ; BROADWELL-SSE-LABEL: test_cvtdq2ps:
   1265 ; BROADWELL-SSE:       # %bb.0:
   1266 ; BROADWELL-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
   1267 ; BROADWELL-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00]
   1268 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1269 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1270 ;
   1271 ; BROADWELL-LABEL: test_cvtdq2ps:
   1272 ; BROADWELL:       # %bb.0:
   1273 ; BROADWELL-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
   1274 ; BROADWELL-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
   1275 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1276 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1277 ;
   1278 ; SKYLAKE-SSE-LABEL: test_cvtdq2ps:
   1279 ; SKYLAKE-SSE:       # %bb.0:
   1280 ; SKYLAKE-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
   1281 ; SKYLAKE-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50]
   1282 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   1283 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1284 ;
   1285 ; SKYLAKE-LABEL: test_cvtdq2ps:
   1286 ; SKYLAKE:       # %bb.0:
   1287 ; SKYLAKE-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
   1288 ; SKYLAKE-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50]
   1289 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1290 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1291 ;
   1292 ; SKX-SSE-LABEL: test_cvtdq2ps:
   1293 ; SKX-SSE:       # %bb.0:
   1294 ; SKX-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
   1295 ; SKX-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50]
   1296 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   1297 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1298 ;
   1299 ; SKX-LABEL: test_cvtdq2ps:
   1300 ; SKX:       # %bb.0:
   1301 ; SKX-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
   1302 ; SKX-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50]
   1303 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1304 ; SKX-NEXT:    retq # sched: [7:1.00]
   1305 ;
   1306 ; BTVER2-SSE-LABEL: test_cvtdq2ps:
   1307 ; BTVER2-SSE:       # %bb.0:
   1308 ; BTVER2-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
   1309 ; BTVER2-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00]
   1310 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1311 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1312 ;
   1313 ; BTVER2-LABEL: test_cvtdq2ps:
   1314 ; BTVER2:       # %bb.0:
   1315 ; BTVER2-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
   1316 ; BTVER2-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
   1317 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1318 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1319 ;
   1320 ; ZNVER1-SSE-LABEL: test_cvtdq2ps:
   1321 ; ZNVER1-SSE:       # %bb.0:
   1322 ; ZNVER1-SSE-NEXT:    cvtdq2ps %xmm0, %xmm1 # sched: [5:1.00]
   1323 ; ZNVER1-SSE-NEXT:    cvtdq2ps (%rdi), %xmm0 # sched: [12:1.00]
   1324 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1325 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1326 ;
   1327 ; ZNVER1-LABEL: test_cvtdq2ps:
   1328 ; ZNVER1:       # %bb.0:
   1329 ; ZNVER1-NEXT:    vcvtdq2ps (%rdi), %xmm1 # sched: [12:1.00]
   1330 ; ZNVER1-NEXT:    vcvtdq2ps %xmm0, %xmm0 # sched: [5:1.00]
   1331 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1332 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Convert all four signed i32 elements of the register argument to float.
   1333   %1 = sitofp <4 x i32> %a0 to <4 x float>
        ; Same conversion for the <4 x i32> loaded from %a1 (16-byte aligned load).
   1334   %2 = load <4 x i32>, <4 x i32>*%a1, align 16
   1335   %3 = sitofp <4 x i32> %2 to <4 x float>
        ; The return value is the sum of the two converted vectors.
   1336   %4 = fadd <4 x float> %1, %3
   1337   ret <4 x float> %4
   1338 }
   1339 
   1340 define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) {
        ; Scheduling test for the llvm.x86.sse2.cvtpd2dq intrinsic (<2 x double> to
        ; <4 x i32>), applied once to the register argument %a0 and once to a value
        ; loaded from %a1.
        ; The assertion lines below are autogenerated (see the note on the first
        ; line of this file); regenerate them with utils/update_llc_test_checks.py
        ; rather than editing them by hand.
        ; The "# sched" suffixes appear because the RUN lines pass -print-schedule;
        ; presumably the pair is latency and reciprocal throughput (TODO confirm
        ; against the scheduler models).
   1341 ; GENERIC-LABEL: test_cvtpd2dq:
   1342 ; GENERIC:       # %bb.0:
   1343 ; GENERIC-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
   1344 ; GENERIC-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
   1345 ; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   1346 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1347 ;
   1348 ; ATOM-LABEL: test_cvtpd2dq:
   1349 ; ATOM:       # %bb.0:
   1350 ; ATOM-NEXT:    cvtpd2dq (%rdi), %xmm1 # sched: [8:4.00]
   1351 ; ATOM-NEXT:    cvtpd2dq %xmm0, %xmm0 # sched: [7:3.50]
   1352 ; ATOM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   1353 ; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   1354 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1355 ;
   1356 ; SLM-LABEL: test_cvtpd2dq:
   1357 ; SLM:       # %bb.0:
   1358 ; SLM-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [4:0.50]
   1359 ; SLM-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [7:1.00]
   1360 ; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   1361 ; SLM-NEXT:    retq # sched: [4:1.00]
   1362 ;
   1363 ; SANDY-SSE-LABEL: test_cvtpd2dq:
   1364 ; SANDY-SSE:       # %bb.0:
   1365 ; SANDY-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
   1366 ; SANDY-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
   1367 ; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   1368 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1369 ;
   1370 ; SANDY-LABEL: test_cvtpd2dq:
   1371 ; SANDY:       # %bb.0:
   1372 ; SANDY-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
   1373 ; SANDY-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [10:1.00]
   1374 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1375 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1376 ;
   1377 ; HASWELL-SSE-LABEL: test_cvtpd2dq:
   1378 ; HASWELL-SSE:       # %bb.0:
   1379 ; HASWELL-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
   1380 ; HASWELL-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
   1381 ; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   1382 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1383 ;
   1384 ; HASWELL-LABEL: test_cvtpd2dq:
   1385 ; HASWELL:       # %bb.0:
   1386 ; HASWELL-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
   1387 ; HASWELL-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
   1388 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1389 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1390 ;
   1391 ; BROADWELL-SSE-LABEL: test_cvtpd2dq:
   1392 ; BROADWELL-SSE:       # %bb.0:
   1393 ; BROADWELL-SSE-NEXT:    cvtpd2dq (%rdi), %xmm1 # sched: [9:1.00]
   1394 ; BROADWELL-SSE-NEXT:    cvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
   1395 ; BROADWELL-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   1396 ; BROADWELL-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
   1397 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1398 ;
   1399 ; BROADWELL-LABEL: test_cvtpd2dq:
   1400 ; BROADWELL:       # %bb.0:
   1401 ; BROADWELL-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
   1402 ; BROADWELL-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
   1403 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1404 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1405 ;
   1406 ; SKYLAKE-SSE-LABEL: test_cvtpd2dq:
   1407 ; SKYLAKE-SSE:       # %bb.0:
   1408 ; SKYLAKE-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
   1409 ; SKYLAKE-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00]
   1410 ; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   1411 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1412 ;
   1413 ; SKYLAKE-LABEL: test_cvtpd2dq:
   1414 ; SKYLAKE:       # %bb.0:
   1415 ; SKYLAKE-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
   1416 ; SKYLAKE-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
   1417 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   1418 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1419 ;
   1420 ; SKX-SSE-LABEL: test_cvtpd2dq:
   1421 ; SKX-SSE:       # %bb.0:
   1422 ; SKX-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
   1423 ; SKX-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00]
   1424 ; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   1425 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1426 ;
   1427 ; SKX-LABEL: test_cvtpd2dq:
   1428 ; SKX:       # %bb.0:
   1429 ; SKX-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
   1430 ; SKX-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [8:0.50]
   1431 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   1432 ; SKX-NEXT:    retq # sched: [7:1.00]
   1433 ;
   1434 ; BTVER2-SSE-LABEL: test_cvtpd2dq:
   1435 ; BTVER2-SSE:       # %bb.0:
   1436 ; BTVER2-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [3:1.00]
   1437 ; BTVER2-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [8:1.00]
   1438 ; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   1439 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1440 ;
   1441 ; BTVER2-LABEL: test_cvtpd2dq:
   1442 ; BTVER2:       # %bb.0:
   1443 ; BTVER2-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
   1444 ; BTVER2-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [3:1.00]
   1445 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1446 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1447 ;
   1448 ; ZNVER1-SSE-LABEL: test_cvtpd2dq:
   1449 ; ZNVER1-SSE:       # %bb.0:
   1450 ; ZNVER1-SSE-NEXT:    cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
   1451 ; ZNVER1-SSE-NEXT:    cvtpd2dq (%rdi), %xmm0 # sched: [12:1.00]
   1452 ; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
   1453 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1454 ;
   1455 ; ZNVER1-LABEL: test_cvtpd2dq:
   1456 ; ZNVER1:       # %bb.0:
   1457 ; ZNVER1-NEXT:    vcvtpd2dqx (%rdi), %xmm1 # sched: [12:1.00]
   1458 ; ZNVER1-NEXT:    vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
   1459 ; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   1460 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 converts the register operand; %3 converts the operand loaded from
        ; %a1 (align 16). The integer add combines both results into the value
        ; that is returned, so the output depends on both conversions.
   1461   %1 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
   1462   %2 = load <2 x double>, <2 x double> *%a1, align 16
   1463   %3 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %2)
   1464   %4 = add <4 x i32> %1, %3
   1465   ret <4 x i32> %4
   1466 }
        ; Declaration of the intrinsic called by test_cvtpd2dq.
   1467 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
   1468 
   1469 define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) {
        ; Scheduling test for the llvm.x86.sse2.cvtpd2ps intrinsic (<2 x double> to
        ; <4 x float>), applied once to the register argument %a0 and once to a
        ; value loaded from %a1.
        ; The assertion lines below are autogenerated (see the note on the first
        ; line of this file); regenerate them with utils/update_llc_test_checks.py
        ; rather than editing them by hand.
        ; The "# sched" suffixes appear because the RUN lines pass -print-schedule;
        ; presumably the pair is latency and reciprocal throughput (TODO confirm
        ; against the scheduler models).
   1470 ; GENERIC-LABEL: test_cvtpd2ps:
   1471 ; GENERIC:       # %bb.0:
   1472 ; GENERIC-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
   1473 ; GENERIC-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
   1474 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1475 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1476 ;
   1477 ; ATOM-LABEL: test_cvtpd2ps:
   1478 ; ATOM:       # %bb.0:
   1479 ; ATOM-NEXT:    cvtpd2ps (%rdi), %xmm1 # sched: [8:4.00]
   1480 ; ATOM-NEXT:    cvtpd2ps %xmm0, %xmm0 # sched: [7:3.50]
   1481 ; ATOM-NEXT:    addps %xmm0, %xmm1 # sched: [5:5.00]
   1482 ; ATOM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
   1483 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1484 ;
   1485 ; SLM-LABEL: test_cvtpd2ps:
   1486 ; SLM:       # %bb.0:
   1487 ; SLM-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [4:0.50]
   1488 ; SLM-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [7:1.00]
   1489 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1490 ; SLM-NEXT:    retq # sched: [4:1.00]
   1491 ;
   1492 ; SANDY-SSE-LABEL: test_cvtpd2ps:
   1493 ; SANDY-SSE:       # %bb.0:
   1494 ; SANDY-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
   1495 ; SANDY-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
   1496 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1497 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1498 ;
   1499 ; SANDY-LABEL: test_cvtpd2ps:
   1500 ; SANDY:       # %bb.0:
   1501 ; SANDY-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
   1502 ; SANDY-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [10:1.00]
   1503 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1504 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1505 ;
   1506 ; HASWELL-SSE-LABEL: test_cvtpd2ps:
   1507 ; HASWELL-SSE:       # %bb.0:
   1508 ; HASWELL-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
   1509 ; HASWELL-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
   1510 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1511 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1512 ;
   1513 ; HASWELL-LABEL: test_cvtpd2ps:
   1514 ; HASWELL:       # %bb.0:
   1515 ; HASWELL-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
   1516 ; HASWELL-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
   1517 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1518 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1519 ;
   1520 ; BROADWELL-SSE-LABEL: test_cvtpd2ps:
   1521 ; BROADWELL-SSE:       # %bb.0:
   1522 ; BROADWELL-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1 # sched: [9:1.00]
   1523 ; BROADWELL-SSE-NEXT:    cvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
   1524 ; BROADWELL-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   1525 ; BROADWELL-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
   1526 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1527 ;
   1528 ; BROADWELL-LABEL: test_cvtpd2ps:
   1529 ; BROADWELL:       # %bb.0:
   1530 ; BROADWELL-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
   1531 ; BROADWELL-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
   1532 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1533 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1534 ;
   1535 ; SKYLAKE-SSE-LABEL: test_cvtpd2ps:
   1536 ; SKYLAKE-SSE:       # %bb.0:
   1537 ; SKYLAKE-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00]
   1538 ; SKYLAKE-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
   1539 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   1540 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1541 ;
   1542 ; SKYLAKE-LABEL: test_cvtpd2ps:
   1543 ; SKYLAKE:       # %bb.0:
   1544 ; SKYLAKE-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
   1545 ; SKYLAKE-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
   1546 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1547 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1548 ;
   1549 ; SKX-SSE-LABEL: test_cvtpd2ps:
   1550 ; SKX-SSE:       # %bb.0:
   1551 ; SKX-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00]
   1552 ; SKX-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
   1553 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   1554 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1555 ;
   1556 ; SKX-LABEL: test_cvtpd2ps:
   1557 ; SKX:       # %bb.0:
   1558 ; SKX-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
   1559 ; SKX-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
   1560 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1561 ; SKX-NEXT:    retq # sched: [7:1.00]
   1562 ;
   1563 ; BTVER2-SSE-LABEL: test_cvtpd2ps:
   1564 ; BTVER2-SSE:       # %bb.0:
   1565 ; BTVER2-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [3:1.00]
   1566 ; BTVER2-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [8:1.00]
   1567 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1568 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1569 ;
   1570 ; BTVER2-LABEL: test_cvtpd2ps:
   1571 ; BTVER2:       # %bb.0:
   1572 ; BTVER2-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
   1573 ; BTVER2-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [3:1.00]
   1574 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1575 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1576 ;
   1577 ; ZNVER1-SSE-LABEL: test_cvtpd2ps:
   1578 ; ZNVER1-SSE:       # %bb.0:
   1579 ; ZNVER1-SSE-NEXT:    cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
   1580 ; ZNVER1-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
   1581 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1582 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1583 ;
   1584 ; ZNVER1-LABEL: test_cvtpd2ps:
   1585 ; ZNVER1:       # %bb.0:
   1586 ; ZNVER1-NEXT:    vcvtpd2psx (%rdi), %xmm1 # sched: [11:1.00]
   1587 ; ZNVER1-NEXT:    vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
   1588 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1589 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 converts the register operand; %3 converts the operand loaded from
        ; %a1 (align 16). The fadd combines both results into the value that is
        ; returned, so the output depends on both conversions.
   1590   %1 = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
   1591   %2 = load <2 x double>, <2 x double> *%a1, align 16
   1592   %3 = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %2)
   1593   %4 = fadd <4 x float> %1, %3
   1594   ret <4 x float> %4
   1595 }
        ; Declaration of the intrinsic called by test_cvtpd2ps.
   1596 declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
   1597 
   1598 define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) {
        ; Scheduling test for the llvm.x86.sse2.cvtps2dq intrinsic (<4 x float> to
        ; <4 x i32>), applied once to the register argument %a0 and once to a value
        ; loaded from %a1.
        ; The assertion lines below are autogenerated (see the note on the first
        ; line of this file); regenerate them with utils/update_llc_test_checks.py
        ; rather than editing them by hand.
        ; The "# sched" suffixes appear because the RUN lines pass -print-schedule;
        ; presumably the pair is latency and reciprocal throughput (TODO confirm
        ; against the scheduler models).
   1599 ; GENERIC-LABEL: test_cvtps2dq:
   1600 ; GENERIC:       # %bb.0:
   1601 ; GENERIC-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
   1602 ; GENERIC-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
   1603 ; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   1604 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1605 ;
   1606 ; ATOM-LABEL: test_cvtps2dq:
   1607 ; ATOM:       # %bb.0:
   1608 ; ATOM-NEXT:    cvtps2dq (%rdi), %xmm1 # sched: [7:3.50]
   1609 ; ATOM-NEXT:    cvtps2dq %xmm0, %xmm0 # sched: [6:3.00]
   1610 ; ATOM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   1611 ; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   1612 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1613 ;
   1614 ; SLM-LABEL: test_cvtps2dq:
   1615 ; SLM:       # %bb.0:
   1616 ; SLM-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
   1617 ; SLM-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [7:1.00]
   1618 ; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   1619 ; SLM-NEXT:    retq # sched: [4:1.00]
   1620 ;
   1621 ; SANDY-SSE-LABEL: test_cvtps2dq:
   1622 ; SANDY-SSE:       # %bb.0:
   1623 ; SANDY-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
   1624 ; SANDY-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
   1625 ; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   1626 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1627 ;
   1628 ; SANDY-LABEL: test_cvtps2dq:
   1629 ; SANDY:       # %bb.0:
   1630 ; SANDY-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
   1631 ; SANDY-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00]
   1632 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1633 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1634 ;
   1635 ; HASWELL-SSE-LABEL: test_cvtps2dq:
   1636 ; HASWELL-SSE:       # %bb.0:
   1637 ; HASWELL-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
   1638 ; HASWELL-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
   1639 ; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   1640 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1641 ;
   1642 ; HASWELL-LABEL: test_cvtps2dq:
   1643 ; HASWELL:       # %bb.0:
   1644 ; HASWELL-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
   1645 ; HASWELL-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00]
   1646 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1647 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1648 ;
   1649 ; BROADWELL-SSE-LABEL: test_cvtps2dq:
   1650 ; BROADWELL-SSE:       # %bb.0:
   1651 ; BROADWELL-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
   1652 ; BROADWELL-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [8:1.00]
   1653 ; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   1654 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1655 ;
   1656 ; BROADWELL-LABEL: test_cvtps2dq:
   1657 ; BROADWELL:       # %bb.0:
   1658 ; BROADWELL-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
   1659 ; BROADWELL-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00]
   1660 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1661 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1662 ;
   1663 ; SKYLAKE-SSE-LABEL: test_cvtps2dq:
   1664 ; SKYLAKE-SSE:       # %bb.0:
   1665 ; SKYLAKE-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
   1666 ; SKYLAKE-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [10:0.50]
   1667 ; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   1668 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1669 ;
   1670 ; SKYLAKE-LABEL: test_cvtps2dq:
   1671 ; SKYLAKE:       # %bb.0:
   1672 ; SKYLAKE-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50]
   1673 ; SKYLAKE-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50]
   1674 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   1675 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1676 ;
   1677 ; SKX-SSE-LABEL: test_cvtps2dq:
   1678 ; SKX-SSE:       # %bb.0:
   1679 ; SKX-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
   1680 ; SKX-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [10:0.50]
   1681 ; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   1682 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1683 ;
   1684 ; SKX-LABEL: test_cvtps2dq:
   1685 ; SKX:       # %bb.0:
   1686 ; SKX-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50]
   1687 ; SKX-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50]
   1688 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   1689 ; SKX-NEXT:    retq # sched: [7:1.00]
   1690 ;
   1691 ; BTVER2-SSE-LABEL: test_cvtps2dq:
   1692 ; BTVER2-SSE:       # %bb.0:
   1693 ; BTVER2-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
   1694 ; BTVER2-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [8:1.00]
   1695 ; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   1696 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1697 ;
   1698 ; BTVER2-LABEL: test_cvtps2dq:
   1699 ; BTVER2:       # %bb.0:
   1700 ; BTVER2-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00]
   1701 ; BTVER2-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
   1702 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   1703 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1704 ;
   1705 ; ZNVER1-SSE-LABEL: test_cvtps2dq:
   1706 ; ZNVER1-SSE:       # %bb.0:
   1707 ; ZNVER1-SSE-NEXT:    cvtps2dq %xmm0, %xmm1 # sched: [5:1.00]
   1708 ; ZNVER1-SSE-NEXT:    cvtps2dq (%rdi), %xmm0 # sched: [12:1.00]
   1709 ; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
   1710 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1711 ;
   1712 ; ZNVER1-LABEL: test_cvtps2dq:
   1713 ; ZNVER1:       # %bb.0:
   1714 ; ZNVER1-NEXT:    vcvtps2dq (%rdi), %xmm1 # sched: [12:1.00]
   1715 ; ZNVER1-NEXT:    vcvtps2dq %xmm0, %xmm0 # sched: [5:1.00]
   1716 ; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   1717 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 converts the register operand; %3 converts the operand loaded from
        ; %a1 (align 16). The integer add combines both results into the value
        ; that is returned, so the output depends on both conversions.
   1718   %1 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
   1719   %2 = load <4 x float>, <4 x float> *%a1, align 16
   1720   %3 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %2)
   1721   %4 = add <4 x i32> %1, %3
   1722   ret <4 x i32> %4
   1723 }
        ; Declaration of the intrinsic called by test_cvtps2dq.
   1724 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
   1725 
   1726 define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) {
        ; Scheduling test for the float to double widening that the expected
        ; assembly below shows as cvtps2pd / vcvtps2pd. No intrinsic is called;
        ; the conversion is written as generic IR (shufflevector + fpext), once on
        ; the register argument %a0 and once on a value loaded from %a1.
        ; The assertion lines below are autogenerated (see the note on the first
        ; line of this file); regenerate them with utils/update_llc_test_checks.py
        ; rather than editing them by hand.
        ; The "# sched" suffixes appear because the RUN lines pass -print-schedule;
        ; presumably the pair is latency and reciprocal throughput (TODO confirm
        ; against the scheduler models).
   1727 ; GENERIC-LABEL: test_cvtps2pd:
   1728 ; GENERIC:       # %bb.0:
   1729 ; GENERIC-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
   1730 ; GENERIC-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
   1731 ; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   1732 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1733 ;
   1734 ; ATOM-LABEL: test_cvtps2pd:
   1735 ; ATOM:       # %bb.0:
   1736 ; ATOM-NEXT:    cvtps2pd (%rdi), %xmm1 # sched: [8:4.00]
   1737 ; ATOM-NEXT:    cvtps2pd %xmm0, %xmm0 # sched: [7:3.50]
   1738 ; ATOM-NEXT:    addpd %xmm0, %xmm1 # sched: [6:3.00]
   1739 ; ATOM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
   1740 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1741 ;
   1742 ; SLM-LABEL: test_cvtps2pd:
   1743 ; SLM:       # %bb.0:
   1744 ; SLM-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [4:0.50]
   1745 ; SLM-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
   1746 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   1747 ; SLM-NEXT:    retq # sched: [4:1.00]
   1748 ;
   1749 ; SANDY-SSE-LABEL: test_cvtps2pd:
   1750 ; SANDY-SSE:       # %bb.0:
   1751 ; SANDY-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
   1752 ; SANDY-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
   1753 ; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   1754 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1755 ;
   1756 ; SANDY-LABEL: test_cvtps2pd:
   1757 ; SANDY:       # %bb.0:
   1758 ; SANDY-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
   1759 ; SANDY-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00]
   1760 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1761 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1762 ;
   1763 ; HASWELL-SSE-LABEL: test_cvtps2pd:
   1764 ; HASWELL-SSE:       # %bb.0:
   1765 ; HASWELL-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
   1766 ; HASWELL-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [6:1.00]
   1767 ; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   1768 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1769 ;
   1770 ; HASWELL-LABEL: test_cvtps2pd:
   1771 ; HASWELL:       # %bb.0:
   1772 ; HASWELL-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
   1773 ; HASWELL-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [6:1.00]
   1774 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1775 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1776 ;
   1777 ; BROADWELL-SSE-LABEL: test_cvtps2pd:
   1778 ; BROADWELL-SSE:       # %bb.0:
   1779 ; BROADWELL-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
   1780 ; BROADWELL-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [6:1.00]
   1781 ; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   1782 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1783 ;
   1784 ; BROADWELL-LABEL: test_cvtps2pd:
   1785 ; BROADWELL:       # %bb.0:
   1786 ; BROADWELL-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
   1787 ; BROADWELL-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [6:1.00]
   1788 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1789 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1790 ;
   1791 ; SKYLAKE-SSE-LABEL: test_cvtps2pd:
   1792 ; SKYLAKE-SSE:       # %bb.0:
   1793 ; SKYLAKE-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [5:1.00]
   1794 ; SKYLAKE-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [9:0.50]
   1795 ; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   1796 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1797 ;
   1798 ; SKYLAKE-LABEL: test_cvtps2pd:
   1799 ; SKYLAKE:       # %bb.0:
   1800 ; SKYLAKE-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
   1801 ; SKYLAKE-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50]
   1802 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1803 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1804 ;
   1805 ; SKX-SSE-LABEL: test_cvtps2pd:
   1806 ; SKX-SSE:       # %bb.0:
   1807 ; SKX-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [5:1.00]
   1808 ; SKX-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [9:0.50]
   1809 ; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   1810 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1811 ;
   1812 ; SKX-LABEL: test_cvtps2pd:
   1813 ; SKX:       # %bb.0:
   1814 ; SKX-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
   1815 ; SKX-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50]
   1816 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1817 ; SKX-NEXT:    retq # sched: [7:1.00]
   1818 ;
   1819 ; BTVER2-SSE-LABEL: test_cvtps2pd:
   1820 ; BTVER2-SSE:       # %bb.0:
   1821 ; BTVER2-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
   1822 ; BTVER2-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
   1823 ; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   1824 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1825 ;
   1826 ; BTVER2-LABEL: test_cvtps2pd:
   1827 ; BTVER2:       # %bb.0:
   1828 ; BTVER2-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00]
   1829 ; BTVER2-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
   1830 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1831 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1832 ;
   1833 ; ZNVER1-SSE-LABEL: test_cvtps2pd:
   1834 ; ZNVER1-SSE:       # %bb.0:
   1835 ; ZNVER1-SSE-NEXT:    cvtps2pd %xmm0, %xmm1 # sched: [3:1.00]
   1836 ; ZNVER1-SSE-NEXT:    cvtps2pd (%rdi), %xmm0 # sched: [10:1.00]
   1837 ; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   1838 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1839 ;
   1840 ; ZNVER1-LABEL: test_cvtps2pd:
   1841 ; ZNVER1:       # %bb.0:
   1842 ; ZNVER1-NEXT:    vcvtps2pd (%rdi), %xmm1 # sched: [10:1.00]
   1843 ; ZNVER1-NEXT:    vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00]
   1844 ; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1845 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Each <4 x float> input is narrowed to its lanes 0 and 1 with
        ; shufflevector and then widened to <2 x double> with fpext. %1/%2 handle
        ; the register operand, %3/%4/%5 handle the operand loaded from %a1
        ; (align 16), and the fadd combines both results into the returned value.
   1846   %1 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1>
   1847   %2 = fpext <2 x float> %1 to <2 x double>
   1848   %3 = load <4 x float>, <4 x float> *%a1, align 16
   1849   %4 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 0, i32 1>
   1850   %5 = fpext <2 x float> %4 to <2 x double>
   1851   %6 = fadd <2 x double> %2, %5
   1852   ret <2 x double> %6
   1853 }
   1854 
   1855 define i32 @test_cvtsd2si(double %a0, double *%a1) {
        ; Scheduling test for the llvm.x86.sse2.cvtsd2si intrinsic (scalar double in
        ; lane 0 of a <2 x double> to i32), applied once to the register argument
        ; %a0 and once to a double loaded from %a1.
        ; The assertion lines below are autogenerated (see the note on the first
        ; line of this file); regenerate them with utils/update_llc_test_checks.py
        ; rather than editing them by hand.
        ; The "# sched" suffixes appear because the RUN lines pass -print-schedule;
        ; presumably the pair is latency and reciprocal throughput (TODO confirm
        ; against the scheduler models).
   1856 ; GENERIC-LABEL: test_cvtsd2si:
   1857 ; GENERIC:       # %bb.0:
   1858 ; GENERIC-NEXT:    cvtsd2si %xmm0, %ecx # sched: [5:1.00]
   1859 ; GENERIC-NEXT:    cvtsd2si (%rdi), %eax # sched: [9:1.00]
   1860 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
   1861 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1862 ;
   1863 ; ATOM-LABEL: test_cvtsd2si:
   1864 ; ATOM:       # %bb.0:
   1865 ; ATOM-NEXT:    cvtsd2si (%rdi), %eax # sched: [9:4.50]
   1866 ; ATOM-NEXT:    cvtsd2si %xmm0, %ecx # sched: [8:4.00]
   1867 ; ATOM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
   1868 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1869 ;
   1870 ; SLM-LABEL: test_cvtsd2si:
   1871 ; SLM:       # %bb.0:
   1872 ; SLM-NEXT:    cvtsd2si (%rdi), %eax # sched: [7:1.00]
   1873 ; SLM-NEXT:    cvtsd2si %xmm0, %ecx # sched: [4:0.50]
   1874 ; SLM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
   1875 ; SLM-NEXT:    retq # sched: [4:1.00]
   1876 ;
   1877 ; SANDY-SSE-LABEL: test_cvtsd2si:
   1878 ; SANDY-SSE:       # %bb.0:
   1879 ; SANDY-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [5:1.00]
   1880 ; SANDY-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [9:1.00]
   1881 ; SANDY-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.33]
   1882 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1883 ;
   1884 ; SANDY-LABEL: test_cvtsd2si:
   1885 ; SANDY:       # %bb.0:
   1886 ; SANDY-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [5:1.00]
   1887 ; SANDY-NEXT:    vcvtsd2si (%rdi), %eax # sched: [10:1.00]
   1888 ; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
   1889 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1890 ;
   1891 ; HASWELL-SSE-LABEL: test_cvtsd2si:
   1892 ; HASWELL-SSE:       # %bb.0:
   1893 ; HASWELL-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [4:1.00]
   1894 ; HASWELL-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [9:1.00]
   1895 ; HASWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1896 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1897 ;
   1898 ; HASWELL-LABEL: test_cvtsd2si:
   1899 ; HASWELL:       # %bb.0:
   1900 ; HASWELL-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [4:1.00]
   1901 ; HASWELL-NEXT:    vcvtsd2si (%rdi), %eax # sched: [9:1.00]
   1902 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1903 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1904 ;
   1905 ; BROADWELL-SSE-LABEL: test_cvtsd2si:
   1906 ; BROADWELL-SSE:       # %bb.0:
   1907 ; BROADWELL-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [9:1.00]
   1908 ; BROADWELL-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [4:1.00]
   1909 ; BROADWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1910 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1911 ;
   1912 ; BROADWELL-LABEL: test_cvtsd2si:
   1913 ; BROADWELL:       # %bb.0:
   1914 ; BROADWELL-NEXT:    vcvtsd2si (%rdi), %eax # sched: [9:1.00]
   1915 ; BROADWELL-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [4:1.00]
   1916 ; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1917 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1918 ;
   1919 ; SKYLAKE-SSE-LABEL: test_cvtsd2si:
   1920 ; SKYLAKE-SSE:       # %bb.0:
   1921 ; SKYLAKE-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [6:1.00]
   1922 ; SKYLAKE-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [11:1.00]
   1923 ; SKYLAKE-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1924 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1925 ;
   1926 ; SKYLAKE-LABEL: test_cvtsd2si:
   1927 ; SKYLAKE:       # %bb.0:
   1928 ; SKYLAKE-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [6:1.00]
   1929 ; SKYLAKE-NEXT:    vcvtsd2si (%rdi), %eax # sched: [11:1.00]
   1930 ; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1931 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1932 ;
   1933 ; SKX-SSE-LABEL: test_cvtsd2si:
   1934 ; SKX-SSE:       # %bb.0:
   1935 ; SKX-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [6:1.00]
   1936 ; SKX-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [11:1.00]
   1937 ; SKX-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1938 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1939 ;
   1940 ; SKX-LABEL: test_cvtsd2si:
   1941 ; SKX:       # %bb.0:
   1942 ; SKX-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [6:1.00]
   1943 ; SKX-NEXT:    vcvtsd2si (%rdi), %eax # sched: [11:1.00]
   1944 ; SKX-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1945 ; SKX-NEXT:    retq # sched: [7:1.00]
   1946 ;
   1947 ; BTVER2-SSE-LABEL: test_cvtsd2si:
   1948 ; BTVER2-SSE:       # %bb.0:
   1949 ; BTVER2-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [12:1.00]
   1950 ; BTVER2-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [7:1.00]
   1951 ; BTVER2-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.50]
   1952 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1953 ;
   1954 ; BTVER2-LABEL: test_cvtsd2si:
   1955 ; BTVER2:       # %bb.0:
   1956 ; BTVER2-NEXT:    vcvtsd2si (%rdi), %eax # sched: [12:1.00]
   1957 ; BTVER2-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [7:1.00]
   1958 ; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
   1959 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1960 ;
   1961 ; ZNVER1-SSE-LABEL: test_cvtsd2si:
   1962 ; ZNVER1-SSE:       # %bb.0:
   1963 ; ZNVER1-SSE-NEXT:    cvtsd2si (%rdi), %eax # sched: [12:1.00]
   1964 ; ZNVER1-SSE-NEXT:    cvtsd2si %xmm0, %ecx # sched: [5:1.00]
   1965 ; ZNVER1-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1966 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1967 ;
   1968 ; ZNVER1-LABEL: test_cvtsd2si:
   1969 ; ZNVER1:       # %bb.0:
   1970 ; ZNVER1-NEXT:    vcvtsd2si (%rdi), %eax # sched: [12:1.00]
   1971 ; ZNVER1-NEXT:    vcvtsd2si %xmm0, %ecx # sched: [5:1.00]
   1972 ; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1973 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; The intrinsic takes a <2 x double>, so each scalar is first placed in
        ; lane 0 of an undef vector with insertelement. %1/%2 handle the register
        ; operand, %3/%4/%5 handle the double loaded from %a1 (align 8), and the
        ; i32 add combines both results into the returned value.
   1974   %1 = insertelement <2 x double> undef, double %a0, i32 0
   1975   %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %1)
   1976   %3 = load double, double *%a1, align 8
   1977   %4 = insertelement <2 x double> undef, double %3, i32 0
   1978   %5 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %4)
   1979   %6 = add i32 %2, %5
   1980   ret i32 %6
   1981 }
        ; Declaration of the intrinsic called by test_cvtsd2si.
   1982 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
   1983 
   1984 define i64 @test_cvtsd2siq(double %a0, double *%a1) {
        ; Schedule-annotation test for the 64-bit-result form of (v)cvtsd2si.
        ; %a0 is converted through @llvm.x86.sse2.cvtsd2si64, the double loaded
        ; from %a1 is converted the same way, and the two i64 results are added
        ; so the returned value depends on both conversions.
        ; The per-CPU assertions below are autogenerated (see the NOTE on the
        ; first line of the file) and the "# sched" annotations are produced by
        ; the -print-schedule flag in the run lines; regenerate them with the
        ; script instead of editing them by hand.
   1985 ; GENERIC-LABEL: test_cvtsd2siq:
   1986 ; GENERIC:       # %bb.0:
   1987 ; GENERIC-NEXT:    cvtsd2si %xmm0, %rcx # sched: [5:1.00]
   1988 ; GENERIC-NEXT:    cvtsd2si (%rdi), %rax # sched: [9:1.00]
   1989 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
   1990 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1991 ;
   1992 ; ATOM-LABEL: test_cvtsd2siq:
   1993 ; ATOM:       # %bb.0:
   1994 ; ATOM-NEXT:    cvtsd2si (%rdi), %rax # sched: [9:4.50]
   1995 ; ATOM-NEXT:    cvtsd2si %xmm0, %rcx # sched: [8:4.00]
   1996 ; ATOM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
   1997 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1998 ;
   1999 ; SLM-LABEL: test_cvtsd2siq:
   2000 ; SLM:       # %bb.0:
   2001 ; SLM-NEXT:    cvtsd2si (%rdi), %rax # sched: [7:1.00]
   2002 ; SLM-NEXT:    cvtsd2si %xmm0, %rcx # sched: [4:0.50]
   2003 ; SLM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
   2004 ; SLM-NEXT:    retq # sched: [4:1.00]
   2005 ;
   2006 ; SANDY-SSE-LABEL: test_cvtsd2siq:
   2007 ; SANDY-SSE:       # %bb.0:
   2008 ; SANDY-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [5:1.00]
   2009 ; SANDY-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [9:1.00]
   2010 ; SANDY-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.33]
   2011 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2012 ;
   2013 ; SANDY-LABEL: test_cvtsd2siq:
   2014 ; SANDY:       # %bb.0:
   2015 ; SANDY-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [5:1.00]
   2016 ; SANDY-NEXT:    vcvtsd2si (%rdi), %rax # sched: [10:1.00]
   2017 ; SANDY-NEXT:    addq %rcx, %rax # sched: [1:0.33]
   2018 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2019 ;
   2020 ; HASWELL-SSE-LABEL: test_cvtsd2siq:
   2021 ; HASWELL-SSE:       # %bb.0:
   2022 ; HASWELL-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [4:1.00]
   2023 ; HASWELL-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [9:1.00]
   2024 ; HASWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   2025 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2026 ;
   2027 ; HASWELL-LABEL: test_cvtsd2siq:
   2028 ; HASWELL:       # %bb.0:
   2029 ; HASWELL-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [4:1.00]
   2030 ; HASWELL-NEXT:    vcvtsd2si (%rdi), %rax # sched: [9:1.00]
   2031 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   2032 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2033 ;
   2034 ; BROADWELL-SSE-LABEL: test_cvtsd2siq:
   2035 ; BROADWELL-SSE:       # %bb.0:
   2036 ; BROADWELL-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [9:1.00]
   2037 ; BROADWELL-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [4:1.00]
   2038 ; BROADWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   2039 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2040 ;
   2041 ; BROADWELL-LABEL: test_cvtsd2siq:
   2042 ; BROADWELL:       # %bb.0:
   2043 ; BROADWELL-NEXT:    vcvtsd2si (%rdi), %rax # sched: [9:1.00]
   2044 ; BROADWELL-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [4:1.00]
   2045 ; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   2046 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2047 ;
   2048 ; SKYLAKE-SSE-LABEL: test_cvtsd2siq:
   2049 ; SKYLAKE-SSE:       # %bb.0:
   2050 ; SKYLAKE-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [6:1.00]
   2051 ; SKYLAKE-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [11:1.00]
   2052 ; SKYLAKE-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   2053 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2054 ;
   2055 ; SKYLAKE-LABEL: test_cvtsd2siq:
   2056 ; SKYLAKE:       # %bb.0:
   2057 ; SKYLAKE-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [6:1.00]
   2058 ; SKYLAKE-NEXT:    vcvtsd2si (%rdi), %rax # sched: [11:1.00]
   2059 ; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   2060 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2061 ;
   2062 ; SKX-SSE-LABEL: test_cvtsd2siq:
   2063 ; SKX-SSE:       # %bb.0:
   2064 ; SKX-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [6:1.00]
   2065 ; SKX-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [11:1.00]
   2066 ; SKX-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   2067 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2068 ;
   2069 ; SKX-LABEL: test_cvtsd2siq:
   2070 ; SKX:       # %bb.0:
   2071 ; SKX-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [6:1.00]
   2072 ; SKX-NEXT:    vcvtsd2si (%rdi), %rax # sched: [11:1.00]
   2073 ; SKX-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   2074 ; SKX-NEXT:    retq # sched: [7:1.00]
   2075 ;
   2076 ; BTVER2-SSE-LABEL: test_cvtsd2siq:
   2077 ; BTVER2-SSE:       # %bb.0:
   2078 ; BTVER2-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [12:1.00]
   2079 ; BTVER2-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [7:1.00]
   2080 ; BTVER2-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.50]
   2081 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2082 ;
   2083 ; BTVER2-LABEL: test_cvtsd2siq:
   2084 ; BTVER2:       # %bb.0:
   2085 ; BTVER2-NEXT:    vcvtsd2si (%rdi), %rax # sched: [12:1.00]
   2086 ; BTVER2-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [7:1.00]
   2087 ; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
   2088 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2089 ;
   2090 ; ZNVER1-SSE-LABEL: test_cvtsd2siq:
   2091 ; ZNVER1-SSE:       # %bb.0:
   2092 ; ZNVER1-SSE-NEXT:    cvtsd2si (%rdi), %rax # sched: [12:1.00]
   2093 ; ZNVER1-SSE-NEXT:    cvtsd2si %xmm0, %rcx # sched: [5:1.00]
   2094 ; ZNVER1-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   2095 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2096 ;
   2097 ; ZNVER1-LABEL: test_cvtsd2siq:
   2098 ; ZNVER1:       # %bb.0:
   2099 ; ZNVER1-NEXT:    vcvtsd2si (%rdi), %rax # sched: [12:1.00]
   2100 ; ZNVER1-NEXT:    vcvtsd2si %xmm0, %rcx # sched: [5:1.00]
   2101 ; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   2102 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Register-operand conversion: %a0 is placed in lane 0 of a vector whose
        ; other lane is undef, then passed to the intrinsic.
   2103   %1 = insertelement <2 x double> undef, double %a0, i32 0
   2104   %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %1)
        ; Memory-operand conversion: the expected output above shows this load
        ; folded into the convert as "(v)cvtsd2si (%rdi)".
   2105   %3 = load double, double *%a1, align 8
   2106   %4 = insertelement <2 x double> undef, double %3, i32 0
   2107   %5 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %4)
        ; Combine both results so neither conversion is dead.
   2108   %6 = add i64 %2, %5
   2109   ret i64 %6
   2110 }
   2111 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
   2112 
   2113 define float @test_cvtsd2ss(double %a0, double *%a1) {
        ; Schedule-annotation test for (v)cvtsd2ss, written with plain fptrunc
        ; instead of an intrinsic. %a0 and the double loaded from %a1 are each
        ; truncated to float and the two floats are added.
        ; In every configuration checked below the load is emitted as a separate
        ; (v)movsd followed by a register-to-register (v)cvtsd2ss; no
        ; memory-operand form of the convert appears in the expected output.
        ; The assertions are autogenerated (see the NOTE on the first line of
        ; the file); regenerate them with the script instead of hand-editing.
   2114 ; GENERIC-LABEL: test_cvtsd2ss:
   2115 ; GENERIC:       # %bb.0:
   2116 ; GENERIC-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
   2117 ; GENERIC-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
   2118 ; GENERIC-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
   2119 ; GENERIC-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   2120 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2121 ;
   2122 ; ATOM-LABEL: test_cvtsd2ss:
   2123 ; ATOM:       # %bb.0:
   2124 ; ATOM-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00]
   2125 ; ATOM-NEXT:    cvtsd2ss %xmm0, %xmm2 # sched: [6:3.00]
   2126 ; ATOM-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.50]
   2127 ; ATOM-NEXT:    cvtsd2ss %xmm1, %xmm0 # sched: [6:3.00]
   2128 ; ATOM-NEXT:    addss %xmm2, %xmm0 # sched: [5:5.00]
   2129 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2130 ;
   2131 ; SLM-LABEL: test_cvtsd2ss:
   2132 ; SLM:       # %bb.0:
   2133 ; SLM-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:0.50]
   2134 ; SLM-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [3:1.00]
   2135 ; SLM-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:0.50]
   2136 ; SLM-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   2137 ; SLM-NEXT:    retq # sched: [4:1.00]
   2138 ;
   2139 ; SANDY-SSE-LABEL: test_cvtsd2ss:
   2140 ; SANDY-SSE:       # %bb.0:
   2141 ; SANDY-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
   2142 ; SANDY-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
   2143 ; SANDY-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
   2144 ; SANDY-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   2145 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2146 ;
   2147 ; SANDY-LABEL: test_cvtsd2ss:
   2148 ; SANDY:       # %bb.0:
   2149 ; SANDY-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
   2150 ; SANDY-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
   2151 ; SANDY-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
   2152 ; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2153 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2154 ;
   2155 ; HASWELL-SSE-LABEL: test_cvtsd2ss:
   2156 ; HASWELL-SSE:       # %bb.0:
   2157 ; HASWELL-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
   2158 ; HASWELL-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
   2159 ; HASWELL-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
   2160 ; HASWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   2161 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2162 ;
   2163 ; HASWELL-LABEL: test_cvtsd2ss:
   2164 ; HASWELL:       # %bb.0:
   2165 ; HASWELL-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
   2166 ; HASWELL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
   2167 ; HASWELL-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
   2168 ; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2169 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2170 ;
   2171 ; BROADWELL-SSE-LABEL: test_cvtsd2ss:
   2172 ; BROADWELL-SSE:       # %bb.0:
   2173 ; BROADWELL-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
   2174 ; BROADWELL-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
   2175 ; BROADWELL-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
   2176 ; BROADWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   2177 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2178 ;
   2179 ; BROADWELL-LABEL: test_cvtsd2ss:
   2180 ; BROADWELL:       # %bb.0:
   2181 ; BROADWELL-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
   2182 ; BROADWELL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
   2183 ; BROADWELL-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
   2184 ; BROADWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2185 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2186 ;
   2187 ; SKYLAKE-SSE-LABEL: test_cvtsd2ss:
   2188 ; SKYLAKE-SSE:       # %bb.0:
   2189 ; SKYLAKE-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00]
   2190 ; SKYLAKE-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
   2191 ; SKYLAKE-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00]
   2192 ; SKYLAKE-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.50]
   2193 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2194 ;
   2195 ; SKYLAKE-LABEL: test_cvtsd2ss:
   2196 ; SKYLAKE:       # %bb.0:
   2197 ; SKYLAKE-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
   2198 ; SKYLAKE-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
   2199 ; SKYLAKE-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
   2200 ; SKYLAKE-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2201 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2202 ;
   2203 ; SKX-SSE-LABEL: test_cvtsd2ss:
   2204 ; SKX-SSE:       # %bb.0:
   2205 ; SKX-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00]
   2206 ; SKX-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
   2207 ; SKX-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00]
   2208 ; SKX-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.50]
   2209 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2210 ;
   2211 ; SKX-LABEL: test_cvtsd2ss:
   2212 ; SKX:       # %bb.0:
   2213 ; SKX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
   2214 ; SKX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
   2215 ; SKX-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
   2216 ; SKX-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2217 ; SKX-NEXT:    retq # sched: [7:1.00]
   2218 ;
   2219 ; BTVER2-SSE-LABEL: test_cvtsd2ss:
   2220 ; BTVER2-SSE:       # %bb.0:
   2221 ; BTVER2-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [7:2.00]
   2222 ; BTVER2-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
   2223 ; BTVER2-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [7:2.00]
   2224 ; BTVER2-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   2225 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2226 ;
   2227 ; BTVER2-LABEL: test_cvtsd2ss:
   2228 ; BTVER2:       # %bb.0:
   2229 ; BTVER2-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [7:2.00]
   2230 ; BTVER2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
   2231 ; BTVER2-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [7:2.00]
   2232 ; BTVER2-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2233 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2234 ;
   2235 ; ZNVER1-SSE-LABEL: test_cvtsd2ss:
   2236 ; ZNVER1-SSE:       # %bb.0:
   2237 ; ZNVER1-SSE-NEXT:    cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
   2238 ; ZNVER1-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
   2239 ; ZNVER1-SSE-NEXT:    cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
   2240 ; ZNVER1-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   2241 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2242 ;
   2243 ; ZNVER1-LABEL: test_cvtsd2ss:
   2244 ; ZNVER1:       # %bb.0:
   2245 ; ZNVER1-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
   2246 ; ZNVER1-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
   2247 ; ZNVER1-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
   2248 ; ZNVER1-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2249 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Truncate the register argument.
   2250   %1 = fptrunc double %a0 to float
        ; Truncate the value loaded from %a1.
   2251   %2 = load double, double *%a1, align 8
   2252   %3 = fptrunc double %2 to float
        ; Add both floats so the result depends on both conversions.
   2253   %4 = fadd float %1, %3
   2254   ret float %4
   2255 }
   2256 
   2257 define double @test_cvtsi2sd(i32 %a0, i32 *%a1) {
        ; Schedule-annotation test for (v)cvtsi2sdl (32-bit integer to double),
        ; written with plain sitofp. %a0 and the i32 loaded from %a1 are each
        ; converted to double and the two doubles are added.
        ; The expected output below shows one register-operand convert (from
        ; %edi) and one memory-operand convert (from (%rsi)) per configuration.
        ; The assertions are autogenerated (see the NOTE on the first line of
        ; the file); regenerate them with the script instead of hand-editing.
   2258 ; GENERIC-LABEL: test_cvtsi2sd:
   2259 ; GENERIC:       # %bb.0:
   2260 ; GENERIC-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
   2261 ; GENERIC-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
   2262 ; GENERIC-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2263 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2264 ;
   2265 ; ATOM-LABEL: test_cvtsi2sd:
   2266 ; ATOM:       # %bb.0:
   2267 ; ATOM-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [7:3.50]
   2268 ; ATOM-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [6:3.00]
   2269 ; ATOM-NEXT:    addsd %xmm1, %xmm0 # sched: [5:5.00]
   2270 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2271 ;
   2272 ; SLM-LABEL: test_cvtsi2sd:
   2273 ; SLM:       # %bb.0:
   2274 ; SLM-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [7:1.00]
   2275 ; SLM-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [4:0.50]
   2276 ; SLM-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2277 ; SLM-NEXT:    retq # sched: [4:1.00]
   2278 ;
   2279 ; SANDY-SSE-LABEL: test_cvtsi2sd:
   2280 ; SANDY-SSE:       # %bb.0:
   2281 ; SANDY-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
   2282 ; SANDY-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
   2283 ; SANDY-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2284 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2285 ;
   2286 ; SANDY-LABEL: test_cvtsi2sd:
   2287 ; SANDY:       # %bb.0:
   2288 ; SANDY-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
   2289 ; SANDY-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   2290 ; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2291 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2292 ;
   2293 ; HASWELL-SSE-LABEL: test_cvtsi2sd:
   2294 ; HASWELL-SSE:       # %bb.0:
   2295 ; HASWELL-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
   2296 ; HASWELL-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
   2297 ; HASWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2298 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2299 ;
   2300 ; HASWELL-LABEL: test_cvtsi2sd:
   2301 ; HASWELL:       # %bb.0:
   2302 ; HASWELL-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
   2303 ; HASWELL-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   2304 ; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2305 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2306 ;
   2307 ; BROADWELL-SSE-LABEL: test_cvtsi2sd:
   2308 ; BROADWELL-SSE:       # %bb.0:
   2309 ; BROADWELL-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
   2310 ; BROADWELL-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
   2311 ; BROADWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2312 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2313 ;
   2314 ; BROADWELL-LABEL: test_cvtsi2sd:
   2315 ; BROADWELL:       # %bb.0:
   2316 ; BROADWELL-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
   2317 ; BROADWELL-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   2318 ; BROADWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2319 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2320 ;
   2321 ; SKYLAKE-SSE-LABEL: test_cvtsi2sd:
   2322 ; SKYLAKE-SSE:       # %bb.0:
   2323 ; SKYLAKE-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
   2324 ; SKYLAKE-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
   2325 ; SKYLAKE-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
   2326 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2327 ;
   2328 ; SKYLAKE-LABEL: test_cvtsi2sd:
   2329 ; SKYLAKE:       # %bb.0:
   2330 ; SKYLAKE-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
   2331 ; SKYLAKE-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   2332 ; SKYLAKE-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2333 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2334 ;
   2335 ; SKX-SSE-LABEL: test_cvtsi2sd:
   2336 ; SKX-SSE:       # %bb.0:
   2337 ; SKX-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
   2338 ; SKX-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
   2339 ; SKX-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
   2340 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2341 ;
   2342 ; SKX-LABEL: test_cvtsi2sd:
   2343 ; SKX:       # %bb.0:
   2344 ; SKX-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
   2345 ; SKX-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   2346 ; SKX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2347 ; SKX-NEXT:    retq # sched: [7:1.00]
   2348 ;
   2349 ; BTVER2-SSE-LABEL: test_cvtsi2sd:
   2350 ; BTVER2-SSE:       # %bb.0:
   2351 ; BTVER2-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [14:1.00]
   2352 ; BTVER2-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [9:1.00]
   2353 ; BTVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2354 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2355 ;
   2356 ; BTVER2-LABEL: test_cvtsi2sd:
   2357 ; BTVER2:       # %bb.0:
   2358 ; BTVER2-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [9:1.00]
   2359 ; BTVER2-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [14:1.00]
   2360 ; BTVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2361 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2362 ;
   2363 ; ZNVER1-SSE-LABEL: test_cvtsi2sd:
   2364 ; ZNVER1-SSE:       # %bb.0:
   2365 ; ZNVER1-SSE-NEXT:    cvtsi2sdl (%rsi), %xmm0 # sched: [12:1.00]
   2366 ; ZNVER1-SSE-NEXT:    cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
   2367 ; ZNVER1-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2368 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2369 ;
   2370 ; ZNVER1-LABEL: test_cvtsi2sd:
   2371 ; ZNVER1:       # %bb.0:
   2372 ; ZNVER1-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
   2373 ; ZNVER1-NEXT:    vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
   2374 ; ZNVER1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2375 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Register-operand conversion of %a0.
   2376   %1 = sitofp i32 %a0 to double
        ; Memory-operand conversion: the expected output above shows this load
        ; folded into the convert as "(v)cvtsi2sdl (%rsi)".
   2377   %2 = load i32, i32 *%a1, align 8
   2378   %3 = sitofp i32 %2 to double
        ; Add both doubles so the result depends on both conversions.
   2379   %4 = fadd double %1, %3
   2380   ret double %4
   2381 }
   2382 
   2383 define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) {
        ; Schedule-annotation test for (v)cvtsi2sdq (64-bit integer to double),
        ; written with plain sitofp. %a0 and the i64 loaded from %a1 are each
        ; converted to double and the two doubles are added.
        ; The expected output below shows one register-operand convert (from
        ; %rdi) and one memory-operand convert (from (%rsi)) per configuration.
        ; The assertions are autogenerated (see the NOTE on the first line of
        ; the file); regenerate them with the script instead of hand-editing.
   2384 ; GENERIC-LABEL: test_cvtsi2sdq:
   2385 ; GENERIC:       # %bb.0:
   2386 ; GENERIC-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
   2387 ; GENERIC-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
   2388 ; GENERIC-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2389 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2390 ;
   2391 ; ATOM-LABEL: test_cvtsi2sdq:
   2392 ; ATOM:       # %bb.0:
   2393 ; ATOM-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [7:3.50]
   2394 ; ATOM-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [6:3.00]
   2395 ; ATOM-NEXT:    addsd %xmm1, %xmm0 # sched: [5:5.00]
   2396 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2397 ;
   2398 ; SLM-LABEL: test_cvtsi2sdq:
   2399 ; SLM:       # %bb.0:
   2400 ; SLM-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [7:1.00]
   2401 ; SLM-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [4:0.50]
   2402 ; SLM-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2403 ; SLM-NEXT:    retq # sched: [4:1.00]
   2404 ;
   2405 ; SANDY-SSE-LABEL: test_cvtsi2sdq:
   2406 ; SANDY-SSE:       # %bb.0:
   2407 ; SANDY-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
   2408 ; SANDY-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
   2409 ; SANDY-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2410 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2411 ;
   2412 ; SANDY-LABEL: test_cvtsi2sdq:
   2413 ; SANDY:       # %bb.0:
   2414 ; SANDY-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
   2415 ; SANDY-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   2416 ; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2417 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2418 ;
   2419 ; HASWELL-SSE-LABEL: test_cvtsi2sdq:
   2420 ; HASWELL-SSE:       # %bb.0:
   2421 ; HASWELL-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
   2422 ; HASWELL-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
   2423 ; HASWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2424 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2425 ;
   2426 ; HASWELL-LABEL: test_cvtsi2sdq:
   2427 ; HASWELL:       # %bb.0:
   2428 ; HASWELL-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
   2429 ; HASWELL-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   2430 ; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2431 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2432 ;
   2433 ; BROADWELL-SSE-LABEL: test_cvtsi2sdq:
   2434 ; BROADWELL-SSE:       # %bb.0:
   2435 ; BROADWELL-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
   2436 ; BROADWELL-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
   2437 ; BROADWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2438 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2439 ;
   2440 ; BROADWELL-LABEL: test_cvtsi2sdq:
   2441 ; BROADWELL:       # %bb.0:
   2442 ; BROADWELL-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
   2443 ; BROADWELL-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   2444 ; BROADWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2445 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2446 ;
   2447 ; SKYLAKE-SSE-LABEL: test_cvtsi2sdq:
   2448 ; SKYLAKE-SSE:       # %bb.0:
   2449 ; SKYLAKE-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
   2450 ; SKYLAKE-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
   2451 ; SKYLAKE-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
   2452 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2453 ;
   2454 ; SKYLAKE-LABEL: test_cvtsi2sdq:
   2455 ; SKYLAKE:       # %bb.0:
   2456 ; SKYLAKE-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
   2457 ; SKYLAKE-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   2458 ; SKYLAKE-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2459 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2460 ;
   2461 ; SKX-SSE-LABEL: test_cvtsi2sdq:
   2462 ; SKX-SSE:       # %bb.0:
   2463 ; SKX-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
   2464 ; SKX-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
   2465 ; SKX-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
   2466 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2467 ;
   2468 ; SKX-LABEL: test_cvtsi2sdq:
   2469 ; SKX:       # %bb.0:
   2470 ; SKX-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
   2471 ; SKX-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   2472 ; SKX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2473 ; SKX-NEXT:    retq # sched: [7:1.00]
   2474 ;
   2475 ; BTVER2-SSE-LABEL: test_cvtsi2sdq:
   2476 ; BTVER2-SSE:       # %bb.0:
   2477 ; BTVER2-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [14:1.00]
   2478 ; BTVER2-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [9:1.00]
   2479 ; BTVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2480 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2481 ;
   2482 ; BTVER2-LABEL: test_cvtsi2sdq:
   2483 ; BTVER2:       # %bb.0:
   2484 ; BTVER2-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [9:1.00]
   2485 ; BTVER2-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [14:1.00]
   2486 ; BTVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2487 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2488 ;
   2489 ; ZNVER1-SSE-LABEL: test_cvtsi2sdq:
   2490 ; ZNVER1-SSE:       # %bb.0:
   2491 ; ZNVER1-SSE-NEXT:    cvtsi2sdq (%rsi), %xmm0 # sched: [12:1.00]
   2492 ; ZNVER1-SSE-NEXT:    cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
   2493 ; ZNVER1-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2494 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2495 ;
   2496 ; ZNVER1-LABEL: test_cvtsi2sdq:
   2497 ; ZNVER1:       # %bb.0:
   2498 ; ZNVER1-NEXT:    vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
   2499 ; ZNVER1-NEXT:    vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
   2500 ; ZNVER1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2501 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Register-operand conversion of %a0.
   2502   %1 = sitofp i64 %a0 to double
        ; Memory-operand conversion: the expected output above shows this load
        ; folded into the convert as "(v)cvtsi2sdq (%rsi)".
   2503   %2 = load i64, i64 *%a1, align 8
   2504   %3 = sitofp i64 %2 to double
        ; Add both doubles so the result depends on both conversions.
   2505   %4 = fadd double %1, %3
   2506   ret double %4
   2507 }
   2508 
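   2509 ; TODO - cvtss2sd_m: the memory-operand form of cvtss2sd is not covered yet (the test below loads with movss and converts register-to-register).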
   2510 
   2511 define double @test_cvtss2sd(float %a0, float *%a1) {
   2512 ; GENERIC-LABEL: test_cvtss2sd:
   2513 ; GENERIC:       # %bb.0:
   2514 ; GENERIC-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [1:1.00]
   2515 ; GENERIC-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
   2516 ; GENERIC-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [1:1.00]
   2517 ; GENERIC-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2518 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2519 ;
   2520 ; ATOM-LABEL: test_cvtss2sd:
   2521 ; ATOM:       # %bb.0:
   2522 ; ATOM-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
   2523 ; ATOM-NEXT:    cvtss2sd %xmm0, %xmm2 # sched: [6:3.00]
   2524 ; ATOM-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.50]
   2525 ; ATOM-NEXT:    cvtss2sd %xmm1, %xmm0 # sched: [6:3.00]
   2526 ; ATOM-NEXT:    addsd %xmm2, %xmm0 # sched: [5:5.00]
   2527 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2528 ;
   2529 ; SLM-LABEL: test_cvtss2sd:
   2530 ; SLM:       # %bb.0:
   2531 ; SLM-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [4:0.50]
   2532 ; SLM-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00]
   2533 ; SLM-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [4:0.50]
   2534 ; SLM-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2535 ; SLM-NEXT:    retq # sched: [4:1.00]
   2536 ;
   2537 ; SANDY-SSE-LABEL: test_cvtss2sd:
   2538 ; SANDY-SSE:       # %bb.0:
   2539 ; SANDY-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [1:1.00]
   2540 ; SANDY-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
   2541 ; SANDY-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [1:1.00]
   2542 ; SANDY-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2543 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2544 ;
   2545 ; SANDY-LABEL: test_cvtss2sd:
   2546 ; SANDY:       # %bb.0:
   2547 ; SANDY-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
   2548 ; SANDY-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
   2549 ; SANDY-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
   2550 ; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2551 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2552 ;
   2553 ; HASWELL-SSE-LABEL: test_cvtss2sd:
   2554 ; HASWELL-SSE:       # %bb.0:
   2555 ; HASWELL-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [2:1.00]
   2556 ; HASWELL-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   2557 ; HASWELL-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [2:1.00]
   2558 ; HASWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2559 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2560 ;
   2561 ; HASWELL-LABEL: test_cvtss2sd:
   2562 ; HASWELL:       # %bb.0:
   2563 ; HASWELL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
   2564 ; HASWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   2565 ; HASWELL-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00]
   2566 ; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2567 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2568 ;
   2569 ; BROADWELL-SSE-LABEL: test_cvtss2sd:
   2570 ; BROADWELL-SSE:       # %bb.0:
   2571 ; BROADWELL-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [2:1.00]
   2572 ; BROADWELL-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   2573 ; BROADWELL-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [2:1.00]
   2574 ; BROADWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2575 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2576 ;
   2577 ; BROADWELL-LABEL: test_cvtss2sd:
   2578 ; BROADWELL:       # %bb.0:
   2579 ; BROADWELL-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
   2580 ; BROADWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   2581 ; BROADWELL-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00]
   2582 ; BROADWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2583 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2584 ;
   2585 ; SKYLAKE-SSE-LABEL: test_cvtss2sd:
   2586 ; SKYLAKE-SSE:       # %bb.0:
   2587 ; SKYLAKE-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [5:1.00]
   2588 ; SKYLAKE-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   2589 ; SKYLAKE-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [5:1.00]
   2590 ; SKYLAKE-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
   2591 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2592 ;
   2593 ; SKYLAKE-LABEL: test_cvtss2sd:
   2594 ; SKYLAKE:       # %bb.0:
   2595 ; SKYLAKE-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
   2596 ; SKYLAKE-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   2597 ; SKYLAKE-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
   2598 ; SKYLAKE-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2599 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2600 ;
   2601 ; SKX-SSE-LABEL: test_cvtss2sd:
   2602 ; SKX-SSE:       # %bb.0:
   2603 ; SKX-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [5:1.00]
   2604 ; SKX-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   2605 ; SKX-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [5:1.00]
   2606 ; SKX-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
   2607 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2608 ;
   2609 ; SKX-LABEL: test_cvtss2sd:
   2610 ; SKX:       # %bb.0:
   2611 ; SKX-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
   2612 ; SKX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   2613 ; SKX-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
   2614 ; SKX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2615 ; SKX-NEXT:    retq # sched: [7:1.00]
   2616 ;
   2617 ; BTVER2-SSE-LABEL: test_cvtss2sd:
   2618 ; BTVER2-SSE:       # %bb.0:
   2619 ; BTVER2-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [7:2.00]
   2620 ; BTVER2-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
   2621 ; BTVER2-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [7:2.00]
   2622 ; BTVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2623 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2624 ;
   2625 ; BTVER2-LABEL: test_cvtss2sd:
   2626 ; BTVER2:       # %bb.0:
   2627 ; BTVER2-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [7:2.00]
   2628 ; BTVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
   2629 ; BTVER2-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [7:2.00]
   2630 ; BTVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2631 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2632 ;
   2633 ; ZNVER1-SSE-LABEL: test_cvtss2sd:
   2634 ; ZNVER1-SSE:       # %bb.0:
   2635 ; ZNVER1-SSE-NEXT:    cvtss2sd %xmm0, %xmm1 # sched: [4:1.00]
   2636 ; ZNVER1-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
   2637 ; ZNVER1-SSE-NEXT:    cvtss2sd %xmm0, %xmm0 # sched: [4:1.00]
   2638 ; ZNVER1-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
   2639 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2640 ;
   2641 ; ZNVER1-LABEL: test_cvtss2sd:
   2642 ; ZNVER1:       # %bb.0:
   2643 ; ZNVER1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
   2644 ; ZNVER1-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
   2645 ; ZNVER1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
   2646 ; ZNVER1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2647 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   2648   %1 = fpext float %a0 to double
   2649   %2 = load float, float *%a1, align 4
   2650   %3 = fpext float %2 to double
   2651   %4 = fadd double %1, %3
   2652   ret double %4
   2653 }
   2654 
   2655 define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) {
   2656 ; GENERIC-LABEL: test_cvttpd2dq:
   2657 ; GENERIC:       # %bb.0:
   2658 ; GENERIC-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
   2659 ; GENERIC-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
   2660 ; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   2661 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2662 ;
   2663 ; ATOM-LABEL: test_cvttpd2dq:
   2664 ; ATOM:       # %bb.0:
   2665 ; ATOM-NEXT:    cvttpd2dq (%rdi), %xmm1 # sched: [8:4.00]
   2666 ; ATOM-NEXT:    cvttpd2dq %xmm0, %xmm0 # sched: [7:3.50]
   2667 ; ATOM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   2668 ; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   2669 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2670 ;
   2671 ; SLM-LABEL: test_cvttpd2dq:
   2672 ; SLM:       # %bb.0:
   2673 ; SLM-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [4:0.50]
   2674 ; SLM-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [7:1.00]
   2675 ; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   2676 ; SLM-NEXT:    retq # sched: [4:1.00]
   2677 ;
   2678 ; SANDY-SSE-LABEL: test_cvttpd2dq:
   2679 ; SANDY-SSE:       # %bb.0:
   2680 ; SANDY-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
   2681 ; SANDY-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
   2682 ; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   2683 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2684 ;
   2685 ; SANDY-LABEL: test_cvttpd2dq:
   2686 ; SANDY:       # %bb.0:
   2687 ; SANDY-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
   2688 ; SANDY-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [10:1.00]
   2689 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2690 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2691 ;
   2692 ; HASWELL-SSE-LABEL: test_cvttpd2dq:
   2693 ; HASWELL-SSE:       # %bb.0:
   2694 ; HASWELL-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
   2695 ; HASWELL-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
   2696 ; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   2697 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2698 ;
   2699 ; HASWELL-LABEL: test_cvttpd2dq:
   2700 ; HASWELL:       # %bb.0:
   2701 ; HASWELL-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
   2702 ; HASWELL-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
   2703 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2704 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2705 ;
   2706 ; BROADWELL-SSE-LABEL: test_cvttpd2dq:
   2707 ; BROADWELL-SSE:       # %bb.0:
   2708 ; BROADWELL-SSE-NEXT:    cvttpd2dq (%rdi), %xmm1 # sched: [9:1.00]
   2709 ; BROADWELL-SSE-NEXT:    cvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
   2710 ; BROADWELL-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   2711 ; BROADWELL-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
   2712 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2713 ;
   2714 ; BROADWELL-LABEL: test_cvttpd2dq:
   2715 ; BROADWELL:       # %bb.0:
   2716 ; BROADWELL-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
   2717 ; BROADWELL-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
   2718 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2719 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2720 ;
   2721 ; SKYLAKE-SSE-LABEL: test_cvttpd2dq:
   2722 ; SKYLAKE-SSE:       # %bb.0:
   2723 ; SKYLAKE-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
   2724 ; SKYLAKE-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00]
   2725 ; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   2726 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2727 ;
   2728 ; SKYLAKE-LABEL: test_cvttpd2dq:
   2729 ; SKYLAKE:       # %bb.0:
   2730 ; SKYLAKE-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
   2731 ; SKYLAKE-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
   2732 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   2733 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2734 ;
   2735 ; SKX-SSE-LABEL: test_cvttpd2dq:
   2736 ; SKX-SSE:       # %bb.0:
   2737 ; SKX-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
   2738 ; SKX-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00]
   2739 ; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   2740 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2741 ;
   2742 ; SKX-LABEL: test_cvttpd2dq:
   2743 ; SKX:       # %bb.0:
   2744 ; SKX-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
   2745 ; SKX-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [8:0.50]
   2746 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   2747 ; SKX-NEXT:    retq # sched: [7:1.00]
   2748 ;
   2749 ; BTVER2-SSE-LABEL: test_cvttpd2dq:
   2750 ; BTVER2-SSE:       # %bb.0:
   2751 ; BTVER2-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [3:1.00]
   2752 ; BTVER2-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [8:1.00]
   2753 ; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   2754 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2755 ;
   2756 ; BTVER2-LABEL: test_cvttpd2dq:
   2757 ; BTVER2:       # %bb.0:
   2758 ; BTVER2-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
   2759 ; BTVER2-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [3:1.00]
   2760 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2761 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2762 ;
   2763 ; ZNVER1-SSE-LABEL: test_cvttpd2dq:
   2764 ; ZNVER1-SSE:       # %bb.0:
   2765 ; ZNVER1-SSE-NEXT:    cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
   2766 ; ZNVER1-SSE-NEXT:    cvttpd2dq (%rdi), %xmm0 # sched: [12:1.00]
   2767 ; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
   2768 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2769 ;
   2770 ; ZNVER1-LABEL: test_cvttpd2dq:
   2771 ; ZNVER1:       # %bb.0:
   2772 ; ZNVER1-NEXT:    vcvttpd2dqx (%rdi), %xmm1 # sched: [12:1.00]
   2773 ; ZNVER1-NEXT:    vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
   2774 ; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   2775 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Purpose of this test: exercise the packed double -> i32 conversion in
        ; both operand forms. %a0 is converted directly; a second <2 x double>
        ; is loaded from %a1 (align 16) and converted as well, so the
        ; assertions above expect one register-form and one memory-form
        ; cvttpd2dq (vcvttpd2dq / vcvttpd2dqx in the AVX runs).
        ; Each <2 x i32> result is widened to <4 x i32> with a shufflevector
        ; whose mask indices 2 and 3 select from the undef operand, and the
        ; final add keeps both conversions live (paddd / vpaddd above).
        ; The assertions are autogenerated (see the NOTE at the top of the
        ; file); instruction order differs between CPU models, so regenerate
        ; them with the script rather than editing them by hand.
   2776   %1 = fptosi <2 x double> %a0 to <2 x i32>
   2777   %2 = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2778   %3 = load <2 x double>, <2 x double> *%a1, align 16
   2779   %4 = fptosi <2 x double> %3 to <2 x i32>
   2780   %5 = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2781   %6 = add <4 x i32> %2, %5
   2782   ret <4 x i32> %6
   2783 }
   2784 
   2785 define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) {
   2786 ; GENERIC-LABEL: test_cvttps2dq:
   2787 ; GENERIC:       # %bb.0:
   2788 ; GENERIC-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
   2789 ; GENERIC-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
   2790 ; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   2791 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2792 ;
   2793 ; ATOM-LABEL: test_cvttps2dq:
   2794 ; ATOM:       # %bb.0:
   2795 ; ATOM-NEXT:    cvttps2dq (%rdi), %xmm1 # sched: [7:3.50]
   2796 ; ATOM-NEXT:    cvttps2dq %xmm0, %xmm0 # sched: [6:3.00]
   2797 ; ATOM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   2798 ; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   2799 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2800 ;
   2801 ; SLM-LABEL: test_cvttps2dq:
   2802 ; SLM:       # %bb.0:
   2803 ; SLM-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
   2804 ; SLM-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [7:1.00]
   2805 ; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   2806 ; SLM-NEXT:    retq # sched: [4:1.00]
   2807 ;
   2808 ; SANDY-SSE-LABEL: test_cvttps2dq:
   2809 ; SANDY-SSE:       # %bb.0:
   2810 ; SANDY-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
   2811 ; SANDY-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
   2812 ; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   2813 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2814 ;
   2815 ; SANDY-LABEL: test_cvttps2dq:
   2816 ; SANDY:       # %bb.0:
   2817 ; SANDY-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
   2818 ; SANDY-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00]
   2819 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2820 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2821 ;
   2822 ; HASWELL-SSE-LABEL: test_cvttps2dq:
   2823 ; HASWELL-SSE:       # %bb.0:
   2824 ; HASWELL-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
   2825 ; HASWELL-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
   2826 ; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   2827 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2828 ;
   2829 ; HASWELL-LABEL: test_cvttps2dq:
   2830 ; HASWELL:       # %bb.0:
   2831 ; HASWELL-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
   2832 ; HASWELL-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00]
   2833 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2834 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2835 ;
   2836 ; BROADWELL-SSE-LABEL: test_cvttps2dq:
   2837 ; BROADWELL-SSE:       # %bb.0:
   2838 ; BROADWELL-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
   2839 ; BROADWELL-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [8:1.00]
   2840 ; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   2841 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2842 ;
   2843 ; BROADWELL-LABEL: test_cvttps2dq:
   2844 ; BROADWELL:       # %bb.0:
   2845 ; BROADWELL-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
   2846 ; BROADWELL-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00]
   2847 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2848 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2849 ;
   2850 ; SKYLAKE-SSE-LABEL: test_cvttps2dq:
   2851 ; SKYLAKE-SSE:       # %bb.0:
   2852 ; SKYLAKE-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
   2853 ; SKYLAKE-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [10:0.50]
   2854 ; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   2855 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2856 ;
   2857 ; SKYLAKE-LABEL: test_cvttps2dq:
   2858 ; SKYLAKE:       # %bb.0:
   2859 ; SKYLAKE-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50]
   2860 ; SKYLAKE-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50]
   2861 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   2862 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2863 ;
   2864 ; SKX-SSE-LABEL: test_cvttps2dq:
   2865 ; SKX-SSE:       # %bb.0:
   2866 ; SKX-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
   2867 ; SKX-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [10:0.50]
   2868 ; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   2869 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2870 ;
   2871 ; SKX-LABEL: test_cvttps2dq:
   2872 ; SKX:       # %bb.0:
   2873 ; SKX-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50]
   2874 ; SKX-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50]
   2875 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   2876 ; SKX-NEXT:    retq # sched: [7:1.00]
   2877 ;
   2878 ; BTVER2-SSE-LABEL: test_cvttps2dq:
   2879 ; BTVER2-SSE:       # %bb.0:
   2880 ; BTVER2-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
   2881 ; BTVER2-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [8:1.00]
   2882 ; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   2883 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2884 ;
   2885 ; BTVER2-LABEL: test_cvttps2dq:
   2886 ; BTVER2:       # %bb.0:
   2887 ; BTVER2-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00]
   2888 ; BTVER2-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
   2889 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   2890 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2891 ;
   2892 ; ZNVER1-SSE-LABEL: test_cvttps2dq:
   2893 ; ZNVER1-SSE:       # %bb.0:
   2894 ; ZNVER1-SSE-NEXT:    cvttps2dq %xmm0, %xmm1 # sched: [5:1.00]
   2895 ; ZNVER1-SSE-NEXT:    cvttps2dq (%rdi), %xmm0 # sched: [12:1.00]
   2896 ; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
   2897 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2898 ;
   2899 ; ZNVER1-LABEL: test_cvttps2dq:
   2900 ; ZNVER1:       # %bb.0:
   2901 ; ZNVER1-NEXT:    vcvttps2dq (%rdi), %xmm1 # sched: [12:1.00]
   2902 ; ZNVER1-NEXT:    vcvttps2dq %xmm0, %xmm0 # sched: [5:1.00]
   2903 ; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   2904 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Purpose of this test: exercise the packed float -> i32 conversion in
        ; both operand forms. %a0 is converted directly and a second
        ; <4 x float> is loaded from %a1 (align 16) before conversion, so the
        ; assertions above expect one register-form and one memory-form
        ; cvttps2dq (vcvttps2dq in the AVX runs).
        ; The add of the two results keeps both conversions live and shows up
        ; as paddd / vpaddd above.
        ; The assertions are autogenerated (see the NOTE at the top of the
        ; file); regenerate them with the script rather than editing by hand.
   2905   %1 = fptosi <4 x float> %a0 to <4 x i32>
   2906   %2 = load <4 x float>, <4 x float> *%a1, align 16
   2907   %3 = fptosi <4 x float> %2 to <4 x i32>
   2908   %4 = add <4 x i32> %1, %3
   2909   ret <4 x i32> %4
   2910 }
   2911 
   2912 define i32 @test_cvttsd2si(double %a0, double *%a1) {
   2913 ; GENERIC-LABEL: test_cvttsd2si:
   2914 ; GENERIC:       # %bb.0:
   2915 ; GENERIC-NEXT:    cvttsd2si %xmm0, %ecx # sched: [5:1.00]
   2916 ; GENERIC-NEXT:    cvttsd2si (%rdi), %eax # sched: [9:1.00]
   2917 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
   2918 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2919 ;
   2920 ; ATOM-LABEL: test_cvttsd2si:
   2921 ; ATOM:       # %bb.0:
   2922 ; ATOM-NEXT:    cvttsd2si (%rdi), %eax # sched: [9:4.50]
   2923 ; ATOM-NEXT:    cvttsd2si %xmm0, %ecx # sched: [8:4.00]
   2924 ; ATOM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
   2925 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2926 ;
   2927 ; SLM-LABEL: test_cvttsd2si:
   2928 ; SLM:       # %bb.0:
   2929 ; SLM-NEXT:    cvttsd2si (%rdi), %eax # sched: [7:1.00]
   2930 ; SLM-NEXT:    cvttsd2si %xmm0, %ecx # sched: [4:0.50]
   2931 ; SLM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
   2932 ; SLM-NEXT:    retq # sched: [4:1.00]
   2933 ;
   2934 ; SANDY-SSE-LABEL: test_cvttsd2si:
   2935 ; SANDY-SSE:       # %bb.0:
   2936 ; SANDY-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [5:1.00]
   2937 ; SANDY-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [9:1.00]
   2938 ; SANDY-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.33]
   2939 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2940 ;
   2941 ; SANDY-LABEL: test_cvttsd2si:
   2942 ; SANDY:       # %bb.0:
   2943 ; SANDY-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [5:1.00]
   2944 ; SANDY-NEXT:    vcvttsd2si (%rdi), %eax # sched: [10:1.00]
   2945 ; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
   2946 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2947 ;
   2948 ; HASWELL-SSE-LABEL: test_cvttsd2si:
   2949 ; HASWELL-SSE:       # %bb.0:
   2950 ; HASWELL-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [4:1.00]
   2951 ; HASWELL-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [9:1.00]
   2952 ; HASWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   2953 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2954 ;
   2955 ; HASWELL-LABEL: test_cvttsd2si:
   2956 ; HASWELL:       # %bb.0:
   2957 ; HASWELL-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [4:1.00]
   2958 ; HASWELL-NEXT:    vcvttsd2si (%rdi), %eax # sched: [9:1.00]
   2959 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   2960 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2961 ;
   2962 ; BROADWELL-SSE-LABEL: test_cvttsd2si:
   2963 ; BROADWELL-SSE:       # %bb.0:
   2964 ; BROADWELL-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [9:1.00]
   2965 ; BROADWELL-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [4:1.00]
   2966 ; BROADWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   2967 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2968 ;
   2969 ; BROADWELL-LABEL: test_cvttsd2si:
   2970 ; BROADWELL:       # %bb.0:
   2971 ; BROADWELL-NEXT:    vcvttsd2si (%rdi), %eax # sched: [9:1.00]
   2972 ; BROADWELL-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [4:1.00]
   2973 ; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   2974 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2975 ;
   2976 ; SKYLAKE-SSE-LABEL: test_cvttsd2si:
   2977 ; SKYLAKE-SSE:       # %bb.0:
   2978 ; SKYLAKE-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [6:1.00]
   2979 ; SKYLAKE-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [11:1.00]
   2980 ; SKYLAKE-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   2981 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2982 ;
   2983 ; SKYLAKE-LABEL: test_cvttsd2si:
   2984 ; SKYLAKE:       # %bb.0:
   2985 ; SKYLAKE-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [6:1.00]
   2986 ; SKYLAKE-NEXT:    vcvttsd2si (%rdi), %eax # sched: [11:1.00]
   2987 ; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   2988 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2989 ;
   2990 ; SKX-SSE-LABEL: test_cvttsd2si:
   2991 ; SKX-SSE:       # %bb.0:
   2992 ; SKX-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [6:1.00]
   2993 ; SKX-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [11:1.00]
   2994 ; SKX-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   2995 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2996 ;
   2997 ; SKX-LABEL: test_cvttsd2si:
   2998 ; SKX:       # %bb.0:
   2999 ; SKX-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [6:1.00]
   3000 ; SKX-NEXT:    vcvttsd2si (%rdi), %eax # sched: [11:1.00]
   3001 ; SKX-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   3002 ; SKX-NEXT:    retq # sched: [7:1.00]
   3003 ;
   3004 ; BTVER2-SSE-LABEL: test_cvttsd2si:
   3005 ; BTVER2-SSE:       # %bb.0:
   3006 ; BTVER2-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [12:1.00]
   3007 ; BTVER2-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [7:1.00]
   3008 ; BTVER2-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.50]
   3009 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3010 ;
   3011 ; BTVER2-LABEL: test_cvttsd2si:
   3012 ; BTVER2:       # %bb.0:
   3013 ; BTVER2-NEXT:    vcvttsd2si (%rdi), %eax # sched: [12:1.00]
   3014 ; BTVER2-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [7:1.00]
   3015 ; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
   3016 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3017 ;
   3018 ; ZNVER1-SSE-LABEL: test_cvttsd2si:
   3019 ; ZNVER1-SSE:       # %bb.0:
   3020 ; ZNVER1-SSE-NEXT:    cvttsd2si (%rdi), %eax # sched: [12:1.00]
   3021 ; ZNVER1-SSE-NEXT:    cvttsd2si %xmm0, %ecx # sched: [5:1.00]
   3022 ; ZNVER1-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   3023 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3024 ;
   3025 ; ZNVER1-LABEL: test_cvttsd2si:
   3026 ; ZNVER1:       # %bb.0:
   3027 ; ZNVER1-NEXT:    vcvttsd2si (%rdi), %eax # sched: [12:1.00]
   3028 ; ZNVER1-NEXT:    vcvttsd2si %xmm0, %ecx # sched: [5:1.00]
   3029 ; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   3030 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Purpose of this test: exercise the scalar double -> i32 conversion in
        ; both operand forms. %a0 is converted directly and a second double is
        ; loaded from %a1 (align 8) before conversion, so the assertions above
        ; expect one register-form and one memory-form cvttsd2si (vcvttsd2si in
        ; the AVX runs), both writing 32-bit GPRs (%ecx / %eax).
        ; The i32 add keeps both conversions live and shows up as addl above.
        ; The assertions are autogenerated (see the NOTE at the top of the
        ; file); regenerate them with the script rather than editing by hand.
   3031   %1 = fptosi double %a0 to i32
   3032   %2 = load double, double *%a1, align 8
   3033   %3 = fptosi double %2 to i32
   3034   %4 = add i32 %1, %3
   3035   ret i32 %4
   3036 }
   3037 
   3038 define i64 @test_cvttsd2siq(double %a0, double *%a1) {
   3039 ; GENERIC-LABEL: test_cvttsd2siq:
   3040 ; GENERIC:       # %bb.0:
   3041 ; GENERIC-NEXT:    cvttsd2si %xmm0, %rcx # sched: [5:1.00]
   3042 ; GENERIC-NEXT:    cvttsd2si (%rdi), %rax # sched: [9:1.00]
   3043 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
   3044 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3045 ;
   3046 ; ATOM-LABEL: test_cvttsd2siq:
   3047 ; ATOM:       # %bb.0:
   3048 ; ATOM-NEXT:    cvttsd2si (%rdi), %rax # sched: [9:4.50]
   3049 ; ATOM-NEXT:    cvttsd2si %xmm0, %rcx # sched: [8:4.00]
   3050 ; ATOM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
   3051 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3052 ;
   3053 ; SLM-LABEL: test_cvttsd2siq:
   3054 ; SLM:       # %bb.0:
   3055 ; SLM-NEXT:    cvttsd2si (%rdi), %rax # sched: [7:1.00]
   3056 ; SLM-NEXT:    cvttsd2si %xmm0, %rcx # sched: [4:0.50]
   3057 ; SLM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
   3058 ; SLM-NEXT:    retq # sched: [4:1.00]
   3059 ;
   3060 ; SANDY-SSE-LABEL: test_cvttsd2siq:
   3061 ; SANDY-SSE:       # %bb.0:
   3062 ; SANDY-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [5:1.00]
   3063 ; SANDY-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [9:1.00]
   3064 ; SANDY-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.33]
   3065 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3066 ;
   3067 ; SANDY-LABEL: test_cvttsd2siq:
   3068 ; SANDY:       # %bb.0:
   3069 ; SANDY-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [5:1.00]
   3070 ; SANDY-NEXT:    vcvttsd2si (%rdi), %rax # sched: [10:1.00]
   3071 ; SANDY-NEXT:    addq %rcx, %rax # sched: [1:0.33]
   3072 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3073 ;
   3074 ; HASWELL-SSE-LABEL: test_cvttsd2siq:
   3075 ; HASWELL-SSE:       # %bb.0:
   3076 ; HASWELL-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [4:1.00]
   3077 ; HASWELL-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [9:1.00]
   3078 ; HASWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   3079 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3080 ;
   3081 ; HASWELL-LABEL: test_cvttsd2siq:
   3082 ; HASWELL:       # %bb.0:
   3083 ; HASWELL-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [4:1.00]
   3084 ; HASWELL-NEXT:    vcvttsd2si (%rdi), %rax # sched: [9:1.00]
   3085 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   3086 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3087 ;
   3088 ; BROADWELL-SSE-LABEL: test_cvttsd2siq:
   3089 ; BROADWELL-SSE:       # %bb.0:
   3090 ; BROADWELL-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [9:1.00]
   3091 ; BROADWELL-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [4:1.00]
   3092 ; BROADWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   3093 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3094 ;
   3095 ; BROADWELL-LABEL: test_cvttsd2siq:
   3096 ; BROADWELL:       # %bb.0:
   3097 ; BROADWELL-NEXT:    vcvttsd2si (%rdi), %rax # sched: [9:1.00]
   3098 ; BROADWELL-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [4:1.00]
   3099 ; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   3100 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3101 ;
   3102 ; SKYLAKE-SSE-LABEL: test_cvttsd2siq:
   3103 ; SKYLAKE-SSE:       # %bb.0:
   3104 ; SKYLAKE-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [6:1.00]
   3105 ; SKYLAKE-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [11:1.00]
   3106 ; SKYLAKE-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   3107 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3108 ;
   3109 ; SKYLAKE-LABEL: test_cvttsd2siq:
   3110 ; SKYLAKE:       # %bb.0:
   3111 ; SKYLAKE-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [6:1.00]
   3112 ; SKYLAKE-NEXT:    vcvttsd2si (%rdi), %rax # sched: [11:1.00]
   3113 ; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   3114 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3115 ;
   3116 ; SKX-SSE-LABEL: test_cvttsd2siq:
   3117 ; SKX-SSE:       # %bb.0:
   3118 ; SKX-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [6:1.00]
   3119 ; SKX-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [11:1.00]
   3120 ; SKX-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   3121 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3122 ;
   3123 ; SKX-LABEL: test_cvttsd2siq:
   3124 ; SKX:       # %bb.0:
   3125 ; SKX-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [6:1.00]
   3126 ; SKX-NEXT:    vcvttsd2si (%rdi), %rax # sched: [11:1.00]
   3127 ; SKX-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   3128 ; SKX-NEXT:    retq # sched: [7:1.00]
   3129 ;
   3130 ; BTVER2-SSE-LABEL: test_cvttsd2siq:
   3131 ; BTVER2-SSE:       # %bb.0:
   3132 ; BTVER2-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [12:1.00]
   3133 ; BTVER2-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [7:1.00]
   3134 ; BTVER2-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.50]
   3135 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3136 ;
   3137 ; BTVER2-LABEL: test_cvttsd2siq:
   3138 ; BTVER2:       # %bb.0:
   3139 ; BTVER2-NEXT:    vcvttsd2si (%rdi), %rax # sched: [12:1.00]
   3140 ; BTVER2-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [7:1.00]
   3141 ; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
   3142 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3143 ;
   3144 ; ZNVER1-SSE-LABEL: test_cvttsd2siq:
   3145 ; ZNVER1-SSE:       # %bb.0:
   3146 ; ZNVER1-SSE-NEXT:    cvttsd2si (%rdi), %rax # sched: [12:1.00]
   3147 ; ZNVER1-SSE-NEXT:    cvttsd2si %xmm0, %rcx # sched: [5:1.00]
   3148 ; ZNVER1-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   3149 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3150 ;
   3151 ; ZNVER1-LABEL: test_cvttsd2siq:
   3152 ; ZNVER1:       # %bb.0:
   3153 ; ZNVER1-NEXT:    vcvttsd2si (%rdi), %rax # sched: [12:1.00]
   3154 ; ZNVER1-NEXT:    vcvttsd2si %xmm0, %rcx # sched: [5:1.00]
   3155 ; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   3156 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Purpose of this test: the 64-bit counterpart of test_cvttsd2si.
        ; %a0 is converted to i64 directly and a second double is loaded from
        ; %a1 (align 8) before conversion, so the assertions above expect one
        ; register-form and one memory-form cvttsd2si (vcvttsd2si in the AVX
        ; runs), this time writing 64-bit GPRs (%rcx / %rax).
        ; The i64 add keeps both conversions live and shows up as addq above.
        ; The assertions are autogenerated (see the NOTE at the top of the
        ; file); regenerate them with the script rather than editing by hand.
   3157   %1 = fptosi double %a0 to i64
   3158   %2 = load double, double *%a1, align 8
   3159   %3 = fptosi double %2 to i64
   3160   %4 = add i64 %1, %3
   3161   ret i64 %4
   3162 }
   3163 
   3164 define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
   3165 ; GENERIC-LABEL: test_divpd:
   3166 ; GENERIC:       # %bb.0:
   3167 ; GENERIC-NEXT:    divpd %xmm1, %xmm0 # sched: [22:22.00]
   3168 ; GENERIC-NEXT:    divpd (%rdi), %xmm0 # sched: [28:22.00]
   3169 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3170 ;
   3171 ; ATOM-LABEL: test_divpd:
   3172 ; ATOM:       # %bb.0:
   3173 ; ATOM-NEXT:    divpd %xmm1, %xmm0 # sched: [125:62.50]
   3174 ; ATOM-NEXT:    divpd (%rdi), %xmm0 # sched: [125:62.50]
   3175 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3176 ;
   3177 ; SLM-LABEL: test_divpd:
   3178 ; SLM:       # %bb.0:
   3179 ; SLM-NEXT:    divpd %xmm1, %xmm0 # sched: [69:69.00]
   3180 ; SLM-NEXT:    divpd (%rdi), %xmm0 # sched: [72:69.00]
   3181 ; SLM-NEXT:    retq # sched: [4:1.00]
   3182 ;
   3183 ; SANDY-SSE-LABEL: test_divpd:
   3184 ; SANDY-SSE:       # %bb.0:
   3185 ; SANDY-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [22:22.00]
   3186 ; SANDY-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [28:22.00]
   3187 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3188 ;
   3189 ; SANDY-LABEL: test_divpd:
   3190 ; SANDY:       # %bb.0:
   3191 ; SANDY-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:22.00]
   3192 ; SANDY-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:22.00]
   3193 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3194 ;
   3195 ; HASWELL-SSE-LABEL: test_divpd:
   3196 ; HASWELL-SSE:       # %bb.0:
   3197 ; HASWELL-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [20:14.00]
   3198 ; HASWELL-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [26:14.00]
   3199 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3200 ;
   3201 ; HASWELL-LABEL: test_divpd:
   3202 ; HASWELL:       # %bb.0:
   3203 ; HASWELL-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [20:14.00]
   3204 ; HASWELL-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [26:14.00]
   3205 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3206 ;
   3207 ; BROADWELL-SSE-LABEL: test_divpd:
   3208 ; BROADWELL-SSE:       # %bb.0:
   3209 ; BROADWELL-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [14:8.00]
   3210 ; BROADWELL-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [19:8.00]
   3211 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3212 ;
   3213 ; BROADWELL-LABEL: test_divpd:
   3214 ; BROADWELL:       # %bb.0:
   3215 ; BROADWELL-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:8.00]
   3216 ; BROADWELL-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [19:8.00]
   3217 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3218 ;
   3219 ; SKYLAKE-SSE-LABEL: test_divpd:
   3220 ; SKYLAKE-SSE:       # %bb.0:
   3221 ; SKYLAKE-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [14:3.00]
   3222 ; SKYLAKE-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [20:4.00]
   3223 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3224 ;
   3225 ; SKYLAKE-LABEL: test_divpd:
   3226 ; SKYLAKE:       # %bb.0:
   3227 ; SKYLAKE-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:3.00]
   3228 ; SKYLAKE-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:4.00]
   3229 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3230 ;
   3231 ; SKX-SSE-LABEL: test_divpd:
   3232 ; SKX-SSE:       # %bb.0:
   3233 ; SKX-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [14:3.00]
   3234 ; SKX-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [20:4.00]
   3235 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3236 ;
   3237 ; SKX-LABEL: test_divpd:
   3238 ; SKX:       # %bb.0:
   3239 ; SKX-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:3.00]
   3240 ; SKX-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:4.00]
   3241 ; SKX-NEXT:    retq # sched: [7:1.00]
   3242 ;
   3243 ; BTVER2-SSE-LABEL: test_divpd:
   3244 ; BTVER2-SSE:       # %bb.0:
   3245 ; BTVER2-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [19:19.00]
   3246 ; BTVER2-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [24:19.00]
   3247 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3248 ;
   3249 ; BTVER2-LABEL: test_divpd:
   3250 ; BTVER2:       # %bb.0:
   3251 ; BTVER2-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
   3252 ; BTVER2-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
   3253 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3254 ;
   3255 ; ZNVER1-SSE-LABEL: test_divpd:
   3256 ; ZNVER1-SSE:       # %bb.0:
   3257 ; ZNVER1-SSE-NEXT:    divpd %xmm1, %xmm0 # sched: [15:1.00]
   3258 ; ZNVER1-SSE-NEXT:    divpd (%rdi), %xmm0 # sched: [22:1.00]
   3259 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3260 ;
   3261 ; ZNVER1-LABEL: test_divpd:
   3262 ; ZNVER1:       # %bb.0:
   3263 ; ZNVER1-NEXT:    vdivpd %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
   3264 ; ZNVER1-NEXT:    vdivpd (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
   3265 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Purpose of this test: exercise the packed double divide in both
        ; operand forms. The first fdiv divides %a0 by %a1 (register form); the
        ; second divides that quotient by a <2 x double> loaded from %a2
        ; (align 16), which the assertions above expect as a memory-operand
        ; divpd (vdivpd in the AVX runs).
        ; The second fdiv consumes the first, so the two divides are dependent
        ; and appear in the same order for every CPU model above.
        ; The assertions are autogenerated (see the NOTE at the top of the
        ; file); regenerate them with the script rather than editing by hand.
   3266   %1 = fdiv <2 x double> %a0, %a1
   3267   %2 = load <2 x double>, <2 x double> *%a2, align 16
   3268   %3 = fdiv <2 x double> %1, %2
   3269   ret <2 x double> %3
   3270 }
   3271 
   3272 define double @test_divsd(double %a0, double %a1, double *%a2) {
        ; Scalar double divide scheduling test: divides %a0 by %a1, loads a
        ; double from %a2 (align 8), then divides the first quotient by the
        ; loaded value and returns it. Each prefix below expects a
        ; register-register divide followed by a divide whose source is the
        ; memory operand (%rdi). The GENERIC, ATOM, SLM and *-SSE prefixes
        ; expect divsd; the remaining prefixes expect the v-prefixed vdivsd.
        ; NOTE(review): the "# sched" pairs are printed by -print-schedule;
        ; presumably [latency:reciprocal throughput] - confirm.
        ; Check lines are autogenerated by utils/update_llc_test_checks.py.
   3273 ; GENERIC-LABEL: test_divsd:
   3274 ; GENERIC:       # %bb.0:
   3275 ; GENERIC-NEXT:    divsd %xmm1, %xmm0 # sched: [22:22.00]
   3276 ; GENERIC-NEXT:    divsd (%rdi), %xmm0 # sched: [28:22.00]
   3277 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3278 ;
   3279 ; ATOM-LABEL: test_divsd:
   3280 ; ATOM:       # %bb.0:
   3281 ; ATOM-NEXT:    divsd %xmm1, %xmm0 # sched: [62:31.00]
   3282 ; ATOM-NEXT:    divsd (%rdi), %xmm0 # sched: [62:31.00]
   3283 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3284 ;
   3285 ; SLM-LABEL: test_divsd:
   3286 ; SLM:       # %bb.0:
   3287 ; SLM-NEXT:    divsd %xmm1, %xmm0 # sched: [34:32.00]
   3288 ; SLM-NEXT:    divsd (%rdi), %xmm0 # sched: [37:32.00]
   3289 ; SLM-NEXT:    retq # sched: [4:1.00]
   3290 ;
   3291 ; SANDY-SSE-LABEL: test_divsd:
   3292 ; SANDY-SSE:       # %bb.0:
   3293 ; SANDY-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [22:22.00]
   3294 ; SANDY-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [28:22.00]
   3295 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3296 ;
   3297 ; SANDY-LABEL: test_divsd:
   3298 ; SANDY:       # %bb.0:
   3299 ; SANDY-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:22.00]
   3300 ; SANDY-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:22.00]
   3301 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3302 ;
   3303 ; HASWELL-SSE-LABEL: test_divsd:
   3304 ; HASWELL-SSE:       # %bb.0:
   3305 ; HASWELL-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [20:14.00]
   3306 ; HASWELL-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [25:14.00]
   3307 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3308 ;
   3309 ; HASWELL-LABEL: test_divsd:
   3310 ; HASWELL:       # %bb.0:
   3311 ; HASWELL-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [20:14.00]
   3312 ; HASWELL-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [25:14.00]
   3313 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3314 ;
   3315 ; BROADWELL-SSE-LABEL: test_divsd:
   3316 ; BROADWELL-SSE:       # %bb.0:
   3317 ; BROADWELL-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [14:4.00]
   3318 ; BROADWELL-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [19:8.00]
   3319 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3320 ;
   3321 ; BROADWELL-LABEL: test_divsd:
   3322 ; BROADWELL:       # %bb.0:
   3323 ; BROADWELL-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:4.00]
   3324 ; BROADWELL-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:8.00]
   3325 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3326 ;
   3327 ; SKYLAKE-SSE-LABEL: test_divsd:
   3328 ; SKYLAKE-SSE:       # %bb.0:
   3329 ; SKYLAKE-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [14:3.00]
   3330 ; SKYLAKE-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [19:4.00]
   3331 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3332 ;
   3333 ; SKYLAKE-LABEL: test_divsd:
   3334 ; SKYLAKE:       # %bb.0:
   3335 ; SKYLAKE-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:3.00]
   3336 ; SKYLAKE-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:4.00]
   3337 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3338 ;
   3339 ; SKX-SSE-LABEL: test_divsd:
   3340 ; SKX-SSE:       # %bb.0:
   3341 ; SKX-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [14:3.00]
   3342 ; SKX-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [19:4.00]
   3343 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3344 ;
   3345 ; SKX-LABEL: test_divsd:
   3346 ; SKX:       # %bb.0:
   3347 ; SKX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:3.00]
   3348 ; SKX-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:4.00]
   3349 ; SKX-NEXT:    retq # sched: [7:1.00]
   3350 ;
   3351 ; BTVER2-SSE-LABEL: test_divsd:
   3352 ; BTVER2-SSE:       # %bb.0:
   3353 ; BTVER2-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [19:19.00]
   3354 ; BTVER2-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [24:19.00]
   3355 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3356 ;
   3357 ; BTVER2-LABEL: test_divsd:
   3358 ; BTVER2:       # %bb.0:
   3359 ; BTVER2-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
   3360 ; BTVER2-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
   3361 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3362 ;
   3363 ; ZNVER1-SSE-LABEL: test_divsd:
   3364 ; ZNVER1-SSE:       # %bb.0:
   3365 ; ZNVER1-SSE-NEXT:    divsd %xmm1, %xmm0 # sched: [15:1.00]
   3366 ; ZNVER1-SSE-NEXT:    divsd (%rdi), %xmm0 # sched: [22:1.00]
   3367 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3368 ;
   3369 ; ZNVER1-LABEL: test_divsd:
   3370 ; ZNVER1:       # %bb.0:
   3371 ; ZNVER1-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
   3372 ; ZNVER1-NEXT:    vdivsd (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
   3373 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3374   %1 = fdiv double %a0, %a1
   3375   %2 = load double, double *%a2, align 8
   3376   %3 = fdiv double %1, %2
   3377   ret double %3
   3378 }
   3379 
   3380 define void @test_lfence() {
        ; Scheduling test for the lfence intrinsic: the body only calls
        ; @llvm.x86.sse2.lfence and returns void. Every prefix expects a single
        ; lfence followed by retq; the ATOM checks additionally expect six nop
        ; lines between the two.
        ; NOTE(review): the reason for the ATOM nops is not visible in this
        ; function - confirm against the Atom target before changing them.
        ; Check lines are autogenerated by utils/update_llc_test_checks.py.
   3381 ; GENERIC-LABEL: test_lfence:
   3382 ; GENERIC:       # %bb.0:
   3383 ; GENERIC-NEXT:    lfence # sched: [1:1.00]
   3384 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3385 ;
   3386 ; ATOM-LABEL: test_lfence:
   3387 ; ATOM:       # %bb.0:
   3388 ; ATOM-NEXT:    lfence # sched: [1:0.50]
   3389 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3390 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3391 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3392 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3393 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3394 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3395 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3396 ;
   3397 ; SLM-LABEL: test_lfence:
   3398 ; SLM:       # %bb.0:
   3399 ; SLM-NEXT:    lfence # sched: [1:1.00]
   3400 ; SLM-NEXT:    retq # sched: [4:1.00]
   3401 ;
   3402 ; SANDY-SSE-LABEL: test_lfence:
   3403 ; SANDY-SSE:       # %bb.0:
   3404 ; SANDY-SSE-NEXT:    lfence # sched: [1:1.00]
   3405 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3406 ;
   3407 ; SANDY-LABEL: test_lfence:
   3408 ; SANDY:       # %bb.0:
   3409 ; SANDY-NEXT:    lfence # sched: [1:1.00]
   3410 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3411 ;
   3412 ; HASWELL-SSE-LABEL: test_lfence:
   3413 ; HASWELL-SSE:       # %bb.0:
   3414 ; HASWELL-SSE-NEXT:    lfence # sched: [2:0.50]
   3415 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3416 ;
   3417 ; HASWELL-LABEL: test_lfence:
   3418 ; HASWELL:       # %bb.0:
   3419 ; HASWELL-NEXT:    lfence # sched: [2:0.50]
   3420 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3421 ;
   3422 ; BROADWELL-SSE-LABEL: test_lfence:
   3423 ; BROADWELL-SSE:       # %bb.0:
   3424 ; BROADWELL-SSE-NEXT:    lfence # sched: [2:0.50]
   3425 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3426 ;
   3427 ; BROADWELL-LABEL: test_lfence:
   3428 ; BROADWELL:       # %bb.0:
   3429 ; BROADWELL-NEXT:    lfence # sched: [2:0.50]
   3430 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3431 ;
   3432 ; SKYLAKE-SSE-LABEL: test_lfence:
   3433 ; SKYLAKE-SSE:       # %bb.0:
   3434 ; SKYLAKE-SSE-NEXT:    lfence # sched: [2:0.50]
   3435 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3436 ;
   3437 ; SKYLAKE-LABEL: test_lfence:
   3438 ; SKYLAKE:       # %bb.0:
   3439 ; SKYLAKE-NEXT:    lfence # sched: [2:0.50]
   3440 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3441 ;
   3442 ; SKX-SSE-LABEL: test_lfence:
   3443 ; SKX-SSE:       # %bb.0:
   3444 ; SKX-SSE-NEXT:    lfence # sched: [2:0.50]
   3445 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3446 ;
   3447 ; SKX-LABEL: test_lfence:
   3448 ; SKX:       # %bb.0:
   3449 ; SKX-NEXT:    lfence # sched: [2:0.50]
   3450 ; SKX-NEXT:    retq # sched: [7:1.00]
   3451 ;
   3452 ; BTVER2-SSE-LABEL: test_lfence:
   3453 ; BTVER2-SSE:       # %bb.0:
   3454 ; BTVER2-SSE-NEXT:    lfence # sched: [1:1.00]
   3455 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3456 ;
   3457 ; BTVER2-LABEL: test_lfence:
   3458 ; BTVER2:       # %bb.0:
   3459 ; BTVER2-NEXT:    lfence # sched: [1:1.00]
   3460 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3461 ;
   3462 ; ZNVER1-SSE-LABEL: test_lfence:
   3463 ; ZNVER1-SSE:       # %bb.0:
   3464 ; ZNVER1-SSE-NEXT:    lfence # sched: [1:0.50]
   3465 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3466 ;
   3467 ; ZNVER1-LABEL: test_lfence:
   3468 ; ZNVER1:       # %bb.0:
   3469 ; ZNVER1-NEXT:    lfence # sched: [1:0.50]
   3470 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3471   call void @llvm.x86.sse2.lfence()
   3472   ret void
   3473 }
   3474 declare void @llvm.x86.sse2.lfence() nounwind readnone
   3475 
   3476 define void @test_mfence() {
        ; Scheduling test for the mfence intrinsic: the body only calls
        ; @llvm.x86.sse2.mfence and returns void. Every prefix expects a single
        ; mfence followed by retq; the ATOM checks additionally expect six nop
        ; lines between the two.
        ; NOTE(review): the reason for the ATOM nops is not visible in this
        ; function - confirm against the Atom target before changing them.
        ; Check lines are autogenerated by utils/update_llc_test_checks.py.
   3477 ; GENERIC-LABEL: test_mfence:
   3478 ; GENERIC:       # %bb.0:
   3479 ; GENERIC-NEXT:    mfence # sched: [1:1.00]
   3480 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3481 ;
   3482 ; ATOM-LABEL: test_mfence:
   3483 ; ATOM:       # %bb.0:
   3484 ; ATOM-NEXT:    mfence # sched: [1:1.00]
   3485 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3486 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3487 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3488 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3489 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3490 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3491 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3492 ;
   3493 ; SLM-LABEL: test_mfence:
   3494 ; SLM:       # %bb.0:
   3495 ; SLM-NEXT:    mfence # sched: [1:1.00]
   3496 ; SLM-NEXT:    retq # sched: [4:1.00]
   3497 ;
   3498 ; SANDY-SSE-LABEL: test_mfence:
   3499 ; SANDY-SSE:       # %bb.0:
   3500 ; SANDY-SSE-NEXT:    mfence # sched: [1:1.00]
   3501 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3502 ;
   3503 ; SANDY-LABEL: test_mfence:
   3504 ; SANDY:       # %bb.0:
   3505 ; SANDY-NEXT:    mfence # sched: [1:1.00]
   3506 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3507 ;
   3508 ; HASWELL-SSE-LABEL: test_mfence:
   3509 ; HASWELL-SSE:       # %bb.0:
   3510 ; HASWELL-SSE-NEXT:    mfence # sched: [2:0.50]
   3511 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3512 ;
   3513 ; HASWELL-LABEL: test_mfence:
   3514 ; HASWELL:       # %bb.0:
   3515 ; HASWELL-NEXT:    mfence # sched: [2:0.50]
   3516 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3517 ;
   3518 ; BROADWELL-SSE-LABEL: test_mfence:
   3519 ; BROADWELL-SSE:       # %bb.0:
   3520 ; BROADWELL-SSE-NEXT:    mfence # sched: [2:0.50]
   3521 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3522 ;
   3523 ; BROADWELL-LABEL: test_mfence:
   3524 ; BROADWELL:       # %bb.0:
   3525 ; BROADWELL-NEXT:    mfence # sched: [2:0.50]
   3526 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3527 ;
   3528 ; SKYLAKE-SSE-LABEL: test_mfence:
   3529 ; SKYLAKE-SSE:       # %bb.0:
   3530 ; SKYLAKE-SSE-NEXT:    mfence # sched: [3:0.50]
   3531 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3532 ;
   3533 ; SKYLAKE-LABEL: test_mfence:
   3534 ; SKYLAKE:       # %bb.0:
   3535 ; SKYLAKE-NEXT:    mfence # sched: [3:0.50]
   3536 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3537 ;
   3538 ; SKX-SSE-LABEL: test_mfence:
   3539 ; SKX-SSE:       # %bb.0:
   3540 ; SKX-SSE-NEXT:    mfence # sched: [3:0.50]
   3541 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3542 ;
   3543 ; SKX-LABEL: test_mfence:
   3544 ; SKX:       # %bb.0:
   3545 ; SKX-NEXT:    mfence # sched: [3:0.50]
   3546 ; SKX-NEXT:    retq # sched: [7:1.00]
   3547 ;
   3548 ; BTVER2-SSE-LABEL: test_mfence:
   3549 ; BTVER2-SSE:       # %bb.0:
   3550 ; BTVER2-SSE-NEXT:    mfence # sched: [1:1.00]
   3551 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3552 ;
   3553 ; BTVER2-LABEL: test_mfence:
   3554 ; BTVER2:       # %bb.0:
   3555 ; BTVER2-NEXT:    mfence # sched: [1:1.00]
   3556 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3557 ;
   3558 ; ZNVER1-SSE-LABEL: test_mfence:
   3559 ; ZNVER1-SSE:       # %bb.0:
   3560 ; ZNVER1-SSE-NEXT:    mfence # sched: [1:0.50]
   3561 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3562 ;
   3563 ; ZNVER1-LABEL: test_mfence:
   3564 ; ZNVER1:       # %bb.0:
   3565 ; ZNVER1-NEXT:    mfence # sched: [1:0.50]
   3566 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3567   call void @llvm.x86.sse2.mfence()
   3568   ret void
   3569 }
   3570 declare void @llvm.x86.sse2.mfence() nounwind readnone
   3571 
   3572 define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) {
        ; Scheduling test for the maskmovdqu intrinsic: forwards %a0, %a1 and
        ; the i8* %a2 to @llvm.x86.sse2.maskmov.dqu and returns void. The
        ; checked instruction names only %xmm1 and %xmm0; the pointer does not
        ; appear as an explicit operand in the expected assembly. The GENERIC,
        ; ATOM, SLM and *-SSE prefixes expect maskmovdqu, the remaining
        ; prefixes expect vmaskmovdqu, and the ATOM checks expect four nop
        ; lines before retq.
        ; Check lines are autogenerated by utils/update_llc_test_checks.py.
   3573 ; GENERIC-LABEL: test_maskmovdqu:
   3574 ; GENERIC:       # %bb.0:
   3575 ; GENERIC-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
   3576 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3577 ;
   3578 ; ATOM-LABEL: test_maskmovdqu:
   3579 ; ATOM:       # %bb.0:
   3580 ; ATOM-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [2:1.00]
   3581 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3582 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3583 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3584 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3585 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3586 ;
   3587 ; SLM-LABEL: test_maskmovdqu:
   3588 ; SLM:       # %bb.0:
   3589 ; SLM-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
   3590 ; SLM-NEXT:    retq # sched: [4:1.00]
   3591 ;
   3592 ; SANDY-SSE-LABEL: test_maskmovdqu:
   3593 ; SANDY-SSE:       # %bb.0:
   3594 ; SANDY-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
   3595 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3596 ;
   3597 ; SANDY-LABEL: test_maskmovdqu:
   3598 ; SANDY:       # %bb.0:
   3599 ; SANDY-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
   3600 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3601 ;
   3602 ; HASWELL-SSE-LABEL: test_maskmovdqu:
   3603 ; HASWELL-SSE:       # %bb.0:
   3604 ; HASWELL-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
   3605 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3606 ;
   3607 ; HASWELL-LABEL: test_maskmovdqu:
   3608 ; HASWELL:       # %bb.0:
   3609 ; HASWELL-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
   3610 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3611 ;
   3612 ; BROADWELL-SSE-LABEL: test_maskmovdqu:
   3613 ; BROADWELL-SSE:       # %bb.0:
   3614 ; BROADWELL-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
   3615 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3616 ;
   3617 ; BROADWELL-LABEL: test_maskmovdqu:
   3618 ; BROADWELL:       # %bb.0:
   3619 ; BROADWELL-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
   3620 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3621 ;
   3622 ; SKYLAKE-SSE-LABEL: test_maskmovdqu:
   3623 ; SKYLAKE-SSE:       # %bb.0:
   3624 ; SKYLAKE-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
   3625 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3626 ;
   3627 ; SKYLAKE-LABEL: test_maskmovdqu:
   3628 ; SKYLAKE:       # %bb.0:
   3629 ; SKYLAKE-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
   3630 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3631 ;
   3632 ; SKX-SSE-LABEL: test_maskmovdqu:
   3633 ; SKX-SSE:       # %bb.0:
   3634 ; SKX-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
   3635 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3636 ;
   3637 ; SKX-LABEL: test_maskmovdqu:
   3638 ; SKX:       # %bb.0:
   3639 ; SKX-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
   3640 ; SKX-NEXT:    retq # sched: [7:1.00]
   3641 ;
   3642 ; BTVER2-SSE-LABEL: test_maskmovdqu:
   3643 ; BTVER2-SSE:       # %bb.0:
   3644 ; BTVER2-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
   3645 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3646 ;
   3647 ; BTVER2-LABEL: test_maskmovdqu:
   3648 ; BTVER2:       # %bb.0:
   3649 ; BTVER2-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
   3650 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3651 ;
   3652 ; ZNVER1-SSE-LABEL: test_maskmovdqu:
   3653 ; ZNVER1-SSE:       # %bb.0:
   3654 ; ZNVER1-SSE-NEXT:    maskmovdqu %xmm1, %xmm0 # sched: [100:0.25]
   3655 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3656 ;
   3657 ; ZNVER1-LABEL: test_maskmovdqu:
   3658 ; ZNVER1:       # %bb.0:
   3659 ; ZNVER1-NEXT:    vmaskmovdqu %xmm1, %xmm0 # sched: [100:0.25]
   3660 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3661   call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2)
   3662   ret void
   3663 }
   3664 declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
   3665 
   3666 define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
        ; Scheduling test for the packed-double max intrinsic: calls
        ; @llvm.x86.sse2.max.pd on %a0 and %a1, loads a <2 x double> from %a2
        ; (align 16), then calls the intrinsic again on the first result and
        ; the loaded vector. Each prefix expects a register-register max
        ; followed by a max whose source is the memory operand (%rdi). The
        ; GENERIC, ATOM, SLM and *-SSE prefixes expect maxpd; the remaining
        ; prefixes expect the v-prefixed vmaxpd.
        ; Check lines are autogenerated by utils/update_llc_test_checks.py.
   3667 ; GENERIC-LABEL: test_maxpd:
   3668 ; GENERIC:       # %bb.0:
   3669 ; GENERIC-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
   3670 ; GENERIC-NEXT:    maxpd (%rdi), %xmm0 # sched: [9:1.00]
   3671 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3672 ;
   3673 ; ATOM-LABEL: test_maxpd:
   3674 ; ATOM:       # %bb.0:
   3675 ; ATOM-NEXT:    maxpd %xmm1, %xmm0 # sched: [6:3.00]
   3676 ; ATOM-NEXT:    maxpd (%rdi), %xmm0 # sched: [7:3.50]
   3677 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3678 ;
   3679 ; SLM-LABEL: test_maxpd:
   3680 ; SLM:       # %bb.0:
   3681 ; SLM-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
   3682 ; SLM-NEXT:    maxpd (%rdi), %xmm0 # sched: [6:1.00]
   3683 ; SLM-NEXT:    retq # sched: [4:1.00]
   3684 ;
   3685 ; SANDY-SSE-LABEL: test_maxpd:
   3686 ; SANDY-SSE:       # %bb.0:
   3687 ; SANDY-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
   3688 ; SANDY-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [9:1.00]
   3689 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3690 ;
   3691 ; SANDY-LABEL: test_maxpd:
   3692 ; SANDY:       # %bb.0:
   3693 ; SANDY-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3694 ; SANDY-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   3695 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3696 ;
   3697 ; HASWELL-SSE-LABEL: test_maxpd:
   3698 ; HASWELL-SSE:       # %bb.0:
   3699 ; HASWELL-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
   3700 ; HASWELL-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [9:1.00]
   3701 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3702 ;
   3703 ; HASWELL-LABEL: test_maxpd:
   3704 ; HASWELL:       # %bb.0:
   3705 ; HASWELL-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3706 ; HASWELL-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   3707 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3708 ;
   3709 ; BROADWELL-SSE-LABEL: test_maxpd:
   3710 ; BROADWELL-SSE:       # %bb.0:
   3711 ; BROADWELL-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
   3712 ; BROADWELL-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [8:1.00]
   3713 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3714 ;
   3715 ; BROADWELL-LABEL: test_maxpd:
   3716 ; BROADWELL:       # %bb.0:
   3717 ; BROADWELL-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3718 ; BROADWELL-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   3719 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3720 ;
   3721 ; SKYLAKE-SSE-LABEL: test_maxpd:
   3722 ; SKYLAKE-SSE:       # %bb.0:
   3723 ; SKYLAKE-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [4:0.50]
   3724 ; SKYLAKE-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [10:0.50]
   3725 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3726 ;
   3727 ; SKYLAKE-LABEL: test_maxpd:
   3728 ; SKYLAKE:       # %bb.0:
   3729 ; SKYLAKE-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   3730 ; SKYLAKE-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   3731 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3732 ;
   3733 ; SKX-SSE-LABEL: test_maxpd:
   3734 ; SKX-SSE:       # %bb.0:
   3735 ; SKX-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [4:0.50]
   3736 ; SKX-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [10:0.50]
   3737 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3738 ;
   3739 ; SKX-LABEL: test_maxpd:
   3740 ; SKX:       # %bb.0:
   3741 ; SKX-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   3742 ; SKX-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   3743 ; SKX-NEXT:    retq # sched: [7:1.00]
   3744 ;
   3745 ; BTVER2-SSE-LABEL: test_maxpd:
   3746 ; BTVER2-SSE:       # %bb.0:
   3747 ; BTVER2-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [2:1.00]
   3748 ; BTVER2-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [7:1.00]
   3749 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3750 ;
   3751 ; BTVER2-LABEL: test_maxpd:
   3752 ; BTVER2:       # %bb.0:
   3753 ; BTVER2-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   3754 ; BTVER2-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3755 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3756 ;
   3757 ; ZNVER1-SSE-LABEL: test_maxpd:
   3758 ; ZNVER1-SSE:       # %bb.0:
   3759 ; ZNVER1-SSE-NEXT:    maxpd %xmm1, %xmm0 # sched: [3:1.00]
   3760 ; ZNVER1-SSE-NEXT:    maxpd (%rdi), %xmm0 # sched: [10:1.00]
   3761 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3762 ;
   3763 ; ZNVER1-LABEL: test_maxpd:
   3764 ; ZNVER1:       # %bb.0:
   3765 ; ZNVER1-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3766 ; ZNVER1-NEXT:    vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   3767 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3768   %1 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
   3769   %2 = load <2 x double>, <2 x double> *%a2, align 16
   3770   %3 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %1, <2 x double> %2)
   3771   ret <2 x double> %3
   3772 }
   3773 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
   3774 
   3775 define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
        ; Scheduling test for the scalar-double max intrinsic: calls
        ; @llvm.x86.sse2.max.sd on %a0 and %a1, loads a full <2 x double> from
        ; %a2 (align 16), then calls the intrinsic again on the first result
        ; and the loaded vector. Each prefix expects a register-register max
        ; followed by a max whose source is the memory operand (%rdi). The
        ; GENERIC, ATOM, SLM and *-SSE prefixes expect maxsd; the remaining
        ; prefixes expect the v-prefixed vmaxsd.
        ; Check lines are autogenerated by utils/update_llc_test_checks.py.
   3776 ; GENERIC-LABEL: test_maxsd:
   3777 ; GENERIC:       # %bb.0:
   3778 ; GENERIC-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
   3779 ; GENERIC-NEXT:    maxsd (%rdi), %xmm0 # sched: [9:1.00]
   3780 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3781 ;
   3782 ; ATOM-LABEL: test_maxsd:
   3783 ; ATOM:       # %bb.0:
   3784 ; ATOM-NEXT:    maxsd %xmm1, %xmm0 # sched: [5:5.00]
   3785 ; ATOM-NEXT:    maxsd (%rdi), %xmm0 # sched: [5:5.00]
   3786 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3787 ;
   3788 ; SLM-LABEL: test_maxsd:
   3789 ; SLM:       # %bb.0:
   3790 ; SLM-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
   3791 ; SLM-NEXT:    maxsd (%rdi), %xmm0 # sched: [6:1.00]
   3792 ; SLM-NEXT:    retq # sched: [4:1.00]
   3793 ;
   3794 ; SANDY-SSE-LABEL: test_maxsd:
   3795 ; SANDY-SSE:       # %bb.0:
   3796 ; SANDY-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
   3797 ; SANDY-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [9:1.00]
   3798 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3799 ;
   3800 ; SANDY-LABEL: test_maxsd:
   3801 ; SANDY:       # %bb.0:
   3802 ; SANDY-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3803 ; SANDY-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   3804 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3805 ;
   3806 ; HASWELL-SSE-LABEL: test_maxsd:
   3807 ; HASWELL-SSE:       # %bb.0:
   3808 ; HASWELL-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
   3809 ; HASWELL-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [8:1.00]
   3810 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3811 ;
   3812 ; HASWELL-LABEL: test_maxsd:
   3813 ; HASWELL:       # %bb.0:
   3814 ; HASWELL-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3815 ; HASWELL-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   3816 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3817 ;
   3818 ; BROADWELL-SSE-LABEL: test_maxsd:
   3819 ; BROADWELL-SSE:       # %bb.0:
   3820 ; BROADWELL-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
   3821 ; BROADWELL-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [8:1.00]
   3822 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3823 ;
   3824 ; BROADWELL-LABEL: test_maxsd:
   3825 ; BROADWELL:       # %bb.0:
   3826 ; BROADWELL-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3827 ; BROADWELL-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   3828 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3829 ;
   3830 ; SKYLAKE-SSE-LABEL: test_maxsd:
   3831 ; SKYLAKE-SSE:       # %bb.0:
   3832 ; SKYLAKE-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [4:0.50]
   3833 ; SKYLAKE-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [9:0.50]
   3834 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3835 ;
   3836 ; SKYLAKE-LABEL: test_maxsd:
   3837 ; SKYLAKE:       # %bb.0:
   3838 ; SKYLAKE-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   3839 ; SKYLAKE-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   3840 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3841 ;
   3842 ; SKX-SSE-LABEL: test_maxsd:
   3843 ; SKX-SSE:       # %bb.0:
   3844 ; SKX-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [4:0.50]
   3845 ; SKX-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [9:0.50]
   3846 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3847 ;
   3848 ; SKX-LABEL: test_maxsd:
   3849 ; SKX:       # %bb.0:
   3850 ; SKX-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   3851 ; SKX-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   3852 ; SKX-NEXT:    retq # sched: [7:1.00]
   3853 ;
   3854 ; BTVER2-SSE-LABEL: test_maxsd:
   3855 ; BTVER2-SSE:       # %bb.0:
   3856 ; BTVER2-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [2:1.00]
   3857 ; BTVER2-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [7:1.00]
   3858 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3859 ;
   3860 ; BTVER2-LABEL: test_maxsd:
   3861 ; BTVER2:       # %bb.0:
   3862 ; BTVER2-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   3863 ; BTVER2-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3864 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3865 ;
   3866 ; ZNVER1-SSE-LABEL: test_maxsd:
   3867 ; ZNVER1-SSE:       # %bb.0:
   3868 ; ZNVER1-SSE-NEXT:    maxsd %xmm1, %xmm0 # sched: [3:1.00]
   3869 ; ZNVER1-SSE-NEXT:    maxsd (%rdi), %xmm0 # sched: [10:1.00]
   3870 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3871 ;
   3872 ; ZNVER1-LABEL: test_maxsd:
   3873 ; ZNVER1:       # %bb.0:
   3874 ; ZNVER1-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3875 ; ZNVER1-NEXT:    vmaxsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   3876 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3877   %1 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
   3878   %2 = load <2 x double>, <2 x double> *%a2, align 16
   3879   %3 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %1, <2 x double> %2)
   3880   ret <2 x double> %3
   3881 }
   3882 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
   3883 
   3884 define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
        ; Scheduling test for the packed-double min intrinsic: calls
        ; @llvm.x86.sse2.min.pd on %a0 and %a1, loads a <2 x double> from %a2
        ; (align 16), then calls the intrinsic again on the first result and
        ; the loaded vector. Each prefix expects a register-register min
        ; followed by a min whose source is the memory operand (%rdi). The
        ; GENERIC, ATOM, SLM and *-SSE prefixes expect minpd; the remaining
        ; prefixes expect the v-prefixed vminpd.
        ; Check lines are autogenerated by utils/update_llc_test_checks.py.
   3885 ; GENERIC-LABEL: test_minpd:
   3886 ; GENERIC:       # %bb.0:
   3887 ; GENERIC-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
   3888 ; GENERIC-NEXT:    minpd (%rdi), %xmm0 # sched: [9:1.00]
   3889 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3890 ;
   3891 ; ATOM-LABEL: test_minpd:
   3892 ; ATOM:       # %bb.0:
   3893 ; ATOM-NEXT:    minpd %xmm1, %xmm0 # sched: [6:3.00]
   3894 ; ATOM-NEXT:    minpd (%rdi), %xmm0 # sched: [7:3.50]
   3895 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3896 ;
   3897 ; SLM-LABEL: test_minpd:
   3898 ; SLM:       # %bb.0:
   3899 ; SLM-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
   3900 ; SLM-NEXT:    minpd (%rdi), %xmm0 # sched: [6:1.00]
   3901 ; SLM-NEXT:    retq # sched: [4:1.00]
   3902 ;
   3903 ; SANDY-SSE-LABEL: test_minpd:
   3904 ; SANDY-SSE:       # %bb.0:
   3905 ; SANDY-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
   3906 ; SANDY-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [9:1.00]
   3907 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3908 ;
   3909 ; SANDY-LABEL: test_minpd:
   3910 ; SANDY:       # %bb.0:
   3911 ; SANDY-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3912 ; SANDY-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   3913 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3914 ;
   3915 ; HASWELL-SSE-LABEL: test_minpd:
   3916 ; HASWELL-SSE:       # %bb.0:
   3917 ; HASWELL-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
   3918 ; HASWELL-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [9:1.00]
   3919 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3920 ;
   3921 ; HASWELL-LABEL: test_minpd:
   3922 ; HASWELL:       # %bb.0:
   3923 ; HASWELL-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3924 ; HASWELL-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   3925 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3926 ;
   3927 ; BROADWELL-SSE-LABEL: test_minpd:
   3928 ; BROADWELL-SSE:       # %bb.0:
   3929 ; BROADWELL-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
   3930 ; BROADWELL-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [8:1.00]
   3931 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3932 ;
   3933 ; BROADWELL-LABEL: test_minpd:
   3934 ; BROADWELL:       # %bb.0:
   3935 ; BROADWELL-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3936 ; BROADWELL-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   3937 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3938 ;
   3939 ; SKYLAKE-SSE-LABEL: test_minpd:
   3940 ; SKYLAKE-SSE:       # %bb.0:
   3941 ; SKYLAKE-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [4:0.50]
   3942 ; SKYLAKE-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [10:0.50]
   3943 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3944 ;
   3945 ; SKYLAKE-LABEL: test_minpd:
   3946 ; SKYLAKE:       # %bb.0:
   3947 ; SKYLAKE-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   3948 ; SKYLAKE-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   3949 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3950 ;
   3951 ; SKX-SSE-LABEL: test_minpd:
   3952 ; SKX-SSE:       # %bb.0:
   3953 ; SKX-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [4:0.50]
   3954 ; SKX-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [10:0.50]
   3955 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3956 ;
   3957 ; SKX-LABEL: test_minpd:
   3958 ; SKX:       # %bb.0:
   3959 ; SKX-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   3960 ; SKX-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   3961 ; SKX-NEXT:    retq # sched: [7:1.00]
   3962 ;
   3963 ; BTVER2-SSE-LABEL: test_minpd:
   3964 ; BTVER2-SSE:       # %bb.0:
   3965 ; BTVER2-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [2:1.00]
   3966 ; BTVER2-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [7:1.00]
   3967 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3968 ;
   3969 ; BTVER2-LABEL: test_minpd:
   3970 ; BTVER2:       # %bb.0:
   3971 ; BTVER2-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   3972 ; BTVER2-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3973 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3974 ;
   3975 ; ZNVER1-SSE-LABEL: test_minpd:
   3976 ; ZNVER1-SSE:       # %bb.0:
   3977 ; ZNVER1-SSE-NEXT:    minpd %xmm1, %xmm0 # sched: [3:1.00]
   3978 ; ZNVER1-SSE-NEXT:    minpd (%rdi), %xmm0 # sched: [10:1.00]
   3979 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3980 ;
   3981 ; ZNVER1-LABEL: test_minpd:
   3982 ; ZNVER1:       # %bb.0:
   3983 ; ZNVER1-NEXT:    vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3984 ; ZNVER1-NEXT:    vminpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   3985 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3986   %1 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
   3987   %2 = load <2 x double>, <2 x double> *%a2, align 16
   3988   %3 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %1, <2 x double> %2)
   3989   ret <2 x double> %3
   3990 }
   3991 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
   3992 
   3993 define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
        ; Scheduling test for (v)minsd. The body calls llvm.x86.sse2.min.sd twice,
        ; once on the two vector arguments and once on the result and a vector
        ; loaded from %a2, so the expected output for every RUN configuration
        ; holds a register-register form followed by a register-memory form,
        ; each carrying the "# sched" annotation printed under -print-schedule.
   3994 ; GENERIC-LABEL: test_minsd:
   3995 ; GENERIC:       # %bb.0:
   3996 ; GENERIC-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
   3997 ; GENERIC-NEXT:    minsd (%rdi), %xmm0 # sched: [9:1.00]
   3998 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3999 ;
   4000 ; ATOM-LABEL: test_minsd:
   4001 ; ATOM:       # %bb.0:
   4002 ; ATOM-NEXT:    minsd %xmm1, %xmm0 # sched: [5:5.00]
   4003 ; ATOM-NEXT:    minsd (%rdi), %xmm0 # sched: [5:5.00]
   4004 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4005 ;
   4006 ; SLM-LABEL: test_minsd:
   4007 ; SLM:       # %bb.0:
   4008 ; SLM-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
   4009 ; SLM-NEXT:    minsd (%rdi), %xmm0 # sched: [6:1.00]
   4010 ; SLM-NEXT:    retq # sched: [4:1.00]
   4011 ;
   4012 ; SANDY-SSE-LABEL: test_minsd:
   4013 ; SANDY-SSE:       # %bb.0:
   4014 ; SANDY-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
   4015 ; SANDY-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [9:1.00]
   4016 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4017 ;
   4018 ; SANDY-LABEL: test_minsd:
   4019 ; SANDY:       # %bb.0:
   4020 ; SANDY-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4021 ; SANDY-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   4022 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4023 ;
   4024 ; HASWELL-SSE-LABEL: test_minsd:
   4025 ; HASWELL-SSE:       # %bb.0:
   4026 ; HASWELL-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
   4027 ; HASWELL-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [8:1.00]
   4028 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4029 ;
   4030 ; HASWELL-LABEL: test_minsd:
   4031 ; HASWELL:       # %bb.0:
   4032 ; HASWELL-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4033 ; HASWELL-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   4034 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4035 ;
   4036 ; BROADWELL-SSE-LABEL: test_minsd:
   4037 ; BROADWELL-SSE:       # %bb.0:
   4038 ; BROADWELL-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
   4039 ; BROADWELL-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [8:1.00]
   4040 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4041 ;
   4042 ; BROADWELL-LABEL: test_minsd:
   4043 ; BROADWELL:       # %bb.0:
   4044 ; BROADWELL-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4045 ; BROADWELL-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   4046 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4047 ;
   4048 ; SKYLAKE-SSE-LABEL: test_minsd:
   4049 ; SKYLAKE-SSE:       # %bb.0:
   4050 ; SKYLAKE-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [4:0.50]
   4051 ; SKYLAKE-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [9:0.50]
   4052 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4053 ;
   4054 ; SKYLAKE-LABEL: test_minsd:
   4055 ; SKYLAKE:       # %bb.0:
   4056 ; SKYLAKE-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4057 ; SKYLAKE-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   4058 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4059 ;
   4060 ; SKX-SSE-LABEL: test_minsd:
   4061 ; SKX-SSE:       # %bb.0:
   4062 ; SKX-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [4:0.50]
   4063 ; SKX-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [9:0.50]
   4064 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4065 ;
   4066 ; SKX-LABEL: test_minsd:
   4067 ; SKX:       # %bb.0:
   4068 ; SKX-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4069 ; SKX-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   4070 ; SKX-NEXT:    retq # sched: [7:1.00]
   4071 ;
   4072 ; BTVER2-SSE-LABEL: test_minsd:
   4073 ; BTVER2-SSE:       # %bb.0:
   4074 ; BTVER2-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [2:1.00]
   4075 ; BTVER2-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [7:1.00]
   4076 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4077 ;
   4078 ; BTVER2-LABEL: test_minsd:
   4079 ; BTVER2:       # %bb.0:
   4080 ; BTVER2-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   4081 ; BTVER2-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   4082 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4083 ;
   4084 ; ZNVER1-SSE-LABEL: test_minsd:
   4085 ; ZNVER1-SSE:       # %bb.0:
   4086 ; ZNVER1-SSE-NEXT:    minsd %xmm1, %xmm0 # sched: [3:1.00]
   4087 ; ZNVER1-SSE-NEXT:    minsd (%rdi), %xmm0 # sched: [10:1.00]
   4088 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4089 ;
   4090 ; ZNVER1-LABEL: test_minsd:
   4091 ; ZNVER1:       # %bb.0:
   4092 ; ZNVER1-NEXT:    vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4093 ; ZNVER1-NEXT:    vminsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   4094 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4095   %1 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; both operands are arguments: register-register form
   4096   %2 = load <2 x double>, <2 x double> *%a2, align 16 ; second operand comes from memory
   4097   %3 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %1, <2 x double> %2) ; shows up as the (%rdi) memory-operand form in the expected output
   4098   ret <2 x double> %3
   4099 }
   4100 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone ; intrinsic called twice by test_minsd above
   4101 
   4102 define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) {
        ; Scheduling test for (v)movapd. A 16-byte-aligned <2 x double> is
        ; loaded from %a0, added to itself, and stored 16-byte-aligned to %a1;
        ; the expected output for every RUN configuration is a movapd load, an
        ; addpd, and a movapd store, each with its "# sched" annotation.
        ; NOTE(review): the fadd presumably keeps the load/store pair in the
        ; double-precision domain so movapd is selected - confirm.
   4103 ; GENERIC-LABEL: test_movapd:
   4104 ; GENERIC:       # %bb.0:
   4105 ; GENERIC-NEXT:    movapd (%rdi), %xmm0 # sched: [6:0.50]
   4106 ; GENERIC-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   4107 ; GENERIC-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
   4108 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4109 ;
   4110 ; ATOM-LABEL: test_movapd:
   4111 ; ATOM:       # %bb.0:
   4112 ; ATOM-NEXT:    movapd (%rdi), %xmm0 # sched: [1:1.00]
   4113 ; ATOM-NEXT:    addpd %xmm0, %xmm0 # sched: [6:3.00]
   4114 ; ATOM-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
   4115 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4116 ;
   4117 ; SLM-LABEL: test_movapd:
   4118 ; SLM:       # %bb.0:
   4119 ; SLM-NEXT:    movapd (%rdi), %xmm0 # sched: [3:1.00]
   4120 ; SLM-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   4121 ; SLM-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
   4122 ; SLM-NEXT:    retq # sched: [4:1.00]
   4123 ;
   4124 ; SANDY-SSE-LABEL: test_movapd:
   4125 ; SANDY-SSE:       # %bb.0:
   4126 ; SANDY-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [6:0.50]
   4127 ; SANDY-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   4128 ; SANDY-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
   4129 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4130 ;
   4131 ; SANDY-LABEL: test_movapd:
   4132 ; SANDY:       # %bb.0:
   4133 ; SANDY-NEXT:    vmovapd (%rdi), %xmm0 # sched: [6:0.50]
   4134 ; SANDY-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   4135 ; SANDY-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
   4136 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4137 ;
   4138 ; HASWELL-SSE-LABEL: test_movapd:
   4139 ; HASWELL-SSE:       # %bb.0:
   4140 ; HASWELL-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [6:0.50]
   4141 ; HASWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   4142 ; HASWELL-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
   4143 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4144 ;
   4145 ; HASWELL-LABEL: test_movapd:
   4146 ; HASWELL:       # %bb.0:
   4147 ; HASWELL-NEXT:    vmovapd (%rdi), %xmm0 # sched: [6:0.50]
   4148 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   4149 ; HASWELL-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
   4150 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4151 ;
   4152 ; BROADWELL-SSE-LABEL: test_movapd:
   4153 ; BROADWELL-SSE:       # %bb.0:
   4154 ; BROADWELL-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [5:0.50]
   4155 ; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   4156 ; BROADWELL-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
   4157 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4158 ;
   4159 ; BROADWELL-LABEL: test_movapd:
   4160 ; BROADWELL:       # %bb.0:
   4161 ; BROADWELL-NEXT:    vmovapd (%rdi), %xmm0 # sched: [5:0.50]
   4162 ; BROADWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   4163 ; BROADWELL-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
   4164 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4165 ;
   4166 ; SKYLAKE-SSE-LABEL: test_movapd:
   4167 ; SKYLAKE-SSE:       # %bb.0:
   4168 ; SKYLAKE-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [6:0.50]
   4169 ; SKYLAKE-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.50]
   4170 ; SKYLAKE-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
   4171 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4172 ;
   4173 ; SKYLAKE-LABEL: test_movapd:
   4174 ; SKYLAKE:       # %bb.0:
   4175 ; SKYLAKE-NEXT:    vmovapd (%rdi), %xmm0 # sched: [6:0.50]
   4176 ; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
   4177 ; SKYLAKE-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
   4178 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4179 ;
   4180 ; SKX-SSE-LABEL: test_movapd:
   4181 ; SKX-SSE:       # %bb.0:
   4182 ; SKX-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [6:0.50]
   4183 ; SKX-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.50]
   4184 ; SKX-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
   4185 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4186 ;
   4187 ; SKX-LABEL: test_movapd:
   4188 ; SKX:       # %bb.0:
   4189 ; SKX-NEXT:    vmovapd (%rdi), %xmm0 # sched: [6:0.50]
   4190 ; SKX-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
   4191 ; SKX-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
   4192 ; SKX-NEXT:    retq # sched: [7:1.00]
   4193 ;
   4194 ; BTVER2-SSE-LABEL: test_movapd:
   4195 ; BTVER2-SSE:       # %bb.0:
   4196 ; BTVER2-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [5:1.00]
   4197 ; BTVER2-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   4198 ; BTVER2-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:1.00]
   4199 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4200 ;
   4201 ; BTVER2-LABEL: test_movapd:
   4202 ; BTVER2:       # %bb.0:
   4203 ; BTVER2-NEXT:    vmovapd (%rdi), %xmm0 # sched: [5:1.00]
   4204 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   4205 ; BTVER2-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:1.00]
   4206 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4207 ;
   4208 ; ZNVER1-SSE-LABEL: test_movapd:
   4209 ; ZNVER1-SSE:       # %bb.0:
   4210 ; ZNVER1-SSE-NEXT:    movapd (%rdi), %xmm0 # sched: [8:0.50]
   4211 ; ZNVER1-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   4212 ; ZNVER1-SSE-NEXT:    movapd %xmm0, (%rsi) # sched: [1:0.50]
   4213 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4214 ;
   4215 ; ZNVER1-LABEL: test_movapd:
   4216 ; ZNVER1:       # %bb.0:
   4217 ; ZNVER1-NEXT:    vmovapd (%rdi), %xmm0 # sched: [8:0.50]
   4218 ; ZNVER1-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   4219 ; ZNVER1-NEXT:    vmovapd %xmm0, (%rsi) # sched: [1:0.50]
   4220 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4221   %1 = load <2 x double>, <2 x double> *%a0, align 16 ; aligned load: the movapd (%rdi) line
   4222   %2 = fadd <2 x double> %1, %1 ; floating-point add of the value with itself: the addpd line
   4223   store <2 x double> %2, <2 x double> *%a1, align 16 ; aligned store: the movapd to (%rsi) line
   4224   ret void
   4225 }
   4226 
   4227 define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) {
        ; Scheduling test for (v)movdqa. A 16-byte-aligned <2 x i64> is loaded
        ; from %a0, added to itself, and stored 16-byte-aligned to %a1; the
        ; expected output for every RUN configuration is a movdqa load, a paddq,
        ; and a movdqa store, each with its "# sched" annotation.
        ; NOTE(review): the integer add presumably keeps the load/store pair in
        ; the integer domain so movdqa is selected - confirm.
   4228 ; GENERIC-LABEL: test_movdqa:
   4229 ; GENERIC:       # %bb.0:
   4230 ; GENERIC-NEXT:    movdqa (%rdi), %xmm0 # sched: [6:0.50]
   4231 ; GENERIC-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   4232 ; GENERIC-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
   4233 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4234 ;
   4235 ; ATOM-LABEL: test_movdqa:
   4236 ; ATOM:       # %bb.0:
   4237 ; ATOM-NEXT:    movdqa (%rdi), %xmm0 # sched: [1:1.00]
   4238 ; ATOM-NEXT:    paddq %xmm0, %xmm0 # sched: [2:1.00]
   4239 ; ATOM-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
   4240 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4241 ;
   4242 ; SLM-LABEL: test_movdqa:
   4243 ; SLM:       # %bb.0:
   4244 ; SLM-NEXT:    movdqa (%rdi), %xmm0 # sched: [3:1.00]
   4245 ; SLM-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   4246 ; SLM-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
   4247 ; SLM-NEXT:    retq # sched: [4:1.00]
   4248 ;
   4249 ; SANDY-SSE-LABEL: test_movdqa:
   4250 ; SANDY-SSE:       # %bb.0:
   4251 ; SANDY-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [6:0.50]
   4252 ; SANDY-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   4253 ; SANDY-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
   4254 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4255 ;
   4256 ; SANDY-LABEL: test_movdqa:
   4257 ; SANDY:       # %bb.0:
   4258 ; SANDY-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
   4259 ; SANDY-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
   4260 ; SANDY-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
   4261 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4262 ;
   4263 ; HASWELL-SSE-LABEL: test_movdqa:
   4264 ; HASWELL-SSE:       # %bb.0:
   4265 ; HASWELL-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [6:0.50]
   4266 ; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   4267 ; HASWELL-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
   4268 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4269 ;
   4270 ; HASWELL-LABEL: test_movdqa:
   4271 ; HASWELL:       # %bb.0:
   4272 ; HASWELL-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
   4273 ; HASWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
   4274 ; HASWELL-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
   4275 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4276 ;
   4277 ; BROADWELL-SSE-LABEL: test_movdqa:
   4278 ; BROADWELL-SSE:       # %bb.0:
   4279 ; BROADWELL-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [5:0.50]
   4280 ; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   4281 ; BROADWELL-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
   4282 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4283 ;
   4284 ; BROADWELL-LABEL: test_movdqa:
   4285 ; BROADWELL:       # %bb.0:
   4286 ; BROADWELL-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [5:0.50]
   4287 ; BROADWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
   4288 ; BROADWELL-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
   4289 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4290 ;
   4291 ; SKYLAKE-SSE-LABEL: test_movdqa:
   4292 ; SKYLAKE-SSE:       # %bb.0:
   4293 ; SKYLAKE-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [6:0.50]
   4294 ; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
   4295 ; SKYLAKE-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
   4296 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4297 ;
   4298 ; SKYLAKE-LABEL: test_movdqa:
   4299 ; SKYLAKE:       # %bb.0:
   4300 ; SKYLAKE-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
   4301 ; SKYLAKE-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
   4302 ; SKYLAKE-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
   4303 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4304 ;
   4305 ; SKX-SSE-LABEL: test_movdqa:
   4306 ; SKX-SSE:       # %bb.0:
   4307 ; SKX-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [6:0.50]
   4308 ; SKX-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
   4309 ; SKX-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
   4310 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4311 ;
   4312 ; SKX-LABEL: test_movdqa:
   4313 ; SKX:       # %bb.0:
   4314 ; SKX-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
   4315 ; SKX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
   4316 ; SKX-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
   4317 ; SKX-NEXT:    retq # sched: [7:1.00]
   4318 ;
   4319 ; BTVER2-SSE-LABEL: test_movdqa:
   4320 ; BTVER2-SSE:       # %bb.0:
   4321 ; BTVER2-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [5:1.00]
   4322 ; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   4323 ; BTVER2-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:1.00]
   4324 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4325 ;
   4326 ; BTVER2-LABEL: test_movdqa:
   4327 ; BTVER2:       # %bb.0:
   4328 ; BTVER2-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [5:1.00]
   4329 ; BTVER2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
   4330 ; BTVER2-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
   4331 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4332 ;
   4333 ; ZNVER1-SSE-LABEL: test_movdqa:
   4334 ; ZNVER1-SSE:       # %bb.0:
   4335 ; ZNVER1-SSE-NEXT:    movdqa (%rdi), %xmm0 # sched: [8:0.50]
   4336 ; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.25]
   4337 ; ZNVER1-SSE-NEXT:    movdqa %xmm0, (%rsi) # sched: [1:0.50]
   4338 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4339 ;
   4340 ; ZNVER1-LABEL: test_movdqa:
   4341 ; ZNVER1:       # %bb.0:
   4342 ; ZNVER1-NEXT:    vmovdqa (%rdi), %xmm0 # sched: [8:0.50]
   4343 ; ZNVER1-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
   4344 ; ZNVER1-NEXT:    vmovdqa %xmm0, (%rsi) # sched: [1:0.50]
   4345 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4346   %1 = load <2 x i64>, <2 x i64> *%a0, align 16 ; aligned load: the movdqa (%rdi) line
   4347   %2 = add <2 x i64> %1, %1 ; integer add of the value with itself: the paddq line
   4348   store <2 x i64> %2, <2 x i64> *%a1, align 16 ; aligned store: the movdqa to (%rsi) line
   4349   ret void
   4350 }
   4351 
   4352 define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) {
        ; Scheduling test for (v)movdqu. Same load / add / store shape as
        ; test_movdqa, but the load and the store are declared with align 1,
        ; and the expected output for every RUN configuration uses movdqu for
        ; both memory accesses, each with its "# sched" annotation.
   4353 ; GENERIC-LABEL: test_movdqu:
   4354 ; GENERIC:       # %bb.0:
   4355 ; GENERIC-NEXT:    movdqu (%rdi), %xmm0 # sched: [6:0.50]
   4356 ; GENERIC-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   4357 ; GENERIC-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
   4358 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4359 ;
   4360 ; ATOM-LABEL: test_movdqu:
   4361 ; ATOM:       # %bb.0:
   4362 ; ATOM-NEXT:    movdqu (%rdi), %xmm0 # sched: [3:1.50]
   4363 ; ATOM-NEXT:    paddq %xmm0, %xmm0 # sched: [2:1.00]
   4364 ; ATOM-NEXT:    movdqu %xmm0, (%rsi) # sched: [2:1.00]
   4365 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4366 ;
   4367 ; SLM-LABEL: test_movdqu:
   4368 ; SLM:       # %bb.0:
   4369 ; SLM-NEXT:    movdqu (%rdi), %xmm0 # sched: [3:1.00]
   4370 ; SLM-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   4371 ; SLM-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
   4372 ; SLM-NEXT:    retq # sched: [4:1.00]
   4373 ;
   4374 ; SANDY-SSE-LABEL: test_movdqu:
   4375 ; SANDY-SSE:       # %bb.0:
   4376 ; SANDY-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [6:0.50]
   4377 ; SANDY-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   4378 ; SANDY-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
   4379 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4380 ;
   4381 ; SANDY-LABEL: test_movdqu:
   4382 ; SANDY:       # %bb.0:
   4383 ; SANDY-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
   4384 ; SANDY-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
   4385 ; SANDY-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
   4386 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4387 ;
   4388 ; HASWELL-SSE-LABEL: test_movdqu:
   4389 ; HASWELL-SSE:       # %bb.0:
   4390 ; HASWELL-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [6:0.50]
   4391 ; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   4392 ; HASWELL-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
   4393 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4394 ;
   4395 ; HASWELL-LABEL: test_movdqu:
   4396 ; HASWELL:       # %bb.0:
   4397 ; HASWELL-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
   4398 ; HASWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
   4399 ; HASWELL-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
   4400 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4401 ;
   4402 ; BROADWELL-SSE-LABEL: test_movdqu:
   4403 ; BROADWELL-SSE:       # %bb.0:
   4404 ; BROADWELL-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [5:0.50]
   4405 ; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   4406 ; BROADWELL-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
   4407 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4408 ;
   4409 ; BROADWELL-LABEL: test_movdqu:
   4410 ; BROADWELL:       # %bb.0:
   4411 ; BROADWELL-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [5:0.50]
   4412 ; BROADWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
   4413 ; BROADWELL-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
   4414 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4415 ;
   4416 ; SKYLAKE-SSE-LABEL: test_movdqu:
   4417 ; SKYLAKE-SSE:       # %bb.0:
   4418 ; SKYLAKE-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [6:0.50]
   4419 ; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
   4420 ; SKYLAKE-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
   4421 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4422 ;
   4423 ; SKYLAKE-LABEL: test_movdqu:
   4424 ; SKYLAKE:       # %bb.0:
   4425 ; SKYLAKE-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
   4426 ; SKYLAKE-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
   4427 ; SKYLAKE-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
   4428 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4429 ;
   4430 ; SKX-SSE-LABEL: test_movdqu:
   4431 ; SKX-SSE:       # %bb.0:
   4432 ; SKX-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [6:0.50]
   4433 ; SKX-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
   4434 ; SKX-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
   4435 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4436 ;
   4437 ; SKX-LABEL: test_movdqu:
   4438 ; SKX:       # %bb.0:
   4439 ; SKX-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
   4440 ; SKX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
   4441 ; SKX-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
   4442 ; SKX-NEXT:    retq # sched: [7:1.00]
   4443 ;
   4444 ; BTVER2-SSE-LABEL: test_movdqu:
   4445 ; BTVER2-SSE:       # %bb.0:
   4446 ; BTVER2-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [5:1.00]
   4447 ; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   4448 ; BTVER2-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:1.00]
   4449 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4450 ;
   4451 ; BTVER2-LABEL: test_movdqu:
   4452 ; BTVER2:       # %bb.0:
   4453 ; BTVER2-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [5:1.00]
   4454 ; BTVER2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
   4455 ; BTVER2-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
   4456 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4457 ;
   4458 ; ZNVER1-SSE-LABEL: test_movdqu:
   4459 ; ZNVER1-SSE:       # %bb.0:
   4460 ; ZNVER1-SSE-NEXT:    movdqu (%rdi), %xmm0 # sched: [8:0.50]
   4461 ; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.25]
   4462 ; ZNVER1-SSE-NEXT:    movdqu %xmm0, (%rsi) # sched: [1:0.50]
   4463 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4464 ;
   4465 ; ZNVER1-LABEL: test_movdqu:
   4466 ; ZNVER1:       # %bb.0:
   4467 ; ZNVER1-NEXT:    vmovdqu (%rdi), %xmm0 # sched: [8:0.50]
   4468 ; ZNVER1-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
   4469 ; ZNVER1-NEXT:    vmovdqu %xmm0, (%rsi) # sched: [1:0.50]
   4470 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4471   %1 = load <2 x i64>, <2 x i64> *%a0, align 1 ; align 1 load: the movdqu (%rdi) line
   4472   %2 = add <2 x i64> %1, %1 ; integer add of the value with itself: the paddq line
   4473   store <2 x i64> %2, <2 x i64> *%a1, align 1 ; align 1 store: the movdqu to (%rsi) line
   4474   ret void
   4475 }
   4476 
   4477 define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
        ; Scheduling test for the 32-bit (v)movd forms. The body places the
        ; scalar argument %a1 and a scalar loaded from %a2 into lane 0 of two
        ; vectors, adds each to %a0, then stores lane 0 of the first sum back
        ; through %a2 and returns lane 0 of the second. The expected output for
        ; every RUN configuration therefore holds four movd shapes: from a
        ; general-purpose register, from memory, to a general-purpose register
        ; and to memory. Instruction order differs between configurations.
   4478 ; GENERIC-LABEL: test_movd:
   4479 ; GENERIC:       # %bb.0:
   4480 ; GENERIC-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
   4481 ; GENERIC-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
   4482 ; GENERIC-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   4483 ; GENERIC-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
   4484 ; GENERIC-NEXT:    movd %xmm2, %eax # sched: [2:1.00]
   4485 ; GENERIC-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
   4486 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4487 ;
   4488 ; ATOM-LABEL: test_movd:
   4489 ; ATOM:       # %bb.0:
   4490 ; ATOM-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
   4491 ; ATOM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   4492 ; ATOM-NEXT:    movd %xmm1, %eax # sched: [3:3.00]
   4493 ; ATOM-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
   4494 ; ATOM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   4495 ; ATOM-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
   4496 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4497 ;
   4498 ; SLM-LABEL: test_movd:
   4499 ; SLM:       # %bb.0:
   4500 ; SLM-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [3:1.00]
   4501 ; SLM-NEXT:    movd %edi, %xmm1 # sched: [1:0.50]
   4502 ; SLM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   4503 ; SLM-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
   4504 ; SLM-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
   4505 ; SLM-NEXT:    movd %xmm2, %eax # sched: [1:0.50]
   4506 ; SLM-NEXT:    retq # sched: [4:1.00]
   4507 ;
   4508 ; SANDY-SSE-LABEL: test_movd:
   4509 ; SANDY-SSE:       # %bb.0:
   4510 ; SANDY-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
   4511 ; SANDY-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
   4512 ; SANDY-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   4513 ; SANDY-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
   4514 ; SANDY-SSE-NEXT:    movd %xmm2, %eax # sched: [2:1.00]
   4515 ; SANDY-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
   4516 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4517 ;
   4518 ; SANDY-LABEL: test_movd:
   4519 ; SANDY:       # %bb.0:
   4520 ; SANDY-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
   4521 ; SANDY-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
   4522 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   4523 ; SANDY-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
   4524 ; SANDY-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
   4525 ; SANDY-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:1.00]
   4526 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4527 ;
   4528 ; HASWELL-SSE-LABEL: test_movd:
   4529 ; HASWELL-SSE:       # %bb.0:
   4530 ; HASWELL-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
   4531 ; HASWELL-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
   4532 ; HASWELL-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   4533 ; HASWELL-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
   4534 ; HASWELL-SSE-NEXT:    movd %xmm2, %eax # sched: [1:1.00]
   4535 ; HASWELL-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
   4536 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4537 ;
   4538 ; HASWELL-LABEL: test_movd:
   4539 ; HASWELL:       # %bb.0:
   4540 ; HASWELL-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
   4541 ; HASWELL-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
   4542 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   4543 ; HASWELL-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
   4544 ; HASWELL-NEXT:    vmovd %xmm0, %eax # sched: [1:1.00]
   4545 ; HASWELL-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:1.00]
   4546 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4547 ;
   4548 ; BROADWELL-SSE-LABEL: test_movd:
   4549 ; BROADWELL-SSE:       # %bb.0:
   4550 ; BROADWELL-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
   4551 ; BROADWELL-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
   4552 ; BROADWELL-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   4553 ; BROADWELL-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
   4554 ; BROADWELL-SSE-NEXT:    movd %xmm2, %eax # sched: [1:1.00]
   4555 ; BROADWELL-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
   4556 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4557 ;
   4558 ; BROADWELL-LABEL: test_movd:
   4559 ; BROADWELL:       # %bb.0:
   4560 ; BROADWELL-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
   4561 ; BROADWELL-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
   4562 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   4563 ; BROADWELL-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
   4564 ; BROADWELL-NEXT:    vmovd %xmm0, %eax # sched: [1:1.00]
   4565 ; BROADWELL-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:1.00]
   4566 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4567 ;
   4568 ; SKYLAKE-SSE-LABEL: test_movd:
   4569 ; SKYLAKE-SSE:       # %bb.0:
   4570 ; SKYLAKE-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
   4571 ; SKYLAKE-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
   4572 ; SKYLAKE-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.33]
   4573 ; SKYLAKE-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.33]
   4574 ; SKYLAKE-SSE-NEXT:    movd %xmm2, %eax # sched: [2:1.00]
   4575 ; SKYLAKE-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
   4576 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4577 ;
   4578 ; SKYLAKE-LABEL: test_movd:
   4579 ; SKYLAKE:       # %bb.0:
   4580 ; SKYLAKE-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
   4581 ; SKYLAKE-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
   4582 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
   4583 ; SKYLAKE-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
   4584 ; SKYLAKE-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
   4585 ; SKYLAKE-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:1.00]
   4586 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4587 ;
   4588 ; SKX-SSE-LABEL: test_movd:
   4589 ; SKX-SSE:       # %bb.0:
   4590 ; SKX-SSE-NEXT:    movd %edi, %xmm1 # sched: [1:1.00]
   4591 ; SKX-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
   4592 ; SKX-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.33]
   4593 ; SKX-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.33]
   4594 ; SKX-SSE-NEXT:    movd %xmm2, %eax # sched: [2:1.00]
   4595 ; SKX-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:1.00]
   4596 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4597 ;
   4598 ; SKX-LABEL: test_movd:
   4599 ; SKX:       # %bb.0:
   4600 ; SKX-NEXT:    vmovd %edi, %xmm1 # sched: [1:1.00]
   4601 ; SKX-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
   4602 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
   4603 ; SKX-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
   4604 ; SKX-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
   4605 ; SKX-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:1.00]
   4606 ; SKX-NEXT:    retq # sched: [7:1.00]
   4607 ;
   4608 ; BTVER2-SSE-LABEL: test_movd:
   4609 ; BTVER2-SSE:       # %bb.0:
   4610 ; BTVER2-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
   4611 ; BTVER2-SSE-NEXT:    movd %edi, %xmm1 # sched: [8:0.50]
   4612 ; BTVER2-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.50]
   4613 ; BTVER2-SSE-NEXT:    movd %xmm2, %eax # sched: [4:1.00]
   4614 ; BTVER2-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   4615 ; BTVER2-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [2:1.00]
   4616 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4617 ;
   4618 ; BTVER2-LABEL: test_movd:
   4619 ; BTVER2:       # %bb.0:
   4620 ; BTVER2-NEXT:    vmovd %edi, %xmm1 # sched: [8:0.50]
   4621 ; BTVER2-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
   4622 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   4623 ; BTVER2-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
   4624 ; BTVER2-NEXT:    vmovd %xmm0, %eax # sched: [4:1.00]
   4625 ; BTVER2-NEXT:    vmovd %xmm1, (%rsi) # sched: [2:1.00]
   4626 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4627 ;
   4628 ; ZNVER1-SSE-LABEL: test_movd:
   4629 ; ZNVER1-SSE:       # %bb.0:
   4630 ; ZNVER1-SSE-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50]
   4631 ; ZNVER1-SSE-NEXT:    movd %edi, %xmm1 # sched: [3:1.00]
   4632 ; ZNVER1-SSE-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.25]
   4633 ; ZNVER1-SSE-NEXT:    movd %xmm1, (%rsi) # sched: [1:0.50]
   4634 ; ZNVER1-SSE-NEXT:    paddd %xmm0, %xmm2 # sched: [1:0.25]
   4635 ; ZNVER1-SSE-NEXT:    movd %xmm2, %eax # sched: [2:1.00]
   4636 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4637 ;
   4638 ; ZNVER1-LABEL: test_movd:
   4639 ; ZNVER1:       # %bb.0:
   4640 ; ZNVER1-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50]
   4641 ; ZNVER1-NEXT:    vmovd %edi, %xmm1 # sched: [3:1.00]
   4642 ; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
   4643 ; ZNVER1-NEXT:    vmovd %xmm1, (%rsi) # sched: [1:0.50]
   4644 ; ZNVER1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.25]
   4645 ; ZNVER1-NEXT:    vmovd %xmm0, %eax # sched: [2:1.00]
   4646 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4647   %1 = insertelement <4 x i32> undef, i32 %a1, i32 0 ; scalar argument into lane 0: the movd from %edi
   4648   %2 = load i32, i32 *%a2 ; scalar read through the pointer argument
   4649   %3 = insertelement <4 x i32> undef, i32 %2, i32 0 ; loaded scalar into lane 0: the movd from memory
   4650   %4 = add <4 x i32> %a0, %1
   4651   %5 = add <4 x i32> %a0, %3
   4652   %6 = extractelement <4 x i32> %4, i32 0 ; lane 0 of the %a1-based sum
   4653   %7 = extractelement <4 x i32> %5, i32 0 ; lane 0 of the memory-based sum
   4654   store i32 %6, i32* %a2 ; written back through the same pointer: the movd to (%rsi)
   4655   ret i32 %7 ; returned in a general-purpose register: the movd to %eax
   4656 }
   4657 
   4658 define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) {
        ; Schedule test for the 64-bit moves between GPR/memory and XMM registers
        ; (movq, or vmovq on the AVX runs).
        ; %a1 and the i64 loaded from %a2 are each inserted into lane 0 of an undef
        ; vector and added to %a0. Lane 0 of the first sum is stored back to %a2 and
        ; lane 0 of the second sum is returned, so the expected assembly below covers
        ; four move forms - GPR to XMM, load to XMM, XMM to GPR and XMM to memory.
        ; The "# sched" annotations come from llc -print-schedule (see the RUN lines
        ; at the top of the file); presumably [latency:reciprocal throughput] - TODO
        ; confirm against the scheduler documentation.
   4659 ; GENERIC-LABEL: test_movd_64:
   4660 ; GENERIC:       # %bb.0:
   4661 ; GENERIC-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
   4662 ; GENERIC-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
   4663 ; GENERIC-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   4664 ; GENERIC-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
   4665 ; GENERIC-NEXT:    movq %xmm2, %rax # sched: [2:1.00]
   4666 ; GENERIC-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
   4667 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4668 ;
   4669 ; ATOM-LABEL: test_movd_64:
   4670 ; ATOM:       # %bb.0:
   4671 ; ATOM-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00]
   4672 ; ATOM-NEXT:    movq %rdi, %xmm2 # sched: [1:1.00]
   4673 ; ATOM-NEXT:    paddq %xmm0, %xmm1 # sched: [2:1.00]
   4674 ; ATOM-NEXT:    paddq %xmm0, %xmm2 # sched: [2:1.00]
   4675 ; ATOM-NEXT:    movq %xmm1, %rax # sched: [3:3.00]
   4676 ; ATOM-NEXT:    movq %xmm2, (%rsi) # sched: [1:1.00]
   4677 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4678 ;
   4679 ; SLM-LABEL: test_movd_64:
   4680 ; SLM:       # %bb.0:
   4681 ; SLM-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [3:1.00]
   4682 ; SLM-NEXT:    movq %rdi, %xmm1 # sched: [1:0.50]
   4683 ; SLM-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   4684 ; SLM-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
   4685 ; SLM-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
   4686 ; SLM-NEXT:    movq %xmm2, %rax # sched: [1:0.50]
   4687 ; SLM-NEXT:    retq # sched: [4:1.00]
   4688 ;
   4689 ; SANDY-SSE-LABEL: test_movd_64:
   4690 ; SANDY-SSE:       # %bb.0:
   4691 ; SANDY-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
   4692 ; SANDY-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
   4693 ; SANDY-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   4694 ; SANDY-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
   4695 ; SANDY-SSE-NEXT:    movq %xmm2, %rax # sched: [2:1.00]
   4696 ; SANDY-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
   4697 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4698 ;
   4699 ; SANDY-LABEL: test_movd_64:
   4700 ; SANDY:       # %bb.0:
   4701 ; SANDY-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
   4702 ; SANDY-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
   4703 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   4704 ; SANDY-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
   4705 ; SANDY-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
   4706 ; SANDY-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:1.00]
   4707 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4708 ;
   4709 ; HASWELL-SSE-LABEL: test_movd_64:
   4710 ; HASWELL-SSE:       # %bb.0:
   4711 ; HASWELL-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
   4712 ; HASWELL-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
   4713 ; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   4714 ; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
   4715 ; HASWELL-SSE-NEXT:    movq %xmm2, %rax # sched: [1:1.00]
   4716 ; HASWELL-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
   4717 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4718 ;
   4719 ; HASWELL-LABEL: test_movd_64:
   4720 ; HASWELL:       # %bb.0:
   4721 ; HASWELL-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
   4722 ; HASWELL-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
   4723 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   4724 ; HASWELL-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
   4725 ; HASWELL-NEXT:    vmovq %xmm0, %rax # sched: [1:1.00]
   4726 ; HASWELL-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:1.00]
   4727 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4728 ;
   4729 ; BROADWELL-SSE-LABEL: test_movd_64:
   4730 ; BROADWELL-SSE:       # %bb.0:
   4731 ; BROADWELL-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
   4732 ; BROADWELL-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
   4733 ; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   4734 ; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
   4735 ; BROADWELL-SSE-NEXT:    movq %xmm2, %rax # sched: [1:1.00]
   4736 ; BROADWELL-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
   4737 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4738 ;
   4739 ; BROADWELL-LABEL: test_movd_64:
   4740 ; BROADWELL:       # %bb.0:
   4741 ; BROADWELL-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
   4742 ; BROADWELL-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
   4743 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   4744 ; BROADWELL-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
   4745 ; BROADWELL-NEXT:    vmovq %xmm0, %rax # sched: [1:1.00]
   4746 ; BROADWELL-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:1.00]
   4747 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4748 ;
   4749 ; SKYLAKE-SSE-LABEL: test_movd_64:
   4750 ; SKYLAKE-SSE:       # %bb.0:
   4751 ; SKYLAKE-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
   4752 ; SKYLAKE-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
   4753 ; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.33]
   4754 ; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.33]
   4755 ; SKYLAKE-SSE-NEXT:    movq %xmm2, %rax # sched: [2:1.00]
   4756 ; SKYLAKE-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
   4757 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4758 ;
   4759 ; SKYLAKE-LABEL: test_movd_64:
   4760 ; SKYLAKE:       # %bb.0:
   4761 ; SKYLAKE-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
   4762 ; SKYLAKE-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
   4763 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
   4764 ; SKYLAKE-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
   4765 ; SKYLAKE-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
   4766 ; SKYLAKE-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:1.00]
   4767 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4768 ;
   4769 ; SKX-SSE-LABEL: test_movd_64:
   4770 ; SKX-SSE:       # %bb.0:
   4771 ; SKX-SSE-NEXT:    movq %rdi, %xmm1 # sched: [1:1.00]
   4772 ; SKX-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
   4773 ; SKX-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.33]
   4774 ; SKX-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.33]
   4775 ; SKX-SSE-NEXT:    movq %xmm2, %rax # sched: [2:1.00]
   4776 ; SKX-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:1.00]
   4777 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4778 ;
   4779 ; SKX-LABEL: test_movd_64:
   4780 ; SKX:       # %bb.0:
   4781 ; SKX-NEXT:    vmovq %rdi, %xmm1 # sched: [1:1.00]
   4782 ; SKX-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
   4783 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
   4784 ; SKX-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
   4785 ; SKX-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
   4786 ; SKX-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:1.00]
   4787 ; SKX-NEXT:    retq # sched: [7:1.00]
   4788 ;
   4789 ; BTVER2-SSE-LABEL: test_movd_64:
   4790 ; BTVER2-SSE:       # %bb.0:
   4791 ; BTVER2-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
   4792 ; BTVER2-SSE-NEXT:    movq %rdi, %xmm1 # sched: [8:0.50]
   4793 ; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.50]
   4794 ; BTVER2-SSE-NEXT:    movq %xmm2, %rax # sched: [4:1.00]
   4795 ; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   4796 ; BTVER2-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [2:1.00]
   4797 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4798 ;
   4799 ; BTVER2-LABEL: test_movd_64:
   4800 ; BTVER2:       # %bb.0:
   4801 ; BTVER2-NEXT:    vmovq %rdi, %xmm1 # sched: [8:0.50]
   4802 ; BTVER2-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
   4803 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   4804 ; BTVER2-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
   4805 ; BTVER2-NEXT:    vmovq %xmm0, %rax # sched: [4:1.00]
   4806 ; BTVER2-NEXT:    vmovq %xmm1, (%rsi) # sched: [2:1.00]
   4807 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4808 ;
   4809 ; ZNVER1-SSE-LABEL: test_movd_64:
   4810 ; ZNVER1-SSE:       # %bb.0:
   4811 ; ZNVER1-SSE-NEXT:    movq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50]
   4812 ; ZNVER1-SSE-NEXT:    movq %rdi, %xmm1 # sched: [3:1.00]
   4813 ; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.25]
   4814 ; ZNVER1-SSE-NEXT:    movq %xmm1, (%rsi) # sched: [1:0.50]
   4815 ; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm2 # sched: [1:0.25]
   4816 ; ZNVER1-SSE-NEXT:    movq %xmm2, %rax # sched: [2:1.00]
   4817 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4818 ;
   4819 ; ZNVER1-LABEL: test_movd_64:
   4820 ; ZNVER1:       # %bb.0:
   4821 ; ZNVER1-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50]
   4822 ; ZNVER1-NEXT:    vmovq %rdi, %xmm1 # sched: [3:1.00]
   4823 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
   4824 ; ZNVER1-NEXT:    vmovq %xmm1, (%rsi) # sched: [1:0.50]
   4825 ; ZNVER1-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.25]
   4826 ; ZNVER1-NEXT:    vmovq %xmm0, %rax # sched: [2:1.00]
   4827 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4828   %1 = insertelement <2 x i64> undef, i64 %a1, i64 0
   4829   %2 = load i64, i64 *%a2
   4830   %3 = insertelement <2 x i64> undef, i64 %2, i64 0
   4831   %4 = add <2 x i64> %a0, %1
   4832   %5 = add <2 x i64> %a0, %3
   4833   %6 = extractelement <2 x i64> %4, i64 0
   4834   %7 = extractelement <2 x i64> %5, i64 0
   4835   store i64 %6, i64* %a2
   4836   ret i64 %7
   4837 }
   4838 
   4839 define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
        ; Schedule test for movhpd (vmovhpd on the AVX runs) in both its load and
        ; store forms.
        ; %a2 is bitcast to double*; the loaded double replaces lane 1 of %a1, the
        ; result is added to %a0, and lane 1 of the sum is stored back through the
        ; same pointer. The expected assembly is therefore a high-half load, an
        ; addpd, and a high-half store.
        ; The "# sched" annotations come from llc -print-schedule (see the RUN lines
        ; at the top of the file); presumably [latency:reciprocal throughput] - TODO
        ; confirm.
   4840 ; GENERIC-LABEL: test_movhpd:
   4841 ; GENERIC:       # %bb.0:
   4842 ; GENERIC-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   4843 ; GENERIC-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   4844 ; GENERIC-NEXT:    movhpd %xmm1, (%rdi) # sched: [1:1.00]
   4845 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4846 ;
   4847 ; ATOM-LABEL: test_movhpd:
   4848 ; ATOM:       # %bb.0:
   4849 ; ATOM-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
   4850 ; ATOM-NEXT:    addpd %xmm0, %xmm1 # sched: [6:3.00]
   4851 ; ATOM-NEXT:    movhpd %xmm1, (%rdi) # sched: [1:1.00]
   4852 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4853 ;
   4854 ; SLM-LABEL: test_movhpd:
   4855 ; SLM:       # %bb.0:
   4856 ; SLM-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
   4857 ; SLM-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   4858 ; SLM-NEXT:    movhpd %xmm1, (%rdi) # sched: [1:1.00]
   4859 ; SLM-NEXT:    retq # sched: [4:1.00]
   4860 ;
   4861 ; SANDY-SSE-LABEL: test_movhpd:
   4862 ; SANDY-SSE:       # %bb.0:
   4863 ; SANDY-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   4864 ; SANDY-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   4865 ; SANDY-SSE-NEXT:    movhpd %xmm1, (%rdi) # sched: [1:1.00]
   4866 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4867 ;
   4868 ; SANDY-LABEL: test_movhpd:
   4869 ; SANDY:       # %bb.0:
   4870 ; SANDY-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   4871 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4872 ; SANDY-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
   4873 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4874 ;
   4875 ; HASWELL-SSE-LABEL: test_movhpd:
   4876 ; HASWELL-SSE:       # %bb.0:
   4877 ; HASWELL-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   4878 ; HASWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   4879 ; HASWELL-SSE-NEXT:    movhpd %xmm1, (%rdi) # sched: [1:1.00]
   4880 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4881 ;
   4882 ; HASWELL-LABEL: test_movhpd:
   4883 ; HASWELL:       # %bb.0:
   4884 ; HASWELL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   4885 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4886 ; HASWELL-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
   4887 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4888 ;
   4889 ; BROADWELL-SSE-LABEL: test_movhpd:
   4890 ; BROADWELL-SSE:       # %bb.0:
   4891 ; BROADWELL-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   4892 ; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   4893 ; BROADWELL-SSE-NEXT:    movhpd %xmm1, (%rdi) # sched: [1:1.00]
   4894 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4895 ;
   4896 ; BROADWELL-LABEL: test_movhpd:
   4897 ; BROADWELL:       # %bb.0:
   4898 ; BROADWELL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   4899 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4900 ; BROADWELL-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
   4901 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4902 ;
   4903 ; SKYLAKE-SSE-LABEL: test_movhpd:
   4904 ; SKYLAKE-SSE:       # %bb.0:
   4905 ; SKYLAKE-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   4906 ; SKYLAKE-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [4:0.50]
   4907 ; SKYLAKE-SSE-NEXT:    movhpd %xmm1, (%rdi) # sched: [1:1.00]
   4908 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4909 ;
   4910 ; SKYLAKE-LABEL: test_movhpd:
   4911 ; SKYLAKE:       # %bb.0:
   4912 ; SKYLAKE-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   4913 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4914 ; SKYLAKE-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
   4915 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4916 ;
   4917 ; SKX-SSE-LABEL: test_movhpd:
   4918 ; SKX-SSE:       # %bb.0:
   4919 ; SKX-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   4920 ; SKX-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [4:0.50]
   4921 ; SKX-SSE-NEXT:    movhpd %xmm1, (%rdi) # sched: [1:1.00]
   4922 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4923 ;
   4924 ; SKX-LABEL: test_movhpd:
   4925 ; SKX:       # %bb.0:
   4926 ; SKX-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   4927 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4928 ; SKX-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
   4929 ; SKX-NEXT:    retq # sched: [7:1.00]
   4930 ;
   4931 ; BTVER2-SSE-LABEL: test_movhpd:
   4932 ; BTVER2-SSE:       # %bb.0:
   4933 ; BTVER2-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   4934 ; BTVER2-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   4935 ; BTVER2-SSE-NEXT:    movhpd %xmm1, (%rdi) # sched: [2:1.00]
   4936 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4937 ;
   4938 ; BTVER2-LABEL: test_movhpd:
   4939 ; BTVER2:       # %bb.0:
   4940 ; BTVER2-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   4941 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4942 ; BTVER2-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [2:1.00]
   4943 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4944 ;
   4945 ; ZNVER1-SSE-LABEL: test_movhpd:
   4946 ; ZNVER1-SSE:       # %bb.0:
   4947 ; ZNVER1-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
   4948 ; ZNVER1-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   4949 ; ZNVER1-SSE-NEXT:    movhpd %xmm1, (%rdi) # sched: [1:0.50]
   4950 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4951 ;
   4952 ; ZNVER1-LABEL: test_movhpd:
   4953 ; ZNVER1:       # %bb.0:
   4954 ; ZNVER1-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
   4955 ; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4956 ; ZNVER1-NEXT:    vmovhpd %xmm0, (%rdi) # sched: [1:0.50]
   4957 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4958   %1 = bitcast x86_mmx* %a2 to double*
   4959   %2 = load double, double *%1, align 8
   4960   %3 = insertelement <2 x double> %a1, double %2, i32 1
   4961   %4 = fadd <2 x double> %a0, %3
   4962   %5 = extractelement <2 x double> %4, i32 1
   4963   store double %5, double* %1
   4964   ret void
   4965 }
   4966 
   4967 define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
        ; Schedule test for movlpd (vmovlpd on the AVX runs) in both its load and
        ; store forms.
        ; %a2 is bitcast to double*; the loaded double replaces lane 0 of %a1, the
        ; result is added to %a0, and lane 0 of the sum is stored back through the
        ; same pointer. The expected assembly is therefore a low-half load, an
        ; addpd, and a low-half store.
        ; The "# sched" annotations come from llc -print-schedule (see the RUN lines
        ; at the top of the file); presumably [latency:reciprocal throughput] - TODO
        ; confirm.
   4968 ; GENERIC-LABEL: test_movlpd:
   4969 ; GENERIC:       # %bb.0:
   4970 ; GENERIC-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
   4971 ; GENERIC-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   4972 ; GENERIC-NEXT:    movlpd %xmm1, (%rdi) # sched: [1:1.00]
   4973 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4974 ;
   4975 ; ATOM-LABEL: test_movlpd:
   4976 ; ATOM:       # %bb.0:
   4977 ; ATOM-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
   4978 ; ATOM-NEXT:    addpd %xmm0, %xmm1 # sched: [6:3.00]
   4979 ; ATOM-NEXT:    movlpd %xmm1, (%rdi) # sched: [1:1.00]
   4980 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4981 ;
   4982 ; SLM-LABEL: test_movlpd:
   4983 ; SLM:       # %bb.0:
   4984 ; SLM-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
   4985 ; SLM-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   4986 ; SLM-NEXT:    movlpd %xmm1, (%rdi) # sched: [1:1.00]
   4987 ; SLM-NEXT:    retq # sched: [4:1.00]
   4988 ;
   4989 ; SANDY-SSE-LABEL: test_movlpd:
   4990 ; SANDY-SSE:       # %bb.0:
   4991 ; SANDY-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
   4992 ; SANDY-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   4993 ; SANDY-SSE-NEXT:    movlpd %xmm1, (%rdi) # sched: [1:1.00]
   4994 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4995 ;
   4996 ; SANDY-LABEL: test_movlpd:
   4997 ; SANDY:       # %bb.0:
   4998 ; SANDY-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
   4999 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5000 ; SANDY-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
   5001 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5002 ;
   5003 ; HASWELL-SSE-LABEL: test_movlpd:
   5004 ; HASWELL-SSE:       # %bb.0:
   5005 ; HASWELL-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   5006 ; HASWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   5007 ; HASWELL-SSE-NEXT:    movlpd %xmm1, (%rdi) # sched: [1:1.00]
   5008 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5009 ;
   5010 ; HASWELL-LABEL: test_movlpd:
   5011 ; HASWELL:       # %bb.0:
   5012 ; HASWELL-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   5013 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5014 ; HASWELL-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
   5015 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5016 ;
   5017 ; BROADWELL-SSE-LABEL: test_movlpd:
   5018 ; BROADWELL-SSE:       # %bb.0:
   5019 ; BROADWELL-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   5020 ; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   5021 ; BROADWELL-SSE-NEXT:    movlpd %xmm1, (%rdi) # sched: [1:1.00]
   5022 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5023 ;
   5024 ; BROADWELL-LABEL: test_movlpd:
   5025 ; BROADWELL:       # %bb.0:
   5026 ; BROADWELL-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   5027 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5028 ; BROADWELL-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
   5029 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5030 ;
   5031 ; SKYLAKE-SSE-LABEL: test_movlpd:
   5032 ; SKYLAKE-SSE:       # %bb.0:
   5033 ; SKYLAKE-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   5034 ; SKYLAKE-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [4:0.50]
   5035 ; SKYLAKE-SSE-NEXT:    movlpd %xmm1, (%rdi) # sched: [1:1.00]
   5036 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5037 ;
   5038 ; SKYLAKE-LABEL: test_movlpd:
   5039 ; SKYLAKE:       # %bb.0:
   5040 ; SKYLAKE-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   5041 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   5042 ; SKYLAKE-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
   5043 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5044 ;
   5045 ; SKX-SSE-LABEL: test_movlpd:
   5046 ; SKX-SSE:       # %bb.0:
   5047 ; SKX-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   5048 ; SKX-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [4:0.50]
   5049 ; SKX-SSE-NEXT:    movlpd %xmm1, (%rdi) # sched: [1:1.00]
   5050 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5051 ;
   5052 ; SKX-LABEL: test_movlpd:
   5053 ; SKX:       # %bb.0:
   5054 ; SKX-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   5055 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   5056 ; SKX-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
   5057 ; SKX-NEXT:    retq # sched: [7:1.00]
   5058 ;
   5059 ; BTVER2-SSE-LABEL: test_movlpd:
   5060 ; BTVER2-SSE:       # %bb.0:
   5061 ; BTVER2-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   5062 ; BTVER2-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   5063 ; BTVER2-SSE-NEXT:    movlpd %xmm1, (%rdi) # sched: [2:1.00]
   5064 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5065 ;
   5066 ; BTVER2-LABEL: test_movlpd:
   5067 ; BTVER2:       # %bb.0:
   5068 ; BTVER2-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   5069 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5070 ; BTVER2-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [2:1.00]
   5071 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5072 ;
   5073 ; ZNVER1-SSE-LABEL: test_movlpd:
   5074 ; ZNVER1-SSE:       # %bb.0:
   5075 ; ZNVER1-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
   5076 ; ZNVER1-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   5077 ; ZNVER1-SSE-NEXT:    movlpd %xmm1, (%rdi) # sched: [1:0.50]
   5078 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5079 ;
   5080 ; ZNVER1-LABEL: test_movlpd:
   5081 ; ZNVER1:       # %bb.0:
   5082 ; ZNVER1-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
   5083 ; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5084 ; ZNVER1-NEXT:    vmovlpd %xmm0, (%rdi) # sched: [1:0.50]
   5085 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5086   %1 = bitcast x86_mmx* %a2 to double*
   5087   %2 = load double, double *%1, align 8
   5088   %3 = insertelement <2 x double> %a1, double %2, i32 0
   5089   %4 = fadd <2 x double> %a0, %3
   5090   %5 = extractelement <2 x double> %4, i32 0
   5091   store double %5, double* %1
   5092   ret void
   5093 }
   5094 
   5095 define i32 @test_movmskpd(<2 x double> %a0) {
        ; Schedule test for movmskpd (vmovmskpd on the AVX runs).
        ; The body calls the llvm.x86.sse2.movmsk.pd intrinsic on %a0 and returns
        ; its i32 result, so each expected sequence is the single mask-extract
        ; instruction followed by retq. The ATOM expectations additionally contain
        ; two nop instructions before retq.
        ; The "# sched" annotations come from llc -print-schedule (see the RUN lines
        ; at the top of the file); presumably [latency:reciprocal throughput] - TODO
        ; confirm.
   5096 ; GENERIC-LABEL: test_movmskpd:
   5097 ; GENERIC:       # %bb.0:
   5098 ; GENERIC-NEXT:    movmskpd %xmm0, %eax # sched: [2:1.00]
   5099 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5100 ;
   5101 ; ATOM-LABEL: test_movmskpd:
   5102 ; ATOM:       # %bb.0:
   5103 ; ATOM-NEXT:    movmskpd %xmm0, %eax # sched: [3:3.00]
   5104 ; ATOM-NEXT:    nop # sched: [1:0.50]
   5105 ; ATOM-NEXT:    nop # sched: [1:0.50]
   5106 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5107 ;
   5108 ; SLM-LABEL: test_movmskpd:
   5109 ; SLM:       # %bb.0:
   5110 ; SLM-NEXT:    movmskpd %xmm0, %eax # sched: [4:1.00]
   5111 ; SLM-NEXT:    retq # sched: [4:1.00]
   5112 ;
   5113 ; SANDY-SSE-LABEL: test_movmskpd:
   5114 ; SANDY-SSE:       # %bb.0:
   5115 ; SANDY-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [2:1.00]
   5116 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5117 ;
   5118 ; SANDY-LABEL: test_movmskpd:
   5119 ; SANDY:       # %bb.0:
   5120 ; SANDY-NEXT:    vmovmskpd %xmm0, %eax # sched: [2:1.00]
   5121 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5122 ;
   5123 ; HASWELL-SSE-LABEL: test_movmskpd:
   5124 ; HASWELL-SSE:       # %bb.0:
   5125 ; HASWELL-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [3:1.00]
   5126 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5127 ;
   5128 ; HASWELL-LABEL: test_movmskpd:
   5129 ; HASWELL:       # %bb.0:
   5130 ; HASWELL-NEXT:    vmovmskpd %xmm0, %eax # sched: [3:1.00]
   5131 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5132 ;
   5133 ; BROADWELL-SSE-LABEL: test_movmskpd:
   5134 ; BROADWELL-SSE:       # %bb.0:
   5135 ; BROADWELL-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [3:1.00]
   5136 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5137 ;
   5138 ; BROADWELL-LABEL: test_movmskpd:
   5139 ; BROADWELL:       # %bb.0:
   5140 ; BROADWELL-NEXT:    vmovmskpd %xmm0, %eax # sched: [3:1.00]
   5141 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5142 ;
   5143 ; SKYLAKE-SSE-LABEL: test_movmskpd:
   5144 ; SKYLAKE-SSE:       # %bb.0:
   5145 ; SKYLAKE-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [2:1.00]
   5146 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5147 ;
   5148 ; SKYLAKE-LABEL: test_movmskpd:
   5149 ; SKYLAKE:       # %bb.0:
   5150 ; SKYLAKE-NEXT:    vmovmskpd %xmm0, %eax # sched: [2:1.00]
   5151 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5152 ;
   5153 ; SKX-SSE-LABEL: test_movmskpd:
   5154 ; SKX-SSE:       # %bb.0:
   5155 ; SKX-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [2:1.00]
   5156 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5157 ;
   5158 ; SKX-LABEL: test_movmskpd:
   5159 ; SKX:       # %bb.0:
   5160 ; SKX-NEXT:    vmovmskpd %xmm0, %eax # sched: [2:1.00]
   5161 ; SKX-NEXT:    retq # sched: [7:1.00]
   5162 ;
   5163 ; BTVER2-SSE-LABEL: test_movmskpd:
   5164 ; BTVER2-SSE:       # %bb.0:
   5165 ; BTVER2-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [3:1.00]
   5166 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5167 ;
   5168 ; BTVER2-LABEL: test_movmskpd:
   5169 ; BTVER2:       # %bb.0:
   5170 ; BTVER2-NEXT:    vmovmskpd %xmm0, %eax # sched: [3:1.00]
   5171 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5172 ;
   5173 ; ZNVER1-SSE-LABEL: test_movmskpd:
   5174 ; ZNVER1-SSE:       # %bb.0:
   5175 ; ZNVER1-SSE-NEXT:    movmskpd %xmm0, %eax # sched: [1:1.00]
   5176 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5177 ;
   5178 ; ZNVER1-LABEL: test_movmskpd:
   5179 ; ZNVER1:       # %bb.0:
   5180 ; ZNVER1-NEXT:    vmovmskpd %xmm0, %eax # sched: [1:1.00]
   5181 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5182   %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
   5183   ret i32 %1
   5184 }
        ; Intrinsic called by test_movmskpd; declared nounwind readnone.
   5185 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
   5186 
   5187 define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) {
        ; Schedule test for a non-temporal 128-bit integer vector store.
        ; %a0 is added to itself and the sum is stored to %a1 with 16-byte alignment
        ; and !nontemporal metadata (!0 is defined outside this part of the file).
        ; NOTE(review): despite the "movntdqa" in the function name, every expected
        ; sequence below checks the movntdq / vmovntdq store instruction - the name
        ; looks like a misnomer, but it is kept because the LABEL checks depend on it.
        ; The "# sched" annotations come from llc -print-schedule (see the RUN lines
        ; at the top of the file); presumably [latency:reciprocal throughput] - TODO
        ; confirm.
   5188 ; GENERIC-LABEL: test_movntdqa:
   5189 ; GENERIC:       # %bb.0:
   5190 ; GENERIC-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   5191 ; GENERIC-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
   5192 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5193 ;
   5194 ; ATOM-LABEL: test_movntdqa:
   5195 ; ATOM:       # %bb.0:
   5196 ; ATOM-NEXT:    paddq %xmm0, %xmm0 # sched: [2:1.00]
   5197 ; ATOM-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
   5198 ; ATOM-NEXT:    nop # sched: [1:0.50]
   5199 ; ATOM-NEXT:    nop # sched: [1:0.50]
   5200 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5201 ;
   5202 ; SLM-LABEL: test_movntdqa:
   5203 ; SLM:       # %bb.0:
   5204 ; SLM-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   5205 ; SLM-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
   5206 ; SLM-NEXT:    retq # sched: [4:1.00]
   5207 ;
   5208 ; SANDY-SSE-LABEL: test_movntdqa:
   5209 ; SANDY-SSE:       # %bb.0:
   5210 ; SANDY-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   5211 ; SANDY-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
   5212 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5213 ;
   5214 ; SANDY-LABEL: test_movntdqa:
   5215 ; SANDY:       # %bb.0:
   5216 ; SANDY-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
   5217 ; SANDY-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
   5218 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5219 ;
   5220 ; HASWELL-SSE-LABEL: test_movntdqa:
   5221 ; HASWELL-SSE:       # %bb.0:
   5222 ; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   5223 ; HASWELL-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
   5224 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5225 ;
   5226 ; HASWELL-LABEL: test_movntdqa:
   5227 ; HASWELL:       # %bb.0:
   5228 ; HASWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
   5229 ; HASWELL-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
   5230 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5231 ;
   5232 ; BROADWELL-SSE-LABEL: test_movntdqa:
   5233 ; BROADWELL-SSE:       # %bb.0:
   5234 ; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   5235 ; BROADWELL-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
   5236 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5237 ;
   5238 ; BROADWELL-LABEL: test_movntdqa:
   5239 ; BROADWELL:       # %bb.0:
   5240 ; BROADWELL-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
   5241 ; BROADWELL-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
   5242 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5243 ;
   5244 ; SKYLAKE-SSE-LABEL: test_movntdqa:
   5245 ; SKYLAKE-SSE:       # %bb.0:
   5246 ; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
   5247 ; SKYLAKE-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
   5248 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5249 ;
   5250 ; SKYLAKE-LABEL: test_movntdqa:
   5251 ; SKYLAKE:       # %bb.0:
   5252 ; SKYLAKE-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
   5253 ; SKYLAKE-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
   5254 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5255 ;
   5256 ; SKX-SSE-LABEL: test_movntdqa:
   5257 ; SKX-SSE:       # %bb.0:
   5258 ; SKX-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.33]
   5259 ; SKX-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:1.00]
   5260 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5261 ;
   5262 ; SKX-LABEL: test_movntdqa:
   5263 ; SKX:       # %bb.0:
   5264 ; SKX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
   5265 ; SKX-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
   5266 ; SKX-NEXT:    retq # sched: [7:1.00]
   5267 ;
   5268 ; BTVER2-SSE-LABEL: test_movntdqa:
   5269 ; BTVER2-SSE:       # %bb.0:
   5270 ; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.50]
   5271 ; BTVER2-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [2:1.00]
   5272 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5273 ;
   5274 ; BTVER2-LABEL: test_movntdqa:
   5275 ; BTVER2:       # %bb.0:
   5276 ; BTVER2-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
   5277 ; BTVER2-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [2:1.00]
   5278 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5279 ;
   5280 ; ZNVER1-SSE-LABEL: test_movntdqa:
   5281 ; ZNVER1-SSE:       # %bb.0:
   5282 ; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm0 # sched: [1:0.25]
   5283 ; ZNVER1-SSE-NEXT:    movntdq %xmm0, (%rdi) # sched: [1:0.50]
   5284 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5285 ;
   5286 ; ZNVER1-LABEL: test_movntdqa:
   5287 ; ZNVER1:       # %bb.0:
   5288 ; ZNVER1-NEXT:    vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
   5289 ; ZNVER1-NEXT:    vmovntdq %xmm0, (%rdi) # sched: [1:0.50]
   5290 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5291   %1 = add <2 x i64> %a0, %a0
   5292   store <2 x i64> %1, <2 x i64> *%a1, align 16, !nontemporal !0
   5293   ret void
   5294 }
   5295 
   ; test_movntpd: adds %a0 to itself with fadd and writes the sum through %a1
   ; using a 16-byte-aligned store tagged !nontemporal. The assertions below
   ; expect (v)addpd followed by (v)movntpd, each carrying the "# sched"
   ; annotation emitted under the -print-schedule RUN lines.
   ; The assertions are autogenerated (utils/update_llc_test_checks.py, per the
   ; note at the top of the file); regenerate them instead of editing by hand.
   ; NOTE(review): !0 is defined outside this excerpt - confirm its contents.
   5296 define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) {
   5297 ; GENERIC-LABEL: test_movntpd:
   5298 ; GENERIC:       # %bb.0:
   5299 ; GENERIC-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   5300 ; GENERIC-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
   5301 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5302 ;
   5303 ; ATOM-LABEL: test_movntpd:
   5304 ; ATOM:       # %bb.0:
   5305 ; ATOM-NEXT:    addpd %xmm0, %xmm0 # sched: [6:3.00]
   5306 ; ATOM-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
   5307 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5308 ;
   5309 ; SLM-LABEL: test_movntpd:
   5310 ; SLM:       # %bb.0:
   5311 ; SLM-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   5312 ; SLM-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
   5313 ; SLM-NEXT:    retq # sched: [4:1.00]
   5314 ;
   5315 ; SANDY-SSE-LABEL: test_movntpd:
   5316 ; SANDY-SSE:       # %bb.0:
   5317 ; SANDY-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   5318 ; SANDY-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
   5319 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5320 ;
   5321 ; SANDY-LABEL: test_movntpd:
   5322 ; SANDY:       # %bb.0:
   5323 ; SANDY-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   5324 ; SANDY-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
   5325 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5326 ;
   5327 ; HASWELL-SSE-LABEL: test_movntpd:
   5328 ; HASWELL-SSE:       # %bb.0:
   5329 ; HASWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   5330 ; HASWELL-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
   5331 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5332 ;
   5333 ; HASWELL-LABEL: test_movntpd:
   5334 ; HASWELL:       # %bb.0:
   5335 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   5336 ; HASWELL-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
   5337 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5338 ;
   5339 ; BROADWELL-SSE-LABEL: test_movntpd:
   5340 ; BROADWELL-SSE:       # %bb.0:
   5341 ; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   5342 ; BROADWELL-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
   5343 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5344 ;
   5345 ; BROADWELL-LABEL: test_movntpd:
   5346 ; BROADWELL:       # %bb.0:
   5347 ; BROADWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   5348 ; BROADWELL-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
   5349 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5350 ;
   5351 ; SKYLAKE-SSE-LABEL: test_movntpd:
   5352 ; SKYLAKE-SSE:       # %bb.0:
   5353 ; SKYLAKE-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.50]
   5354 ; SKYLAKE-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
   5355 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5356 ;
   5357 ; SKYLAKE-LABEL: test_movntpd:
   5358 ; SKYLAKE:       # %bb.0:
   5359 ; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
   5360 ; SKYLAKE-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
   5361 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5362 ;
   5363 ; SKX-SSE-LABEL: test_movntpd:
   5364 ; SKX-SSE:       # %bb.0:
   5365 ; SKX-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.50]
   5366 ; SKX-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:1.00]
   5367 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5368 ;
   5369 ; SKX-LABEL: test_movntpd:
   5370 ; SKX:       # %bb.0:
   5371 ; SKX-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
   5372 ; SKX-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
   5373 ; SKX-NEXT:    retq # sched: [7:1.00]
   5374 ;
   5375 ; BTVER2-SSE-LABEL: test_movntpd:
   5376 ; BTVER2-SSE:       # %bb.0:
   5377 ; BTVER2-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   5378 ; BTVER2-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [3:1.00]
   5379 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5380 ;
   5381 ; BTVER2-LABEL: test_movntpd:
   5382 ; BTVER2:       # %bb.0:
   5383 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   5384 ; BTVER2-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [3:1.00]
   5385 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5386 ;
   5387 ; ZNVER1-SSE-LABEL: test_movntpd:
   5388 ; ZNVER1-SSE:       # %bb.0:
   5389 ; ZNVER1-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   5390 ; ZNVER1-SSE-NEXT:    movntpd %xmm0, (%rdi) # sched: [1:0.50]
   5391 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5392 ;
   5393 ; ZNVER1-LABEL: test_movntpd:
   5394 ; ZNVER1:       # %bb.0:
   5395 ; ZNVER1-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   5396 ; ZNVER1-NEXT:    vmovntpd %xmm0, (%rdi) # sched: [1:0.50]
   5397 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5398   %1 = fadd <2 x double> %a0, %a0
   5399   store <2 x double> %1, <2 x double> *%a1, align 16, !nontemporal !0
   5400   ret void
   5401 }
   5402 
   ; test_movq_mem: loads an i64 from %a1 (align 1), inserts it into lane 0 of a
   ; zero vector, adds that vector to %a0, stores lane 0 of the sum back through
   ; %a1 (align 1) and returns the full sum. The assertions below expect a
   ; (v)movq load that zeroes the upper lane, a (v)paddq, and a (v)movq store,
   ; each with the "# sched" annotation emitted under -print-schedule.
   ; The assertions are autogenerated (utils/update_llc_test_checks.py, per the
   ; note at the top of the file); regenerate them instead of editing by hand.
   5403 define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) {
   5404 ; GENERIC-LABEL: test_movq_mem:
   5405 ; GENERIC:       # %bb.0:
   5406 ; GENERIC-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
   5407 ; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   5408 ; GENERIC-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
   5409 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5410 ;
   5411 ; ATOM-LABEL: test_movq_mem:
   5412 ; ATOM:       # %bb.0:
   5413 ; ATOM-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00]
   5414 ; ATOM-NEXT:    paddq %xmm1, %xmm0 # sched: [2:1.00]
   5415 ; ATOM-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
   5416 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5417 ;
   5418 ; SLM-LABEL: test_movq_mem:
   5419 ; SLM:       # %bb.0:
   5420 ; SLM-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [3:1.00]
   5421 ; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   5422 ; SLM-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
   5423 ; SLM-NEXT:    retq # sched: [4:1.00]
   5424 ;
   5425 ; SANDY-SSE-LABEL: test_movq_mem:
   5426 ; SANDY-SSE:       # %bb.0:
   5427 ; SANDY-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
   5428 ; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   5429 ; SANDY-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
   5430 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5431 ;
   5432 ; SANDY-LABEL: test_movq_mem:
   5433 ; SANDY:       # %bb.0:
   5434 ; SANDY-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
   5435 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   5436 ; SANDY-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
   5437 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5438 ;
   5439 ; HASWELL-SSE-LABEL: test_movq_mem:
   5440 ; HASWELL-SSE:       # %bb.0:
   5441 ; HASWELL-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
   5442 ; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   5443 ; HASWELL-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
   5444 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5445 ;
   5446 ; HASWELL-LABEL: test_movq_mem:
   5447 ; HASWELL:       # %bb.0:
   5448 ; HASWELL-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
   5449 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   5450 ; HASWELL-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
   5451 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5452 ;
   5453 ; BROADWELL-SSE-LABEL: test_movq_mem:
   5454 ; BROADWELL-SSE:       # %bb.0:
   5455 ; BROADWELL-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
   5456 ; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   5457 ; BROADWELL-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
   5458 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5459 ;
   5460 ; BROADWELL-LABEL: test_movq_mem:
   5461 ; BROADWELL:       # %bb.0:
   5462 ; BROADWELL-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
   5463 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   5464 ; BROADWELL-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
   5465 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5466 ;
   5467 ; SKYLAKE-SSE-LABEL: test_movq_mem:
   5468 ; SKYLAKE-SSE:       # %bb.0:
   5469 ; SKYLAKE-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
   5470 ; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   5471 ; SKYLAKE-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
   5472 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5473 ;
   5474 ; SKYLAKE-LABEL: test_movq_mem:
   5475 ; SKYLAKE:       # %bb.0:
   5476 ; SKYLAKE-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
   5477 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   5478 ; SKYLAKE-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
   5479 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5480 ;
   5481 ; SKX-SSE-LABEL: test_movq_mem:
   5482 ; SKX-SSE:       # %bb.0:
   5483 ; SKX-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
   5484 ; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   5485 ; SKX-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:1.00]
   5486 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5487 ;
   5488 ; SKX-LABEL: test_movq_mem:
   5489 ; SKX:       # %bb.0:
   5490 ; SKX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
   5491 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   5492 ; SKX-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:1.00]
   5493 ; SKX-NEXT:    retq # sched: [7:1.00]
   5494 ;
   5495 ; BTVER2-SSE-LABEL: test_movq_mem:
   5496 ; BTVER2-SSE:       # %bb.0:
   5497 ; BTVER2-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
   5498 ; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   5499 ; BTVER2-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [2:1.00]
   5500 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5501 ;
   5502 ; BTVER2-LABEL: test_movq_mem:
   5503 ; BTVER2:       # %bb.0:
   5504 ; BTVER2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
   5505 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   5506 ; BTVER2-NEXT:    vmovq %xmm0, (%rdi) # sched: [2:1.00]
   5507 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5508 ;
   5509 ; ZNVER1-SSE-LABEL: test_movq_mem:
   5510 ; ZNVER1-SSE:       # %bb.0:
   5511 ; ZNVER1-SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
   5512 ; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
   5513 ; ZNVER1-SSE-NEXT:    movq %xmm0, (%rdi) # sched: [1:0.50]
   5514 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5515 ;
   5516 ; ZNVER1-LABEL: test_movq_mem:
   5517 ; ZNVER1:       # %bb.0:
   5518 ; ZNVER1-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
   5519 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   5520 ; ZNVER1-NEXT:    vmovq %xmm0, (%rdi) # sched: [1:0.50]
   5521 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5522   %1 = load i64, i64* %a1, align 1
   5523   %2 = insertelement <2 x i64> zeroinitializer, i64 %1, i32 0
   5524   %3 = add <2 x i64> %a0, %2
   5525   %4 = extractelement <2 x i64> %3, i32 0
   5526   store i64 %4, i64 *%a1, align 1
   5527   ret <2 x i64> %3
   5528 }
   5529 
   ; test_movq_reg: shuffles %a0 with a zero vector using mask <0, 2>, which
   ; keeps lane 0 of %a0 and takes lane 0 of the zero operand for the upper
   ; lane, then adds the result to %a1. The assertions below expect a
   ; register-to-register (v)movq ("xmm0 = xmm0[0],zero") followed by (v)paddq,
   ; each with the "# sched" annotation emitted under -print-schedule. The Atom
   ; run additionally expects two nop instructions before retq.
   ; The assertions are autogenerated (utils/update_llc_test_checks.py, per the
   ; note at the top of the file); regenerate them instead of editing by hand.
   5530 define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) {
   5531 ; GENERIC-LABEL: test_movq_reg:
   5532 ; GENERIC:       # %bb.0:
   5533 ; GENERIC-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
   5534 ; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   5535 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5536 ;
   5537 ; ATOM-LABEL: test_movq_reg:
   5538 ; ATOM:       # %bb.0:
   5539 ; ATOM-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
   5540 ; ATOM-NEXT:    paddq %xmm1, %xmm0 # sched: [2:1.00]
   5541 ; ATOM-NEXT:    nop # sched: [1:0.50]
   5542 ; ATOM-NEXT:    nop # sched: [1:0.50]
   5543 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5544 ;
   5545 ; SLM-LABEL: test_movq_reg:
   5546 ; SLM:       # %bb.0:
   5547 ; SLM-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
   5548 ; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   5549 ; SLM-NEXT:    retq # sched: [4:1.00]
   5550 ;
   5551 ; SANDY-SSE-LABEL: test_movq_reg:
   5552 ; SANDY-SSE:       # %bb.0:
   5553 ; SANDY-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
   5554 ; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   5555 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5556 ;
   5557 ; SANDY-LABEL: test_movq_reg:
   5558 ; SANDY:       # %bb.0:
   5559 ; SANDY-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
   5560 ; SANDY-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
   5561 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5562 ;
   5563 ; HASWELL-SSE-LABEL: test_movq_reg:
   5564 ; HASWELL-SSE:       # %bb.0:
   5565 ; HASWELL-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
   5566 ; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   5567 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5568 ;
   5569 ; HASWELL-LABEL: test_movq_reg:
   5570 ; HASWELL:       # %bb.0:
   5571 ; HASWELL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
   5572 ; HASWELL-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
   5573 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5574 ;
   5575 ; BROADWELL-SSE-LABEL: test_movq_reg:
   5576 ; BROADWELL-SSE:       # %bb.0:
   5577 ; BROADWELL-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
   5578 ; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   5579 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5580 ;
   5581 ; BROADWELL-LABEL: test_movq_reg:
   5582 ; BROADWELL:       # %bb.0:
   5583 ; BROADWELL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
   5584 ; BROADWELL-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
   5585 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5586 ;
   5587 ; SKYLAKE-SSE-LABEL: test_movq_reg:
   5588 ; SKYLAKE-SSE:       # %bb.0:
   5589 ; SKYLAKE-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
   5590 ; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   5591 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5592 ;
   5593 ; SKYLAKE-LABEL: test_movq_reg:
   5594 ; SKYLAKE:       # %bb.0:
   5595 ; SKYLAKE-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
   5596 ; SKYLAKE-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   5597 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5598 ;
   5599 ; SKX-SSE-LABEL: test_movq_reg:
   5600 ; SKX-SSE:       # %bb.0:
   5601 ; SKX-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
   5602 ; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   5603 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5604 ;
   5605 ; SKX-LABEL: test_movq_reg:
   5606 ; SKX:       # %bb.0:
   5607 ; SKX-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
   5608 ; SKX-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   5609 ; SKX-NEXT:    retq # sched: [7:1.00]
   5610 ;
   5611 ; BTVER2-SSE-LABEL: test_movq_reg:
   5612 ; BTVER2-SSE:       # %bb.0:
   5613 ; BTVER2-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
   5614 ; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   5615 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5616 ;
   5617 ; BTVER2-LABEL: test_movq_reg:
   5618 ; BTVER2:       # %bb.0:
   5619 ; BTVER2-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
   5620 ; BTVER2-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
   5621 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5622 ;
   5623 ; ZNVER1-SSE-LABEL: test_movq_reg:
   5624 ; ZNVER1-SSE:       # %bb.0:
   5625 ; ZNVER1-SSE-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25]
   5626 ; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
   5627 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5628 ;
   5629 ; ZNVER1-LABEL: test_movq_reg:
   5630 ; ZNVER1:       # %bb.0:
   5631 ; ZNVER1-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25]
   5632 ; ZNVER1-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
   5633 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5634   %1 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
   5635   %2 = add <2 x i64> %a1, %1
   5636   ret <2 x i64> %2
   5637 }
   5638 
   ; test_movsd_mem: loads a double from %a0 (align 1), adds it to itself with
   ; fadd, and stores the sum through %a1 (align 1). The assertions below expect
   ; a (v)movsd load, a (v)addsd, and a (v)movsd store to (%rsi), each with the
   ; "# sched" annotation emitted under -print-schedule.
   ; The assertions are autogenerated (utils/update_llc_test_checks.py, per the
   ; note at the top of the file); regenerate them instead of editing by hand.
   5639 define void @test_movsd_mem(double* %a0, double* %a1) {
   5640 ; GENERIC-LABEL: test_movsd_mem:
   5641 ; GENERIC:       # %bb.0:
   5642 ; GENERIC-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
   5643 ; GENERIC-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
   5644 ; GENERIC-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
   5645 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5646 ;
   5647 ; ATOM-LABEL: test_movsd_mem:
   5648 ; ATOM:       # %bb.0:
   5649 ; ATOM-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [1:1.00]
   5650 ; ATOM-NEXT:    addsd %xmm0, %xmm0 # sched: [5:5.00]
   5651 ; ATOM-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
   5652 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5653 ;
   5654 ; SLM-LABEL: test_movsd_mem:
   5655 ; SLM:       # %bb.0:
   5656 ; SLM-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [3:1.00]
   5657 ; SLM-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
   5658 ; SLM-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
   5659 ; SLM-NEXT:    retq # sched: [4:1.00]
   5660 ;
   5661 ; SANDY-SSE-LABEL: test_movsd_mem:
   5662 ; SANDY-SSE:       # %bb.0:
   5663 ; SANDY-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
   5664 ; SANDY-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
   5665 ; SANDY-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
   5666 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5667 ;
   5668 ; SANDY-LABEL: test_movsd_mem:
   5669 ; SANDY:       # %bb.0:
   5670 ; SANDY-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
   5671 ; SANDY-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   5672 ; SANDY-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
   5673 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5674 ;
   5675 ; HASWELL-SSE-LABEL: test_movsd_mem:
   5676 ; HASWELL-SSE:       # %bb.0:
   5677 ; HASWELL-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
   5678 ; HASWELL-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
   5679 ; HASWELL-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
   5680 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5681 ;
   5682 ; HASWELL-LABEL: test_movsd_mem:
   5683 ; HASWELL:       # %bb.0:
   5684 ; HASWELL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
   5685 ; HASWELL-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   5686 ; HASWELL-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
   5687 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5688 ;
   5689 ; BROADWELL-SSE-LABEL: test_movsd_mem:
   5690 ; BROADWELL-SSE:       # %bb.0:
   5691 ; BROADWELL-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
   5692 ; BROADWELL-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
   5693 ; BROADWELL-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
   5694 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5695 ;
   5696 ; BROADWELL-LABEL: test_movsd_mem:
   5697 ; BROADWELL:       # %bb.0:
   5698 ; BROADWELL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
   5699 ; BROADWELL-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   5700 ; BROADWELL-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
   5701 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5702 ;
   5703 ; SKYLAKE-SSE-LABEL: test_movsd_mem:
   5704 ; SKYLAKE-SSE:       # %bb.0:
   5705 ; SKYLAKE-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
   5706 ; SKYLAKE-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [4:0.50]
   5707 ; SKYLAKE-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
   5708 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5709 ;
   5710 ; SKYLAKE-LABEL: test_movsd_mem:
   5711 ; SKYLAKE:       # %bb.0:
   5712 ; SKYLAKE-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
   5713 ; SKYLAKE-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
   5714 ; SKYLAKE-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
   5715 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5716 ;
   5717 ; SKX-SSE-LABEL: test_movsd_mem:
   5718 ; SKX-SSE:       # %bb.0:
   5719 ; SKX-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
   5720 ; SKX-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [4:0.50]
   5721 ; SKX-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:1.00]
   5722 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5723 ;
   5724 ; SKX-LABEL: test_movsd_mem:
   5725 ; SKX:       # %bb.0:
   5726 ; SKX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
   5727 ; SKX-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
   5728 ; SKX-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:1.00]
   5729 ; SKX-NEXT:    retq # sched: [7:1.00]
   5730 ;
   5731 ; BTVER2-SSE-LABEL: test_movsd_mem:
   5732 ; BTVER2-SSE:       # %bb.0:
   5733 ; BTVER2-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
   5734 ; BTVER2-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
   5735 ; BTVER2-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [2:1.00]
   5736 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5737 ;
   5738 ; BTVER2-LABEL: test_movsd_mem:
   5739 ; BTVER2:       # %bb.0:
   5740 ; BTVER2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
   5741 ; BTVER2-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   5742 ; BTVER2-NEXT:    vmovsd %xmm0, (%rsi) # sched: [2:1.00]
   5743 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5744 ;
   5745 ; ZNVER1-SSE-LABEL: test_movsd_mem:
   5746 ; ZNVER1-SSE:       # %bb.0:
   5747 ; ZNVER1-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
   5748 ; ZNVER1-SSE-NEXT:    addsd %xmm0, %xmm0 # sched: [3:1.00]
   5749 ; ZNVER1-SSE-NEXT:    movsd %xmm0, (%rsi) # sched: [1:0.50]
   5750 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5751 ;
   5752 ; ZNVER1-LABEL: test_movsd_mem:
   5753 ; ZNVER1:       # %bb.0:
   5754 ; ZNVER1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
   5755 ; ZNVER1-NEXT:    vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   5756 ; ZNVER1-NEXT:    vmovsd %xmm0, (%rsi) # sched: [1:0.50]
   5757 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5758   %1 = load double, double* %a0, align 1
   5759   %2 = fadd double %1, %1
   5760   store double %2, double *%a1, align 1
   5761   ret void
   5762 }
   5763 
   ; test_movsd_reg: returns a shuffle of %a0 and %a1 with mask <2, 0>, i.e.
   ; lane 0 of %a1 followed by lane 0 of %a0. Despite the test name, the
   ; assertions below expect (v)movlhps ("xmm1[0],xmm0[0]"), not a movsd
   ; register move. The non-VEX runs also expect a movaps copy of xmm1 into
   ; xmm0, and the Atom run expects four nop instructions before retq. Each
   ; instruction carries the "# sched" annotation emitted under -print-schedule.
   ; The assertions are autogenerated (utils/update_llc_test_checks.py, per the
   ; note at the top of the file); regenerate them instead of editing by hand.
   5764 define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) {
   5765 ; GENERIC-LABEL: test_movsd_reg:
   5766 ; GENERIC:       # %bb.0:
   5767 ; GENERIC-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
   5768 ; GENERIC-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
   5769 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5770 ;
   5771 ; ATOM-LABEL: test_movsd_reg:
   5772 ; ATOM:       # %bb.0:
   5773 ; ATOM-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
   5774 ; ATOM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
   5775 ; ATOM-NEXT:    nop # sched: [1:0.50]
   5776 ; ATOM-NEXT:    nop # sched: [1:0.50]
   5777 ; ATOM-NEXT:    nop # sched: [1:0.50]
   5778 ; ATOM-NEXT:    nop # sched: [1:0.50]
   5779 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5780 ;
   5781 ; SLM-LABEL: test_movsd_reg:
   5782 ; SLM:       # %bb.0:
   5783 ; SLM-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
   5784 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
   5785 ; SLM-NEXT:    retq # sched: [4:1.00]
   5786 ;
   5787 ; SANDY-SSE-LABEL: test_movsd_reg:
   5788 ; SANDY-SSE:       # %bb.0:
   5789 ; SANDY-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
   5790 ; SANDY-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
   5791 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5792 ;
   5793 ; SANDY-LABEL: test_movsd_reg:
   5794 ; SANDY:       # %bb.0:
   5795 ; SANDY-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
   5796 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5797 ;
   5798 ; HASWELL-SSE-LABEL: test_movsd_reg:
   5799 ; HASWELL-SSE:       # %bb.0:
   5800 ; HASWELL-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
   5801 ; HASWELL-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
   5802 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5803 ;
   5804 ; HASWELL-LABEL: test_movsd_reg:
   5805 ; HASWELL:       # %bb.0:
   5806 ; HASWELL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
   5807 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5808 ;
   5809 ; BROADWELL-SSE-LABEL: test_movsd_reg:
   5810 ; BROADWELL-SSE:       # %bb.0:
   5811 ; BROADWELL-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
   5812 ; BROADWELL-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:1.00]
   5813 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5814 ;
   5815 ; BROADWELL-LABEL: test_movsd_reg:
   5816 ; BROADWELL:       # %bb.0:
   5817 ; BROADWELL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
   5818 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5819 ;
   5820 ; SKYLAKE-SSE-LABEL: test_movsd_reg:
   5821 ; SKYLAKE-SSE:       # %bb.0:
   5822 ; SKYLAKE-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
   5823 ; SKYLAKE-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.33]
   5824 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5825 ;
   5826 ; SKYLAKE-LABEL: test_movsd_reg:
   5827 ; SKYLAKE:       # %bb.0:
   5828 ; SKYLAKE-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
   5829 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5830 ;
   5831 ; SKX-SSE-LABEL: test_movsd_reg:
   5832 ; SKX-SSE:       # %bb.0:
   5833 ; SKX-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
   5834 ; SKX-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.33]
   5835 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5836 ;
   5837 ; SKX-LABEL: test_movsd_reg:
   5838 ; SKX:       # %bb.0:
   5839 ; SKX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
   5840 ; SKX-NEXT:    retq # sched: [7:1.00]
   5841 ;
   5842 ; BTVER2-SSE-LABEL: test_movsd_reg:
   5843 ; BTVER2-SSE:       # %bb.0:
   5844 ; BTVER2-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50]
   5845 ; BTVER2-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
   5846 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5847 ;
   5848 ; BTVER2-LABEL: test_movsd_reg:
   5849 ; BTVER2:       # %bb.0:
   5850 ; BTVER2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
   5851 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5852 ;
   5853 ; ZNVER1-SSE-LABEL: test_movsd_reg:
   5854 ; ZNVER1-SSE:       # %bb.0:
   5855 ; ZNVER1-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50]
   5856 ; ZNVER1-SSE-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.25]
   5857 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5858 ;
   5859 ; ZNVER1-LABEL: test_movsd_reg:
   5860 ; ZNVER1:       # %bb.0:
   5861 ; ZNVER1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
   5862 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5863   %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 2, i32 0>
   5864   ret <2 x double> %1
   5865 }
   5866 
   ; test_movupd: loads a <2 x double> from %a0 with align 1, adds it to itself
   ; with fadd, and stores the sum through %a1 with align 1. The assertions
   ; below expect a (v)movupd load from (%rdi), a (v)addpd, and a (v)movupd
   ; store to (%rsi), each with the "# sched" annotation emitted under
   ; -print-schedule.
   ; The assertions are autogenerated (utils/update_llc_test_checks.py, per the
   ; note at the top of the file); regenerate them instead of editing by hand.
   5867 define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) {
   5868 ; GENERIC-LABEL: test_movupd:
   5869 ; GENERIC:       # %bb.0:
   5870 ; GENERIC-NEXT:    movupd (%rdi), %xmm0 # sched: [6:0.50]
   5871 ; GENERIC-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   5872 ; GENERIC-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
   5873 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5874 ;
   5875 ; ATOM-LABEL: test_movupd:
   5876 ; ATOM:       # %bb.0:
   5877 ; ATOM-NEXT:    movupd (%rdi), %xmm0 # sched: [3:1.50]
   5878 ; ATOM-NEXT:    addpd %xmm0, %xmm0 # sched: [6:3.00]
   5879 ; ATOM-NEXT:    movupd %xmm0, (%rsi) # sched: [2:1.00]
   5880 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5881 ;
   5882 ; SLM-LABEL: test_movupd:
   5883 ; SLM:       # %bb.0:
   5884 ; SLM-NEXT:    movupd (%rdi), %xmm0 # sched: [3:1.00]
   5885 ; SLM-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   5886 ; SLM-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
   5887 ; SLM-NEXT:    retq # sched: [4:1.00]
   5888 ;
   5889 ; SANDY-SSE-LABEL: test_movupd:
   5890 ; SANDY-SSE:       # %bb.0:
   5891 ; SANDY-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [6:0.50]
   5892 ; SANDY-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   5893 ; SANDY-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
   5894 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5895 ;
   5896 ; SANDY-LABEL: test_movupd:
   5897 ; SANDY:       # %bb.0:
   5898 ; SANDY-NEXT:    vmovupd (%rdi), %xmm0 # sched: [6:0.50]
   5899 ; SANDY-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   5900 ; SANDY-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
   5901 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5902 ;
   5903 ; HASWELL-SSE-LABEL: test_movupd:
   5904 ; HASWELL-SSE:       # %bb.0:
   5905 ; HASWELL-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [6:0.50]
   5906 ; HASWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   5907 ; HASWELL-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
   5908 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5909 ;
   5910 ; HASWELL-LABEL: test_movupd:
   5911 ; HASWELL:       # %bb.0:
   5912 ; HASWELL-NEXT:    vmovupd (%rdi), %xmm0 # sched: [6:0.50]
   5913 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   5914 ; HASWELL-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
   5915 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5916 ;
   5917 ; BROADWELL-SSE-LABEL: test_movupd:
   5918 ; BROADWELL-SSE:       # %bb.0:
   5919 ; BROADWELL-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [5:0.50]
   5920 ; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   5921 ; BROADWELL-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
   5922 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5923 ;
   5924 ; BROADWELL-LABEL: test_movupd:
   5925 ; BROADWELL:       # %bb.0:
   5926 ; BROADWELL-NEXT:    vmovupd (%rdi), %xmm0 # sched: [5:0.50]
   5927 ; BROADWELL-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   5928 ; BROADWELL-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
   5929 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5930 ;
   5931 ; SKYLAKE-SSE-LABEL: test_movupd:
   5932 ; SKYLAKE-SSE:       # %bb.0:
   5933 ; SKYLAKE-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [6:0.50]
   5934 ; SKYLAKE-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.50]
   5935 ; SKYLAKE-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
   5936 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5937 ;
   5938 ; SKYLAKE-LABEL: test_movupd:
   5939 ; SKYLAKE:       # %bb.0:
   5940 ; SKYLAKE-NEXT:    vmovupd (%rdi), %xmm0 # sched: [6:0.50]
   5941 ; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
   5942 ; SKYLAKE-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
   5943 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5944 ;
   5945 ; SKX-SSE-LABEL: test_movupd:
   5946 ; SKX-SSE:       # %bb.0:
   5947 ; SKX-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [6:0.50]
   5948 ; SKX-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [4:0.50]
   5949 ; SKX-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
   5950 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5951 ;
   5952 ; SKX-LABEL: test_movupd:
   5953 ; SKX:       # %bb.0:
   5954 ; SKX-NEXT:    vmovupd (%rdi), %xmm0 # sched: [6:0.50]
   5955 ; SKX-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
   5956 ; SKX-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
   5957 ; SKX-NEXT:    retq # sched: [7:1.00]
   5958 ;
   5959 ; BTVER2-SSE-LABEL: test_movupd:
   5960 ; BTVER2-SSE:       # %bb.0:
   5961 ; BTVER2-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [5:1.00]
   5962 ; BTVER2-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   5963 ; BTVER2-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:1.00]
   5964 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5965 ;
   5966 ; BTVER2-LABEL: test_movupd:
   5967 ; BTVER2:       # %bb.0:
   5968 ; BTVER2-NEXT:    vmovupd (%rdi), %xmm0 # sched: [5:1.00]
   5969 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   5970 ; BTVER2-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
   5971 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5972 ;
   5973 ; ZNVER1-SSE-LABEL: test_movupd:
   5974 ; ZNVER1-SSE:       # %bb.0:
   5975 ; ZNVER1-SSE-NEXT:    movupd (%rdi), %xmm0 # sched: [8:0.50]
   5976 ; ZNVER1-SSE-NEXT:    addpd %xmm0, %xmm0 # sched: [3:1.00]
   5977 ; ZNVER1-SSE-NEXT:    movupd %xmm0, (%rsi) # sched: [1:0.50]
   5978 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5979 ;
   5980 ; ZNVER1-LABEL: test_movupd:
   5981 ; ZNVER1:       # %bb.0:
   5982 ; ZNVER1-NEXT:    vmovupd (%rdi), %xmm0 # sched: [8:0.50]
   5983 ; ZNVER1-NEXT:    vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   5984 ; ZNVER1-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:0.50]
   5985 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5986   %1 = load <2 x double>, <2 x double> *%a0, align 1
   5987   %2 = fadd <2 x double> %1, %1
   5988   store <2 x double> %2, <2 x double> *%a1, align 1
   5989   ret void
   5990 }
   5991 
   5992 define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
   5993 ; GENERIC-LABEL: test_mulpd:
   5994 ; GENERIC:       # %bb.0:
   5995 ; GENERIC-NEXT:    mulpd %xmm1, %xmm0 # sched: [5:1.00]
   5996 ; GENERIC-NEXT:    mulpd (%rdi), %xmm0 # sched: [11:1.00]
   5997 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5998 ;
   5999 ; ATOM-LABEL: test_mulpd:
   6000 ; ATOM:       # %bb.0:
   6001 ; ATOM-NEXT:    mulpd %xmm1, %xmm0 # sched: [9:4.50]
   6002 ; ATOM-NEXT:    mulpd (%rdi), %xmm0 # sched: [10:5.00]
   6003 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6004 ;
   6005 ; SLM-LABEL: test_mulpd:
   6006 ; SLM:       # %bb.0:
   6007 ; SLM-NEXT:    mulpd %xmm1, %xmm0 # sched: [5:2.00]
   6008 ; SLM-NEXT:    mulpd (%rdi), %xmm0 # sched: [8:2.00]
   6009 ; SLM-NEXT:    retq # sched: [4:1.00]
   6010 ;
   6011 ; SANDY-SSE-LABEL: test_mulpd:
   6012 ; SANDY-SSE:       # %bb.0:
   6013 ; SANDY-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [5:1.00]
   6014 ; SANDY-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [11:1.00]
   6015 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   6016 ;
   6017 ; SANDY-LABEL: test_mulpd:
   6018 ; SANDY:       # %bb.0:
   6019 ; SANDY-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   6020 ; SANDY-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   6021 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6022 ;
   6023 ; HASWELL-SSE-LABEL: test_mulpd:
   6024 ; HASWELL-SSE:       # %bb.0:
   6025 ; HASWELL-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [5:0.50]
   6026 ; HASWELL-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [11:0.50]
   6027 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6028 ;
   6029 ; HASWELL-LABEL: test_mulpd:
   6030 ; HASWELL:       # %bb.0:
   6031 ; HASWELL-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
   6032 ; HASWELL-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
   6033 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6034 ;
   6035 ; BROADWELL-SSE-LABEL: test_mulpd:
   6036 ; BROADWELL-SSE:       # %bb.0:
   6037 ; BROADWELL-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [3:0.50]
   6038 ; BROADWELL-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [8:0.50]
   6039 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6040 ;
   6041 ; BROADWELL-LABEL: test_mulpd:
   6042 ; BROADWELL:       # %bb.0:
   6043 ; BROADWELL-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
   6044 ; BROADWELL-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   6045 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6046 ;
   6047 ; SKYLAKE-SSE-LABEL: test_mulpd:
   6048 ; SKYLAKE-SSE:       # %bb.0:
   6049 ; SKYLAKE-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [4:0.50]
   6050 ; SKYLAKE-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [10:0.50]
   6051 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   6052 ;
   6053 ; SKYLAKE-LABEL: test_mulpd:
   6054 ; SKYLAKE:       # %bb.0:
   6055 ; SKYLAKE-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   6056 ; SKYLAKE-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   6057 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6058 ;
   6059 ; SKX-SSE-LABEL: test_mulpd:
   6060 ; SKX-SSE:       # %bb.0:
   6061 ; SKX-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [4:0.50]
   6062 ; SKX-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [10:0.50]
   6063 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   6064 ;
   6065 ; SKX-LABEL: test_mulpd:
   6066 ; SKX:       # %bb.0:
   6067 ; SKX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   6068 ; SKX-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   6069 ; SKX-NEXT:    retq # sched: [7:1.00]
   6070 ;
   6071 ; BTVER2-SSE-LABEL: test_mulpd:
   6072 ; BTVER2-SSE:       # %bb.0:
   6073 ; BTVER2-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [4:2.00]
   6074 ; BTVER2-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [9:2.00]
   6075 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   6076 ;
   6077 ; BTVER2-LABEL: test_mulpd:
   6078 ; BTVER2:       # %bb.0:
   6079 ; BTVER2-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
   6080 ; BTVER2-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
   6081 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6082 ;
   6083 ; ZNVER1-SSE-LABEL: test_mulpd:
   6084 ; ZNVER1-SSE:       # %bb.0:
   6085 ; ZNVER1-SSE-NEXT:    mulpd %xmm1, %xmm0 # sched: [3:0.50]
   6086 ; ZNVER1-SSE-NEXT:    mulpd (%rdi), %xmm0 # sched: [10:0.50]
   6087 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   6088 ;
   6089 ; ZNVER1-LABEL: test_mulpd:
   6090 ; ZNVER1:       # %bb.0:
   6091 ; ZNVER1-NEXT:    vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
   6092 ; ZNVER1-NEXT:    vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   6093 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Packed-double multiply test: multiplies %a0 by %a1, then multiplies
          ; that product by a <2 x double> loaded from %a2 and returns the result.
          ; The CHECK lines above expect exactly two multiplies per prefix: one
          ; register-register mulpd/vmulpd and one with a (%rdi) memory operand,
          ; each followed by its "# sched:" annotation.
   6094   %1 = fmul <2 x double> %a0, %a1 ; register-register form in the CHECK lines
   6095   %2 = load <2 x double>, <2 x double> *%a2, align 16 ; no separate load appears in the CHECK lines; it shows up as the (%rdi) operand
   6096   %3 = fmul <2 x double> %1, %2 ; memory-operand form in the CHECK lines
   6097   ret <2 x double> %3
   6098 }
   6099 
   6100 define double @test_mulsd(double %a0, double %a1, double *%a2) {
   6101 ; GENERIC-LABEL: test_mulsd:
   6102 ; GENERIC:       # %bb.0:
   6103 ; GENERIC-NEXT:    mulsd %xmm1, %xmm0 # sched: [5:1.00]
   6104 ; GENERIC-NEXT:    mulsd (%rdi), %xmm0 # sched: [11:1.00]
   6105 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6106 ;
   6107 ; ATOM-LABEL: test_mulsd:
   6108 ; ATOM:       # %bb.0:
   6109 ; ATOM-NEXT:    mulsd %xmm1, %xmm0 # sched: [5:5.00]
   6110 ; ATOM-NEXT:    mulsd (%rdi), %xmm0 # sched: [5:5.00]
   6111 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6112 ;
   6113 ; SLM-LABEL: test_mulsd:
   6114 ; SLM:       # %bb.0:
   6115 ; SLM-NEXT:    mulsd %xmm1, %xmm0 # sched: [5:2.00]
   6116 ; SLM-NEXT:    mulsd (%rdi), %xmm0 # sched: [8:2.00]
   6117 ; SLM-NEXT:    retq # sched: [4:1.00]
   6118 ;
   6119 ; SANDY-SSE-LABEL: test_mulsd:
   6120 ; SANDY-SSE:       # %bb.0:
   6121 ; SANDY-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [5:1.00]
   6122 ; SANDY-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [11:1.00]
   6123 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   6124 ;
   6125 ; SANDY-LABEL: test_mulsd:
   6126 ; SANDY:       # %bb.0:
   6127 ; SANDY-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   6128 ; SANDY-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   6129 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6130 ;
   6131 ; HASWELL-SSE-LABEL: test_mulsd:
   6132 ; HASWELL-SSE:       # %bb.0:
   6133 ; HASWELL-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [5:0.50]
   6134 ; HASWELL-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [10:0.50]
   6135 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6136 ;
   6137 ; HASWELL-LABEL: test_mulsd:
   6138 ; HASWELL:       # %bb.0:
   6139 ; HASWELL-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
   6140 ; HASWELL-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   6141 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6142 ;
   6143 ; BROADWELL-SSE-LABEL: test_mulsd:
   6144 ; BROADWELL-SSE:       # %bb.0:
   6145 ; BROADWELL-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [3:0.50]
   6146 ; BROADWELL-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [8:0.50]
   6147 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6148 ;
   6149 ; BROADWELL-LABEL: test_mulsd:
   6150 ; BROADWELL:       # %bb.0:
   6151 ; BROADWELL-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
   6152 ; BROADWELL-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   6153 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6154 ;
   6155 ; SKYLAKE-SSE-LABEL: test_mulsd:
   6156 ; SKYLAKE-SSE:       # %bb.0:
   6157 ; SKYLAKE-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [4:0.50]
   6158 ; SKYLAKE-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [9:0.50]
   6159 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   6160 ;
   6161 ; SKYLAKE-LABEL: test_mulsd:
   6162 ; SKYLAKE:       # %bb.0:
   6163 ; SKYLAKE-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   6164 ; SKYLAKE-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   6165 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6166 ;
   6167 ; SKX-SSE-LABEL: test_mulsd:
   6168 ; SKX-SSE:       # %bb.0:
   6169 ; SKX-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [4:0.50]
   6170 ; SKX-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [9:0.50]
   6171 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   6172 ;
   6173 ; SKX-LABEL: test_mulsd:
   6174 ; SKX:       # %bb.0:
   6175 ; SKX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   6176 ; SKX-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   6177 ; SKX-NEXT:    retq # sched: [7:1.00]
   6178 ;
   6179 ; BTVER2-SSE-LABEL: test_mulsd:
   6180 ; BTVER2-SSE:       # %bb.0:
   6181 ; BTVER2-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [4:2.00]
   6182 ; BTVER2-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [9:2.00]
   6183 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   6184 ;
   6185 ; BTVER2-LABEL: test_mulsd:
   6186 ; BTVER2:       # %bb.0:
   6187 ; BTVER2-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
   6188 ; BTVER2-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
   6189 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6190 ;
   6191 ; ZNVER1-SSE-LABEL: test_mulsd:
   6192 ; ZNVER1-SSE:       # %bb.0:
   6193 ; ZNVER1-SSE-NEXT:    mulsd %xmm1, %xmm0 # sched: [3:0.50]
   6194 ; ZNVER1-SSE-NEXT:    mulsd (%rdi), %xmm0 # sched: [10:0.50]
   6195 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   6196 ;
   6197 ; ZNVER1-LABEL: test_mulsd:
   6198 ; ZNVER1:       # %bb.0:
   6199 ; ZNVER1-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
   6200 ; ZNVER1-NEXT:    vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   6201 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Scalar-double multiply test: multiplies %a0 by %a1, then multiplies
          ; that product by a double loaded from %a2 and returns the result.
          ; The CHECK lines above expect exactly two multiplies per prefix: one
          ; register-register mulsd/vmulsd and one with a (%rdi) memory operand,
          ; each followed by its "# sched:" annotation.
   6202   %1 = fmul double %a0, %a1 ; register-register form in the CHECK lines
   6203   %2 = load double, double *%a2, align 8 ; no separate load appears in the CHECK lines; it shows up as the (%rdi) operand
   6204   %3 = fmul double %1, %2 ; memory-operand form in the CHECK lines
   6205   ret double %3
   6206 }
   6207 
   6208 define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
   6209 ; GENERIC-LABEL: test_orpd:
   6210 ; GENERIC:       # %bb.0:
   6211 ; GENERIC-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
   6212 ; GENERIC-NEXT:    orpd (%rdi), %xmm0 # sched: [7:1.00]
   6213 ; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   6214 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6215 ;
   6216 ; ATOM-LABEL: test_orpd:
   6217 ; ATOM:       # %bb.0:
   6218 ; ATOM-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
   6219 ; ATOM-NEXT:    orpd (%rdi), %xmm0 # sched: [1:1.00]
   6220 ; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
   6221 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6222 ;
   6223 ; SLM-LABEL: test_orpd:
   6224 ; SLM:       # %bb.0:
   6225 ; SLM-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
   6226 ; SLM-NEXT:    orpd (%rdi), %xmm0 # sched: [4:1.00]
   6227 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   6228 ; SLM-NEXT:    retq # sched: [4:1.00]
   6229 ;
   6230 ; SANDY-SSE-LABEL: test_orpd:
   6231 ; SANDY-SSE:       # %bb.0:
   6232 ; SANDY-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
   6233 ; SANDY-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [7:1.00]
   6234 ; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   6235 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   6236 ;
   6237 ; SANDY-LABEL: test_orpd:
   6238 ; SANDY:       # %bb.0:
   6239 ; SANDY-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6240 ; SANDY-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   6241 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   6242 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6243 ;
   6244 ; HASWELL-SSE-LABEL: test_orpd:
   6245 ; HASWELL-SSE:       # %bb.0:
   6246 ; HASWELL-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
   6247 ; HASWELL-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [7:1.00]
   6248 ; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   6249 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6250 ;
   6251 ; HASWELL-LABEL: test_orpd:
   6252 ; HASWELL:       # %bb.0:
   6253 ; HASWELL-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6254 ; HASWELL-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   6255 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   6256 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6257 ;
   6258 ; BROADWELL-SSE-LABEL: test_orpd:
   6259 ; BROADWELL-SSE:       # %bb.0:
   6260 ; BROADWELL-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
   6261 ; BROADWELL-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [6:1.00]
   6262 ; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   6263 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6264 ;
   6265 ; BROADWELL-LABEL: test_orpd:
   6266 ; BROADWELL:       # %bb.0:
   6267 ; BROADWELL-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6268 ; BROADWELL-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   6269 ; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   6270 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6271 ;
   6272 ; SKYLAKE-SSE-LABEL: test_orpd:
   6273 ; SKYLAKE-SSE:       # %bb.0:
   6274 ; SKYLAKE-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.33]
   6275 ; SKYLAKE-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [7:0.50]
   6276 ; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   6277 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   6278 ;
   6279 ; SKYLAKE-LABEL: test_orpd:
   6280 ; SKYLAKE:       # %bb.0:
   6281 ; SKYLAKE-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   6282 ; SKYLAKE-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6283 ; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
   6284 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6285 ;
   6286 ; SKX-SSE-LABEL: test_orpd:
   6287 ; SKX-SSE:       # %bb.0:
   6288 ; SKX-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.33]
   6289 ; SKX-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [7:0.50]
   6290 ; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   6291 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   6292 ;
   6293 ; SKX-LABEL: test_orpd:
   6294 ; SKX:       # %bb.0:
   6295 ; SKX-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   6296 ; SKX-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6297 ; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
   6298 ; SKX-NEXT:    retq # sched: [7:1.00]
   6299 ;
   6300 ; BTVER2-SSE-LABEL: test_orpd:
   6301 ; BTVER2-SSE:       # %bb.0:
   6302 ; BTVER2-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
   6303 ; BTVER2-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [6:1.00]
   6304 ; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   6305 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   6306 ;
   6307 ; BTVER2-LABEL: test_orpd:
   6308 ; BTVER2:       # %bb.0:
   6309 ; BTVER2-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6310 ; BTVER2-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   6311 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   6312 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6313 ;
   6314 ; ZNVER1-SSE-LABEL: test_orpd:
   6315 ; ZNVER1-SSE:       # %bb.0:
   6316 ; ZNVER1-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.25]
   6317 ; ZNVER1-SSE-NEXT:    orpd (%rdi), %xmm0 # sched: [8:0.50]
   6318 ; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   6319 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   6320 ;
   6321 ; ZNVER1-LABEL: test_orpd:
   6322 ; ZNVER1:       # %bb.0:
   6323 ; ZNVER1-NEXT:    vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   6324 ; ZNVER1-NEXT:    vorpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   6325 ; ZNVER1-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   6326 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Bitwise-OR test on <2 x double> values. IR `or` is an integer
          ; operation, so each operand is bitcast to <4 x i32>, OR-ed, and the
          ; result is bitcast back to <2 x double>.
          ; The CHECK lines above expect a register-register orpd/vorpd, a second
          ; one with a (%rdi) memory operand, and then an addpd/vaddpd.
   6327   %1 = bitcast <2 x double> %a0 to <4 x i32>
   6328   %2 = bitcast <2 x double> %a1 to <4 x i32>
   6329   %3 = or <4 x i32> %1, %2 ; register-register form in the CHECK lines
   6330   %4 = load <2 x double>, <2 x double> *%a2, align 16 ; no separate load appears in the CHECK lines; it shows up as the (%rdi) operand
   6331   %5 = bitcast <2 x double> %4 to <4 x i32>
   6332   %6 = or <4 x i32> %3, %5 ; memory-operand form in the CHECK lines
   6333   %7 = bitcast <4 x i32> %6 to <2 x double>
          ; NOTE(review): this fadd presumably gives the OR result a
          ; floating-point user so the ORs are emitted as orpd/vorpd rather than
          ; an integer-domain instruction - confirm.
   6334   %8 = fadd <2 x double> %a1, %7
   6335   ret <2 x double> %8
   6336 }
   6337 
   6338 define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
   6339 ; GENERIC-LABEL: test_packssdw:
   6340 ; GENERIC:       # %bb.0:
   6341 ; GENERIC-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:0.50]
   6342 ; GENERIC-NEXT:    packssdw (%rdi), %xmm0 # sched: [7:0.50]
   6343 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6344 ;
   6345 ; ATOM-LABEL: test_packssdw:
   6346 ; ATOM:       # %bb.0:
   6347 ; ATOM-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:1.00]
   6348 ; ATOM-NEXT:    packssdw (%rdi), %xmm0 # sched: [1:1.00]
   6349 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6350 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6351 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6352 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6353 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6354 ;
   6355 ; SLM-LABEL: test_packssdw:
   6356 ; SLM:       # %bb.0:
   6357 ; SLM-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:1.00]
   6358 ; SLM-NEXT:    packssdw (%rdi), %xmm0 # sched: [4:1.00]
   6359 ; SLM-NEXT:    retq # sched: [4:1.00]
   6360 ;
   6361 ; SANDY-SSE-LABEL: test_packssdw:
   6362 ; SANDY-SSE:       # %bb.0:
   6363 ; SANDY-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:0.50]
   6364 ; SANDY-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [7:0.50]
   6365 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   6366 ;
   6367 ; SANDY-LABEL: test_packssdw:
   6368 ; SANDY:       # %bb.0:
   6369 ; SANDY-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6370 ; SANDY-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6371 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6372 ;
   6373 ; HASWELL-SSE-LABEL: test_packssdw:
   6374 ; HASWELL-SSE:       # %bb.0:
   6375 ; HASWELL-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:1.00]
   6376 ; HASWELL-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [7:1.00]
   6377 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6378 ;
   6379 ; HASWELL-LABEL: test_packssdw:
   6380 ; HASWELL:       # %bb.0:
   6381 ; HASWELL-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6382 ; HASWELL-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   6383 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6384 ;
   6385 ; BROADWELL-SSE-LABEL: test_packssdw:
   6386 ; BROADWELL-SSE:       # %bb.0:
   6387 ; BROADWELL-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:1.00]
   6388 ; BROADWELL-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [6:1.00]
   6389 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6390 ;
   6391 ; BROADWELL-LABEL: test_packssdw:
   6392 ; BROADWELL:       # %bb.0:
   6393 ; BROADWELL-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6394 ; BROADWELL-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   6395 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6396 ;
   6397 ; SKYLAKE-SSE-LABEL: test_packssdw:
   6398 ; SKYLAKE-SSE:       # %bb.0:
   6399 ; SKYLAKE-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:1.00]
   6400 ; SKYLAKE-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [7:1.00]
   6401 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   6402 ;
   6403 ; SKYLAKE-LABEL: test_packssdw:
   6404 ; SKYLAKE:       # %bb.0:
   6405 ; SKYLAKE-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6406 ; SKYLAKE-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   6407 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6408 ;
   6409 ; SKX-SSE-LABEL: test_packssdw:
   6410 ; SKX-SSE:       # %bb.0:
   6411 ; SKX-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:1.00]
   6412 ; SKX-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [7:1.00]
   6413 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   6414 ;
   6415 ; SKX-LABEL: test_packssdw:
   6416 ; SKX:       # %bb.0:
   6417 ; SKX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6418 ; SKX-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   6419 ; SKX-NEXT:    retq # sched: [7:1.00]
   6420 ;
   6421 ; BTVER2-SSE-LABEL: test_packssdw:
   6422 ; BTVER2-SSE:       # %bb.0:
   6423 ; BTVER2-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:0.50]
   6424 ; BTVER2-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [6:1.00]
   6425 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   6426 ;
   6427 ; BTVER2-LABEL: test_packssdw:
   6428 ; BTVER2:       # %bb.0:
   6429 ; BTVER2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6430 ; BTVER2-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   6431 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6432 ;
   6433 ; ZNVER1-SSE-LABEL: test_packssdw:
   6434 ; ZNVER1-SSE:       # %bb.0:
   6435 ; ZNVER1-SSE-NEXT:    packssdw %xmm1, %xmm0 # sched: [1:0.25]
   6436 ; ZNVER1-SSE-NEXT:    packssdw (%rdi), %xmm0 # sched: [8:0.50]
   6437 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   6438 ;
   6439 ; ZNVER1-LABEL: test_packssdw:
   6440 ; ZNVER1:       # %bb.0:
   6441 ; ZNVER1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   6442 ; ZNVER1-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   6443 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; packssdw test: calls @llvm.x86.sse2.packssdw.128 twice, first on
          ; (%a0, %a1), then on the first result and a <4 x i32> loaded from %a2.
          ; The CHECK lines above expect a register-register packssdw/vpackssdw
          ; followed by one with a (%rdi) memory operand.
   6444   %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
   6445   %2 = bitcast <8 x i16> %1 to <4 x i32> ; retype the <8 x i16> result so it can be an operand of the second call
   6446   %3 = load <4 x i32>, <4 x i32> *%a2, align 16 ; no separate load appears in the CHECK lines; it shows up as the (%rdi) operand
   6447   %4 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %2, <4 x i32> %3)
   6448   ret <8 x i16> %4
   6449 }
   6450 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
   6451 
   6452 define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
   6453 ; GENERIC-LABEL: test_packsswb:
   6454 ; GENERIC:       # %bb.0:
   6455 ; GENERIC-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:0.50]
   6456 ; GENERIC-NEXT:    packsswb (%rdi), %xmm0 # sched: [7:0.50]
   6457 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6458 ;
   6459 ; ATOM-LABEL: test_packsswb:
   6460 ; ATOM:       # %bb.0:
   6461 ; ATOM-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:1.00]
   6462 ; ATOM-NEXT:    packsswb (%rdi), %xmm0 # sched: [1:1.00]
   6463 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6464 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6465 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6466 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6467 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6468 ;
   6469 ; SLM-LABEL: test_packsswb:
   6470 ; SLM:       # %bb.0:
   6471 ; SLM-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:1.00]
   6472 ; SLM-NEXT:    packsswb (%rdi), %xmm0 # sched: [4:1.00]
   6473 ; SLM-NEXT:    retq # sched: [4:1.00]
   6474 ;
   6475 ; SANDY-SSE-LABEL: test_packsswb:
   6476 ; SANDY-SSE:       # %bb.0:
   6477 ; SANDY-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:0.50]
   6478 ; SANDY-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [7:0.50]
   6479 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   6480 ;
   6481 ; SANDY-LABEL: test_packsswb:
   6482 ; SANDY:       # %bb.0:
   6483 ; SANDY-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6484 ; SANDY-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6485 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6486 ;
   6487 ; HASWELL-SSE-LABEL: test_packsswb:
   6488 ; HASWELL-SSE:       # %bb.0:
   6489 ; HASWELL-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:1.00]
   6490 ; HASWELL-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [7:1.00]
   6491 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6492 ;
   6493 ; HASWELL-LABEL: test_packsswb:
   6494 ; HASWELL:       # %bb.0:
   6495 ; HASWELL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6496 ; HASWELL-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   6497 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6498 ;
   6499 ; BROADWELL-SSE-LABEL: test_packsswb:
   6500 ; BROADWELL-SSE:       # %bb.0:
   6501 ; BROADWELL-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:1.00]
   6502 ; BROADWELL-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [6:1.00]
   6503 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6504 ;
   6505 ; BROADWELL-LABEL: test_packsswb:
   6506 ; BROADWELL:       # %bb.0:
   6507 ; BROADWELL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6508 ; BROADWELL-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   6509 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6510 ;
   6511 ; SKYLAKE-SSE-LABEL: test_packsswb:
   6512 ; SKYLAKE-SSE:       # %bb.0:
   6513 ; SKYLAKE-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:1.00]
   6514 ; SKYLAKE-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [7:1.00]
   6515 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   6516 ;
   6517 ; SKYLAKE-LABEL: test_packsswb:
   6518 ; SKYLAKE:       # %bb.0:
   6519 ; SKYLAKE-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6520 ; SKYLAKE-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   6521 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6522 ;
   6523 ; SKX-SSE-LABEL: test_packsswb:
   6524 ; SKX-SSE:       # %bb.0:
   6525 ; SKX-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:1.00]
   6526 ; SKX-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [7:1.00]
   6527 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   6528 ;
   6529 ; SKX-LABEL: test_packsswb:
   6530 ; SKX:       # %bb.0:
   6531 ; SKX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6532 ; SKX-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   6533 ; SKX-NEXT:    retq # sched: [7:1.00]
   6534 ;
   6535 ; BTVER2-SSE-LABEL: test_packsswb:
   6536 ; BTVER2-SSE:       # %bb.0:
   6537 ; BTVER2-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:0.50]
   6538 ; BTVER2-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [6:1.00]
   6539 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   6540 ;
   6541 ; BTVER2-LABEL: test_packsswb:
   6542 ; BTVER2:       # %bb.0:
   6543 ; BTVER2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6544 ; BTVER2-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   6545 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6546 ;
   6547 ; ZNVER1-SSE-LABEL: test_packsswb:
   6548 ; ZNVER1-SSE:       # %bb.0:
   6549 ; ZNVER1-SSE-NEXT:    packsswb %xmm1, %xmm0 # sched: [1:0.25]
   6550 ; ZNVER1-SSE-NEXT:    packsswb (%rdi), %xmm0 # sched: [8:0.50]
   6551 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   6552 ;
   6553 ; ZNVER1-LABEL: test_packsswb:
   6554 ; ZNVER1:       # %bb.0:
   6555 ; ZNVER1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   6556 ; ZNVER1-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   6557 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; packsswb test: calls @llvm.x86.sse2.packsswb.128 twice, first on
          ; (%a0, %a1), then on the first result and an <8 x i16> loaded from %a2.
          ; The CHECK lines above expect a register-register packsswb/vpacksswb
          ; followed by one with a (%rdi) memory operand.
   6558   %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
   6559   %2 = bitcast <16 x i8> %1 to <8 x i16> ; retype the <16 x i8> result so it can be an operand of the second call
   6560   %3 = load <8 x i16>, <8 x i16> *%a2, align 16 ; no separate load appears in the CHECK lines; it shows up as the (%rdi) operand
   6561   %4 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %2, <8 x i16> %3)
   6562   ret <16 x i8> %4
   6563 }
   6564 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
   6565 
   6566 define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
   6567 ; GENERIC-LABEL: test_packuswb:
   6568 ; GENERIC:       # %bb.0:
   6569 ; GENERIC-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:0.50]
   6570 ; GENERIC-NEXT:    packuswb (%rdi), %xmm0 # sched: [7:0.50]
   6571 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6572 ;
   6573 ; ATOM-LABEL: test_packuswb:
   6574 ; ATOM:       # %bb.0:
   6575 ; ATOM-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:1.00]
   6576 ; ATOM-NEXT:    packuswb (%rdi), %xmm0 # sched: [1:1.00]
   6577 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6578 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6579 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6580 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6581 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6582 ;
   6583 ; SLM-LABEL: test_packuswb:
   6584 ; SLM:       # %bb.0:
   6585 ; SLM-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:1.00]
   6586 ; SLM-NEXT:    packuswb (%rdi), %xmm0 # sched: [4:1.00]
   6587 ; SLM-NEXT:    retq # sched: [4:1.00]
   6588 ;
   6589 ; SANDY-SSE-LABEL: test_packuswb:
   6590 ; SANDY-SSE:       # %bb.0:
   6591 ; SANDY-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:0.50]
   6592 ; SANDY-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [7:0.50]
   6593 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   6594 ;
   6595 ; SANDY-LABEL: test_packuswb:
   6596 ; SANDY:       # %bb.0:
   6597 ; SANDY-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6598 ; SANDY-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6599 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6600 ;
   6601 ; HASWELL-SSE-LABEL: test_packuswb:
   6602 ; HASWELL-SSE:       # %bb.0:
   6603 ; HASWELL-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:1.00]
   6604 ; HASWELL-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [7:1.00]
   6605 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6606 ;
   6607 ; HASWELL-LABEL: test_packuswb:
   6608 ; HASWELL:       # %bb.0:
   6609 ; HASWELL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6610 ; HASWELL-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   6611 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6612 ;
   6613 ; BROADWELL-SSE-LABEL: test_packuswb:
   6614 ; BROADWELL-SSE:       # %bb.0:
   6615 ; BROADWELL-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:1.00]
   6616 ; BROADWELL-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [6:1.00]
   6617 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6618 ;
   6619 ; BROADWELL-LABEL: test_packuswb:
   6620 ; BROADWELL:       # %bb.0:
   6621 ; BROADWELL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6622 ; BROADWELL-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   6623 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6624 ;
   6625 ; SKYLAKE-SSE-LABEL: test_packuswb:
   6626 ; SKYLAKE-SSE:       # %bb.0:
   6627 ; SKYLAKE-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:1.00]
   6628 ; SKYLAKE-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [7:1.00]
   6629 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   6630 ;
   6631 ; SKYLAKE-LABEL: test_packuswb:
   6632 ; SKYLAKE:       # %bb.0:
   6633 ; SKYLAKE-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6634 ; SKYLAKE-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   6635 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6636 ;
   6637 ; SKX-SSE-LABEL: test_packuswb:
   6638 ; SKX-SSE:       # %bb.0:
   6639 ; SKX-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:1.00]
   6640 ; SKX-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [7:1.00]
   6641 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   6642 ;
   6643 ; SKX-LABEL: test_packuswb:
   6644 ; SKX:       # %bb.0:
   6645 ; SKX-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6646 ; SKX-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   6647 ; SKX-NEXT:    retq # sched: [7:1.00]
   6648 ;
   6649 ; BTVER2-SSE-LABEL: test_packuswb:
   6650 ; BTVER2-SSE:       # %bb.0:
   6651 ; BTVER2-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:0.50]
   6652 ; BTVER2-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [6:1.00]
   6653 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   6654 ;
   6655 ; BTVER2-LABEL: test_packuswb:
   6656 ; BTVER2:       # %bb.0:
   6657 ; BTVER2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6658 ; BTVER2-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   6659 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6660 ;
   6661 ; ZNVER1-SSE-LABEL: test_packuswb:
   6662 ; ZNVER1-SSE:       # %bb.0:
   6663 ; ZNVER1-SSE-NEXT:    packuswb %xmm1, %xmm0 # sched: [1:0.25]
   6664 ; ZNVER1-SSE-NEXT:    packuswb (%rdi), %xmm0 # sched: [8:0.50]
   6665 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   6666 ;
   6667 ; ZNVER1-LABEL: test_packuswb:
   6668 ; ZNVER1:       # %bb.0:
   6669 ; ZNVER1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   6670 ; ZNVER1-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   6671 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   6672   %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
   6673   %2 = bitcast <16 x i8> %1 to <8 x i16>
   6674   %3 = load <8 x i16>, <8 x i16> *%a2, align 16
   6675   %4 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %2, <8 x i16> %3)
   6676   ret <16 x i8> %4
   6677 }
   6678 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone ; signature of the intrinsic called twice in test_packuswb above: two <8 x i16> operands, one <16 x i8> result
   6679 
   6680 define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; Scheduling test for paddb/vpaddb: a register-register <16 x i8> add, then an add whose second operand is loaded from %a2. The check blocks below are autogenerated, one per prefix.
   6681 ; GENERIC-LABEL: test_paddb:
   6682 ; GENERIC:       # %bb.0:
   6683 ; GENERIC-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
   6684 ; GENERIC-NEXT:    paddb (%rdi), %xmm0 # sched: [7:0.50]
   6685 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6686 ;
   6687 ; ATOM-LABEL: test_paddb:
   6688 ; ATOM:       # %bb.0:
   6689 ; ATOM-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
   6690 ; ATOM-NEXT:    paddb (%rdi), %xmm0 # sched: [1:1.00]
   6691 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6692 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6693 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6694 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6695 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6696 ;
   6697 ; SLM-LABEL: test_paddb:
   6698 ; SLM:       # %bb.0:
   6699 ; SLM-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
   6700 ; SLM-NEXT:    paddb (%rdi), %xmm0 # sched: [4:1.00]
   6701 ; SLM-NEXT:    retq # sched: [4:1.00]
   6702 ;
   6703 ; SANDY-SSE-LABEL: test_paddb:
   6704 ; SANDY-SSE:       # %bb.0:
   6705 ; SANDY-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
   6706 ; SANDY-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [7:0.50]
   6707 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   6708 ;
   6709 ; SANDY-LABEL: test_paddb:
   6710 ; SANDY:       # %bb.0:
   6711 ; SANDY-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6712 ; SANDY-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6713 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6714 ;
   6715 ; HASWELL-SSE-LABEL: test_paddb:
   6716 ; HASWELL-SSE:       # %bb.0:
   6717 ; HASWELL-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
   6718 ; HASWELL-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [7:0.50]
   6719 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6720 ;
   6721 ; HASWELL-LABEL: test_paddb:
   6722 ; HASWELL:       # %bb.0:
   6723 ; HASWELL-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6724 ; HASWELL-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6725 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6726 ;
   6727 ; BROADWELL-SSE-LABEL: test_paddb:
   6728 ; BROADWELL-SSE:       # %bb.0:
   6729 ; BROADWELL-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
   6730 ; BROADWELL-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [6:0.50]
   6731 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6732 ;
   6733 ; BROADWELL-LABEL: test_paddb:
   6734 ; BROADWELL:       # %bb.0:
   6735 ; BROADWELL-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6736 ; BROADWELL-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   6737 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6738 ;
   6739 ; SKYLAKE-SSE-LABEL: test_paddb:
   6740 ; SKYLAKE-SSE:       # %bb.0:
   6741 ; SKYLAKE-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.33]
   6742 ; SKYLAKE-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [7:0.50]
   6743 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   6744 ;
   6745 ; SKYLAKE-LABEL: test_paddb:
   6746 ; SKYLAKE:       # %bb.0:
   6747 ; SKYLAKE-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   6748 ; SKYLAKE-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6749 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6750 ;
   6751 ; SKX-SSE-LABEL: test_paddb:
   6752 ; SKX-SSE:       # %bb.0:
   6753 ; SKX-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.33]
   6754 ; SKX-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [7:0.50]
   6755 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   6756 ;
   6757 ; SKX-LABEL: test_paddb:
   6758 ; SKX:       # %bb.0:
   6759 ; SKX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   6760 ; SKX-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6761 ; SKX-NEXT:    retq # sched: [7:1.00]
   6762 ;
   6763 ; BTVER2-SSE-LABEL: test_paddb:
   6764 ; BTVER2-SSE:       # %bb.0:
   6765 ; BTVER2-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.50]
   6766 ; BTVER2-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [6:1.00]
   6767 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   6768 ;
   6769 ; BTVER2-LABEL: test_paddb:
   6770 ; BTVER2:       # %bb.0:
   6771 ; BTVER2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6772 ; BTVER2-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   6773 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6774 ;
   6775 ; ZNVER1-SSE-LABEL: test_paddb:
   6776 ; ZNVER1-SSE:       # %bb.0:
   6777 ; ZNVER1-SSE-NEXT:    paddb %xmm1, %xmm0 # sched: [1:0.25]
   6778 ; ZNVER1-SSE-NEXT:    paddb (%rdi), %xmm0 # sched: [8:0.50]
   6779 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   6780 ;
   6781 ; ZNVER1-LABEL: test_paddb:
   6782 ; ZNVER1:       # %bb.0:
   6783 ; ZNVER1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   6784 ; ZNVER1-NEXT:    vpaddb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   6785 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   6786   %1 = add <16 x i8> %a0, %a1 ; register-register form; the first paddb/vpaddb line of every check block
   6787   %2 = load <16 x i8>, <16 x i8> *%a2, align 16 ; 16-byte-aligned load of the second addend; the check blocks contain no separate load instruction
   6788   %3 = add <16 x i8> %1, %2 ; memory-operand form; the second paddb/vpaddb line, which reads (%rdi)
   6789   ret <16 x i8> %3 ; returns the final sum, which depends on both adds
   6790 }
   6791 
   6792 define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; Scheduling test for paddd/vpaddd: a register-register <4 x i32> add, then an add whose second operand is loaded from %a2. The check blocks below are autogenerated, one per prefix.
   6793 ; GENERIC-LABEL: test_paddd:
   6794 ; GENERIC:       # %bb.0:
   6795 ; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   6796 ; GENERIC-NEXT:    paddd (%rdi), %xmm0 # sched: [7:0.50]
   6797 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6798 ;
   6799 ; ATOM-LABEL: test_paddd:
   6800 ; ATOM:       # %bb.0:
   6801 ; ATOM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   6802 ; ATOM-NEXT:    paddd (%rdi), %xmm0 # sched: [1:1.00]
   6803 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6804 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6805 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6806 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6807 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6808 ;
   6809 ; SLM-LABEL: test_paddd:
   6810 ; SLM:       # %bb.0:
   6811 ; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   6812 ; SLM-NEXT:    paddd (%rdi), %xmm0 # sched: [4:1.00]
   6813 ; SLM-NEXT:    retq # sched: [4:1.00]
   6814 ;
   6815 ; SANDY-SSE-LABEL: test_paddd:
   6816 ; SANDY-SSE:       # %bb.0:
   6817 ; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   6818 ; SANDY-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [7:0.50]
   6819 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   6820 ;
   6821 ; SANDY-LABEL: test_paddd:
   6822 ; SANDY:       # %bb.0:
   6823 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6824 ; SANDY-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6825 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6826 ;
   6827 ; HASWELL-SSE-LABEL: test_paddd:
   6828 ; HASWELL-SSE:       # %bb.0:
   6829 ; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   6830 ; HASWELL-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [7:0.50]
   6831 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6832 ;
   6833 ; HASWELL-LABEL: test_paddd:
   6834 ; HASWELL:       # %bb.0:
   6835 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6836 ; HASWELL-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6837 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6838 ;
   6839 ; BROADWELL-SSE-LABEL: test_paddd:
   6840 ; BROADWELL-SSE:       # %bb.0:
   6841 ; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   6842 ; BROADWELL-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [6:0.50]
   6843 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6844 ;
   6845 ; BROADWELL-LABEL: test_paddd:
   6846 ; BROADWELL:       # %bb.0:
   6847 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6848 ; BROADWELL-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   6849 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6850 ;
   6851 ; SKYLAKE-SSE-LABEL: test_paddd:
   6852 ; SKYLAKE-SSE:       # %bb.0:
   6853 ; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   6854 ; SKYLAKE-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [7:0.50]
   6855 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   6856 ;
   6857 ; SKYLAKE-LABEL: test_paddd:
   6858 ; SKYLAKE:       # %bb.0:
   6859 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   6860 ; SKYLAKE-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6861 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6862 ;
   6863 ; SKX-SSE-LABEL: test_paddd:
   6864 ; SKX-SSE:       # %bb.0:
   6865 ; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   6866 ; SKX-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [7:0.50]
   6867 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   6868 ;
   6869 ; SKX-LABEL: test_paddd:
   6870 ; SKX:       # %bb.0:
   6871 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   6872 ; SKX-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6873 ; SKX-NEXT:    retq # sched: [7:1.00]
   6874 ;
   6875 ; BTVER2-SSE-LABEL: test_paddd:
   6876 ; BTVER2-SSE:       # %bb.0:
   6877 ; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   6878 ; BTVER2-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [6:1.00]
   6879 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   6880 ;
   6881 ; BTVER2-LABEL: test_paddd:
   6882 ; BTVER2:       # %bb.0:
   6883 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6884 ; BTVER2-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   6885 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6886 ;
   6887 ; ZNVER1-SSE-LABEL: test_paddd:
   6888 ; ZNVER1-SSE:       # %bb.0:
   6889 ; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
   6890 ; ZNVER1-SSE-NEXT:    paddd (%rdi), %xmm0 # sched: [8:0.50]
   6891 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   6892 ;
   6893 ; ZNVER1-LABEL: test_paddd:
   6894 ; ZNVER1:       # %bb.0:
   6895 ; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   6896 ; ZNVER1-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   6897 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   6898   %1 = add <4 x i32> %a0, %a1 ; register-register form; the first paddd/vpaddd line of every check block
   6899   %2 = load <4 x i32>, <4 x i32> *%a2, align 16 ; 16-byte-aligned load of the second addend; the check blocks contain no separate load instruction
   6900   %3 = add <4 x i32> %1, %2 ; memory-operand form; the second paddd/vpaddd line, which reads (%rdi)
   6901   ret <4 x i32> %3 ; returns the final sum, which depends on both adds
   6902 }
   6903 
   6904 define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; Scheduling test for paddq/vpaddq: a register-register <2 x i64> add, then an add whose second operand is loaded from %a2. The check blocks below are autogenerated, one per prefix.
   6905 ; GENERIC-LABEL: test_paddq:
   6906 ; GENERIC:       # %bb.0:
   6907 ; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   6908 ; GENERIC-NEXT:    paddq (%rdi), %xmm0 # sched: [7:0.50]
   6909 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6910 ;
   6911 ; ATOM-LABEL: test_paddq:
   6912 ; ATOM:       # %bb.0:
   6913 ; ATOM-NEXT:    paddq %xmm1, %xmm0 # sched: [2:1.00]
   6914 ; ATOM-NEXT:    paddq (%rdi), %xmm0 # sched: [3:1.50]
   6915 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6916 ;
   6917 ; SLM-LABEL: test_paddq:
   6918 ; SLM:       # %bb.0:
   6919 ; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   6920 ; SLM-NEXT:    paddq (%rdi), %xmm0 # sched: [4:1.00]
   6921 ; SLM-NEXT:    retq # sched: [4:1.00]
   6922 ;
   6923 ; SANDY-SSE-LABEL: test_paddq:
   6924 ; SANDY-SSE:       # %bb.0:
   6925 ; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   6926 ; SANDY-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [7:0.50]
   6927 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   6928 ;
   6929 ; SANDY-LABEL: test_paddq:
   6930 ; SANDY:       # %bb.0:
   6931 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6932 ; SANDY-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6933 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6934 ;
   6935 ; HASWELL-SSE-LABEL: test_paddq:
   6936 ; HASWELL-SSE:       # %bb.0:
   6937 ; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   6938 ; HASWELL-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [7:0.50]
   6939 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6940 ;
   6941 ; HASWELL-LABEL: test_paddq:
   6942 ; HASWELL:       # %bb.0:
   6943 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6944 ; HASWELL-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6945 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6946 ;
   6947 ; BROADWELL-SSE-LABEL: test_paddq:
   6948 ; BROADWELL-SSE:       # %bb.0:
   6949 ; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   6950 ; BROADWELL-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [6:0.50]
   6951 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6952 ;
   6953 ; BROADWELL-LABEL: test_paddq:
   6954 ; BROADWELL:       # %bb.0:
   6955 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6956 ; BROADWELL-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   6957 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6958 ;
   6959 ; SKYLAKE-SSE-LABEL: test_paddq:
   6960 ; SKYLAKE-SSE:       # %bb.0:
   6961 ; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   6962 ; SKYLAKE-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [7:0.50]
   6963 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   6964 ;
   6965 ; SKYLAKE-LABEL: test_paddq:
   6966 ; SKYLAKE:       # %bb.0:
   6967 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   6968 ; SKYLAKE-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6969 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6970 ;
   6971 ; SKX-SSE-LABEL: test_paddq:
   6972 ; SKX-SSE:       # %bb.0:
   6973 ; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   6974 ; SKX-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [7:0.50]
   6975 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   6976 ;
   6977 ; SKX-LABEL: test_paddq:
   6978 ; SKX:       # %bb.0:
   6979 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   6980 ; SKX-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6981 ; SKX-NEXT:    retq # sched: [7:1.00]
   6982 ;
   6983 ; BTVER2-SSE-LABEL: test_paddq:
   6984 ; BTVER2-SSE:       # %bb.0:
   6985 ; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   6986 ; BTVER2-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [6:1.00]
   6987 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   6988 ;
   6989 ; BTVER2-LABEL: test_paddq:
   6990 ; BTVER2:       # %bb.0:
   6991 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6992 ; BTVER2-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   6993 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6994 ;
   6995 ; ZNVER1-SSE-LABEL: test_paddq:
   6996 ; ZNVER1-SSE:       # %bb.0:
   6997 ; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
   6998 ; ZNVER1-SSE-NEXT:    paddq (%rdi), %xmm0 # sched: [8:0.50]
   6999 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   7000 ;
   7001 ; ZNVER1-LABEL: test_paddq:
   7002 ; ZNVER1:       # %bb.0:
   7003 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   7004 ; ZNVER1-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   7005 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   7006   %1 = add <2 x i64> %a0, %a1 ; register-register form; the first paddq/vpaddq line of every check block
   7007   %2 = load <2 x i64>, <2 x i64> *%a2, align 16 ; 16-byte-aligned load of the second addend; the check blocks contain no separate load instruction
   7008   %3 = add <2 x i64> %1, %2 ; memory-operand form; the second paddq/vpaddq line, which reads (%rdi)
   7009   ret <2 x i64> %3 ; returns the final sum, which depends on both adds
   7010 }
   7011 
   7012 define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; Scheduling test for paddsb/vpaddsb via the llvm.x86.sse2.padds.b intrinsic: one register-register call, then one call whose second operand is loaded from %a2. The check blocks below are autogenerated, one per prefix.
   7013 ; GENERIC-LABEL: test_paddsb:
   7014 ; GENERIC:       # %bb.0:
   7015 ; GENERIC-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
   7016 ; GENERIC-NEXT:    paddsb (%rdi), %xmm0 # sched: [7:0.50]
   7017 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7018 ;
   7019 ; ATOM-LABEL: test_paddsb:
   7020 ; ATOM:       # %bb.0:
   7021 ; ATOM-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
   7022 ; ATOM-NEXT:    paddsb (%rdi), %xmm0 # sched: [1:1.00]
   7023 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7024 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7025 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7026 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7027 ; ATOM-NEXT:    retq # sched: [79:39.50]
   7028 ;
   7029 ; SLM-LABEL: test_paddsb:
   7030 ; SLM:       # %bb.0:
   7031 ; SLM-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
   7032 ; SLM-NEXT:    paddsb (%rdi), %xmm0 # sched: [4:1.00]
   7033 ; SLM-NEXT:    retq # sched: [4:1.00]
   7034 ;
   7035 ; SANDY-SSE-LABEL: test_paddsb:
   7036 ; SANDY-SSE:       # %bb.0:
   7037 ; SANDY-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
   7038 ; SANDY-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [7:0.50]
   7039 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   7040 ;
   7041 ; SANDY-LABEL: test_paddsb:
   7042 ; SANDY:       # %bb.0:
   7043 ; SANDY-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7044 ; SANDY-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7045 ; SANDY-NEXT:    retq # sched: [1:1.00]
   7046 ;
   7047 ; HASWELL-SSE-LABEL: test_paddsb:
   7048 ; HASWELL-SSE:       # %bb.0:
   7049 ; HASWELL-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
   7050 ; HASWELL-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [7:0.50]
   7051 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   7052 ;
   7053 ; HASWELL-LABEL: test_paddsb:
   7054 ; HASWELL:       # %bb.0:
   7055 ; HASWELL-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7056 ; HASWELL-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7057 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   7058 ;
   7059 ; BROADWELL-SSE-LABEL: test_paddsb:
   7060 ; BROADWELL-SSE:       # %bb.0:
   7061 ; BROADWELL-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
   7062 ; BROADWELL-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [6:0.50]
   7063 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   7064 ;
   7065 ; BROADWELL-LABEL: test_paddsb:
   7066 ; BROADWELL:       # %bb.0:
   7067 ; BROADWELL-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7068 ; BROADWELL-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   7069 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   7070 ;
   7071 ; SKYLAKE-SSE-LABEL: test_paddsb:
   7072 ; SKYLAKE-SSE:       # %bb.0:
   7073 ; SKYLAKE-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
   7074 ; SKYLAKE-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [7:0.50]
   7075 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   7076 ;
   7077 ; SKYLAKE-LABEL: test_paddsb:
   7078 ; SKYLAKE:       # %bb.0:
   7079 ; SKYLAKE-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7080 ; SKYLAKE-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7081 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   7082 ;
   7083 ; SKX-SSE-LABEL: test_paddsb:
   7084 ; SKX-SSE:       # %bb.0:
   7085 ; SKX-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
   7086 ; SKX-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [7:0.50]
   7087 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   7088 ;
   7089 ; SKX-LABEL: test_paddsb:
   7090 ; SKX:       # %bb.0:
   7091 ; SKX-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7092 ; SKX-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7093 ; SKX-NEXT:    retq # sched: [7:1.00]
   7094 ;
   7095 ; BTVER2-SSE-LABEL: test_paddsb:
   7096 ; BTVER2-SSE:       # %bb.0:
   7097 ; BTVER2-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.50]
   7098 ; BTVER2-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [6:1.00]
   7099 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   7100 ;
   7101 ; BTVER2-LABEL: test_paddsb:
   7102 ; BTVER2:       # %bb.0:
   7103 ; BTVER2-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7104 ; BTVER2-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   7105 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   7106 ;
   7107 ; ZNVER1-SSE-LABEL: test_paddsb:
   7108 ; ZNVER1-SSE:       # %bb.0:
   7109 ; ZNVER1-SSE-NEXT:    paddsb %xmm1, %xmm0 # sched: [1:0.25]
   7110 ; ZNVER1-SSE-NEXT:    paddsb (%rdi), %xmm0 # sched: [8:0.50]
   7111 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   7112 ;
   7113 ; ZNVER1-LABEL: test_paddsb:
   7114 ; ZNVER1:       # %bb.0:
   7115 ; ZNVER1-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   7116 ; ZNVER1-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   7117 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   7118   %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; register-register form; the first paddsb/vpaddsb line of every check block
   7119   %2 = load <16 x i8>, <16 x i8> *%a2, align 16 ; 16-byte-aligned load of the second operand; the check blocks contain no separate load instruction
   7120   %3 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %1, <16 x i8> %2) ; memory-operand form; the second paddsb/vpaddsb line, which reads (%rdi)
   7121   ret <16 x i8> %3 ; returns the second call's result, which depends on both calls
   7122 }
   7123 declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone ; signature of the intrinsic called twice in test_paddsb above: two <16 x i8> operands, one <16 x i8> result
   7124 
   7125 define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; Scheduling test for paddsw/vpaddsw via the llvm.x86.sse2.padds.w intrinsic: one register-register call, then one call whose second operand is loaded from %a2. The check blocks below are autogenerated, one per prefix.
   7126 ; GENERIC-LABEL: test_paddsw:
   7127 ; GENERIC:       # %bb.0:
   7128 ; GENERIC-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
   7129 ; GENERIC-NEXT:    paddsw (%rdi), %xmm0 # sched: [7:0.50]
   7130 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7131 ;
   7132 ; ATOM-LABEL: test_paddsw:
   7133 ; ATOM:       # %bb.0:
   7134 ; ATOM-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
   7135 ; ATOM-NEXT:    paddsw (%rdi), %xmm0 # sched: [1:1.00]
   7136 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7137 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7138 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7139 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7140 ; ATOM-NEXT:    retq # sched: [79:39.50]
   7141 ;
   7142 ; SLM-LABEL: test_paddsw:
   7143 ; SLM:       # %bb.0:
   7144 ; SLM-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
   7145 ; SLM-NEXT:    paddsw (%rdi), %xmm0 # sched: [4:1.00]
   7146 ; SLM-NEXT:    retq # sched: [4:1.00]
   7147 ;
   7148 ; SANDY-SSE-LABEL: test_paddsw:
   7149 ; SANDY-SSE:       # %bb.0:
   7150 ; SANDY-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
   7151 ; SANDY-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [7:0.50]
   7152 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   7153 ;
   7154 ; SANDY-LABEL: test_paddsw:
   7155 ; SANDY:       # %bb.0:
   7156 ; SANDY-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7157 ; SANDY-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7158 ; SANDY-NEXT:    retq # sched: [1:1.00]
   7159 ;
   7160 ; HASWELL-SSE-LABEL: test_paddsw:
   7161 ; HASWELL-SSE:       # %bb.0:
   7162 ; HASWELL-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
   7163 ; HASWELL-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [7:0.50]
   7164 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   7165 ;
   7166 ; HASWELL-LABEL: test_paddsw:
   7167 ; HASWELL:       # %bb.0:
   7168 ; HASWELL-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7169 ; HASWELL-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7170 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   7171 ;
   7172 ; BROADWELL-SSE-LABEL: test_paddsw:
   7173 ; BROADWELL-SSE:       # %bb.0:
   7174 ; BROADWELL-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
   7175 ; BROADWELL-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [6:0.50]
   7176 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   7177 ;
   7178 ; BROADWELL-LABEL: test_paddsw:
   7179 ; BROADWELL:       # %bb.0:
   7180 ; BROADWELL-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7181 ; BROADWELL-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   7182 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   7183 ;
   7184 ; SKYLAKE-SSE-LABEL: test_paddsw:
   7185 ; SKYLAKE-SSE:       # %bb.0:
   7186 ; SKYLAKE-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
   7187 ; SKYLAKE-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [7:0.50]
   7188 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   7189 ;
   7190 ; SKYLAKE-LABEL: test_paddsw:
   7191 ; SKYLAKE:       # %bb.0:
   7192 ; SKYLAKE-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7193 ; SKYLAKE-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7194 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   7195 ;
   7196 ; SKX-SSE-LABEL: test_paddsw:
   7197 ; SKX-SSE:       # %bb.0:
   7198 ; SKX-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
   7199 ; SKX-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [7:0.50]
   7200 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   7201 ;
   7202 ; SKX-LABEL: test_paddsw:
   7203 ; SKX:       # %bb.0:
   7204 ; SKX-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7205 ; SKX-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7206 ; SKX-NEXT:    retq # sched: [7:1.00]
   7207 ;
   7208 ; BTVER2-SSE-LABEL: test_paddsw:
   7209 ; BTVER2-SSE:       # %bb.0:
   7210 ; BTVER2-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.50]
   7211 ; BTVER2-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [6:1.00]
   7212 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   7213 ;
   7214 ; BTVER2-LABEL: test_paddsw:
   7215 ; BTVER2:       # %bb.0:
   7216 ; BTVER2-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7217 ; BTVER2-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   7218 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   7219 ;
   7220 ; ZNVER1-SSE-LABEL: test_paddsw:
   7221 ; ZNVER1-SSE:       # %bb.0:
   7222 ; ZNVER1-SSE-NEXT:    paddsw %xmm1, %xmm0 # sched: [1:0.25]
   7223 ; ZNVER1-SSE-NEXT:    paddsw (%rdi), %xmm0 # sched: [8:0.50]
   7224 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   7225 ;
   7226 ; ZNVER1-LABEL: test_paddsw:
   7227 ; ZNVER1:       # %bb.0:
   7228 ; ZNVER1-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   7229 ; ZNVER1-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   7230 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   7231   %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; register-register form; the first paddsw/vpaddsw line of every check block
   7232   %2 = load <8 x i16>, <8 x i16> *%a2, align 16 ; 16-byte-aligned load of the second operand; the check blocks contain no separate load instruction
   7233   %3 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %1, <8 x i16> %2) ; memory-operand form; the second paddsw/vpaddsw line, which reads (%rdi)
   7234   ret <8 x i16> %3 ; returns the second call's result, which depends on both calls
   7235 }
   7236 declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone ; signature of the intrinsic called twice in test_paddsw above: two <8 x i16> operands, one <8 x i16> result
   7237 
   7238 define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; Scheduling test for paddusb/vpaddusb via the llvm.x86.sse2.paddus.b intrinsic: one register-register call, then one call whose second operand is loaded from %a2. The check blocks below are autogenerated, one per prefix.
   7239 ; GENERIC-LABEL: test_paddusb:
   7240 ; GENERIC:       # %bb.0:
   7241 ; GENERIC-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
   7242 ; GENERIC-NEXT:    paddusb (%rdi), %xmm0 # sched: [7:0.50]
   7243 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7244 ;
   7245 ; ATOM-LABEL: test_paddusb:
   7246 ; ATOM:       # %bb.0:
   7247 ; ATOM-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
   7248 ; ATOM-NEXT:    paddusb (%rdi), %xmm0 # sched: [1:1.00]
   7249 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7250 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7251 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7252 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7253 ; ATOM-NEXT:    retq # sched: [79:39.50]
   7254 ;
   7255 ; SLM-LABEL: test_paddusb:
   7256 ; SLM:       # %bb.0:
   7257 ; SLM-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
   7258 ; SLM-NEXT:    paddusb (%rdi), %xmm0 # sched: [4:1.00]
   7259 ; SLM-NEXT:    retq # sched: [4:1.00]
   7260 ;
   7261 ; SANDY-SSE-LABEL: test_paddusb:
   7262 ; SANDY-SSE:       # %bb.0:
   7263 ; SANDY-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
   7264 ; SANDY-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [7:0.50]
   7265 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   7266 ;
   7267 ; SANDY-LABEL: test_paddusb:
   7268 ; SANDY:       # %bb.0:
   7269 ; SANDY-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7270 ; SANDY-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7271 ; SANDY-NEXT:    retq # sched: [1:1.00]
   7272 ;
   7273 ; HASWELL-SSE-LABEL: test_paddusb:
   7274 ; HASWELL-SSE:       # %bb.0:
   7275 ; HASWELL-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
   7276 ; HASWELL-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [7:0.50]
   7277 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   7278 ;
   7279 ; HASWELL-LABEL: test_paddusb:
   7280 ; HASWELL:       # %bb.0:
   7281 ; HASWELL-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7282 ; HASWELL-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7283 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   7284 ;
   7285 ; BROADWELL-SSE-LABEL: test_paddusb:
   7286 ; BROADWELL-SSE:       # %bb.0:
   7287 ; BROADWELL-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
   7288 ; BROADWELL-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [6:0.50]
   7289 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   7290 ;
   7291 ; BROADWELL-LABEL: test_paddusb:
   7292 ; BROADWELL:       # %bb.0:
   7293 ; BROADWELL-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7294 ; BROADWELL-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   7295 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   7296 ;
   7297 ; SKYLAKE-SSE-LABEL: test_paddusb:
   7298 ; SKYLAKE-SSE:       # %bb.0:
   7299 ; SKYLAKE-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
   7300 ; SKYLAKE-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [7:0.50]
   7301 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   7302 ;
   7303 ; SKYLAKE-LABEL: test_paddusb:
   7304 ; SKYLAKE:       # %bb.0:
   7305 ; SKYLAKE-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7306 ; SKYLAKE-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7307 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   7308 ;
   7309 ; SKX-SSE-LABEL: test_paddusb:
   7310 ; SKX-SSE:       # %bb.0:
   7311 ; SKX-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
   7312 ; SKX-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [7:0.50]
   7313 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   7314 ;
   7315 ; SKX-LABEL: test_paddusb:
   7316 ; SKX:       # %bb.0:
   7317 ; SKX-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7318 ; SKX-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7319 ; SKX-NEXT:    retq # sched: [7:1.00]
   7320 ;
   7321 ; BTVER2-SSE-LABEL: test_paddusb:
   7322 ; BTVER2-SSE:       # %bb.0:
   7323 ; BTVER2-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.50]
   7324 ; BTVER2-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [6:1.00]
   7325 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   7326 ;
   7327 ; BTVER2-LABEL: test_paddusb:
   7328 ; BTVER2:       # %bb.0:
   7329 ; BTVER2-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7330 ; BTVER2-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   7331 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   7332 ;
   7333 ; ZNVER1-SSE-LABEL: test_paddusb:
   7334 ; ZNVER1-SSE:       # %bb.0:
   7335 ; ZNVER1-SSE-NEXT:    paddusb %xmm1, %xmm0 # sched: [1:0.25]
   7336 ; ZNVER1-SSE-NEXT:    paddusb (%rdi), %xmm0 # sched: [8:0.50]
   7337 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   7338 ;
   7339 ; ZNVER1-LABEL: test_paddusb:
   7340 ; ZNVER1:       # %bb.0:
   7341 ; ZNVER1-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   7342 ; ZNVER1-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   7343 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   7344   %1 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; register-register form; the first paddusb/vpaddusb line of every check block
   7345   %2 = load <16 x i8>, <16 x i8> *%a2, align 16 ; 16-byte-aligned load of the second operand; the check blocks contain no separate load instruction
   7346   %3 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %1, <16 x i8> %2) ; memory-operand form; the second paddusb/vpaddusb line, which reads (%rdi)
   7347   ret <16 x i8> %3 ; returns the second call's result, which depends on both calls
   7348 }
   7349 declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone ; signature of the intrinsic called twice in test_paddusb above: two <16 x i8> operands, one <16 x i8> result
   7350 
   ; test_paddusw: scheduling-annotation test for the paddus.w intrinsic.
   ; The IR calls @llvm.x86.sse2.paddus.w twice: first on %a0 and %a1, then on
   ; that result and a 16-byte-aligned load from %a2. Every CPU prefix below
   ; therefore expects one register-register form and one form with the load
   ; folded as (%rdi), spelled paddusw or vpaddusw depending on the prefix, each
   ; followed by the "# sched: [...]" annotation printed under -print-schedule.
   ; The ATOM prefix additionally expects four nop instructions before retq.
   ; Check lines are autogenerated (see the NOTE on the first line of the file);
   ; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
   7351 define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
   7352 ; GENERIC-LABEL: test_paddusw:
   7353 ; GENERIC:       # %bb.0:
   7354 ; GENERIC-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
   7355 ; GENERIC-NEXT:    paddusw (%rdi), %xmm0 # sched: [7:0.50]
   7356 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7357 ;
   7358 ; ATOM-LABEL: test_paddusw:
   7359 ; ATOM:       # %bb.0:
   7360 ; ATOM-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
   7361 ; ATOM-NEXT:    paddusw (%rdi), %xmm0 # sched: [1:1.00]
   7362 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7363 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7364 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7365 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7366 ; ATOM-NEXT:    retq # sched: [79:39.50]
   7367 ;
   7368 ; SLM-LABEL: test_paddusw:
   7369 ; SLM:       # %bb.0:
   7370 ; SLM-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
   7371 ; SLM-NEXT:    paddusw (%rdi), %xmm0 # sched: [4:1.00]
   7372 ; SLM-NEXT:    retq # sched: [4:1.00]
   7373 ;
   7374 ; SANDY-SSE-LABEL: test_paddusw:
   7375 ; SANDY-SSE:       # %bb.0:
   7376 ; SANDY-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
   7377 ; SANDY-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [7:0.50]
   7378 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   7379 ;
   7380 ; SANDY-LABEL: test_paddusw:
   7381 ; SANDY:       # %bb.0:
   7382 ; SANDY-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7383 ; SANDY-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7384 ; SANDY-NEXT:    retq # sched: [1:1.00]
   7385 ;
   7386 ; HASWELL-SSE-LABEL: test_paddusw:
   7387 ; HASWELL-SSE:       # %bb.0:
   7388 ; HASWELL-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
   7389 ; HASWELL-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [7:0.50]
   7390 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   7391 ;
   7392 ; HASWELL-LABEL: test_paddusw:
   7393 ; HASWELL:       # %bb.0:
   7394 ; HASWELL-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7395 ; HASWELL-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7396 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   7397 ;
   7398 ; BROADWELL-SSE-LABEL: test_paddusw:
   7399 ; BROADWELL-SSE:       # %bb.0:
   7400 ; BROADWELL-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
   7401 ; BROADWELL-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [6:0.50]
   7402 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   7403 ;
   7404 ; BROADWELL-LABEL: test_paddusw:
   7405 ; BROADWELL:       # %bb.0:
   7406 ; BROADWELL-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7407 ; BROADWELL-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   7408 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   7409 ;
   7410 ; SKYLAKE-SSE-LABEL: test_paddusw:
   7411 ; SKYLAKE-SSE:       # %bb.0:
   7412 ; SKYLAKE-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
   7413 ; SKYLAKE-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [7:0.50]
   7414 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   7415 ;
   7416 ; SKYLAKE-LABEL: test_paddusw:
   7417 ; SKYLAKE:       # %bb.0:
   7418 ; SKYLAKE-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7419 ; SKYLAKE-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7420 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   7421 ;
   7422 ; SKX-SSE-LABEL: test_paddusw:
   7423 ; SKX-SSE:       # %bb.0:
   7424 ; SKX-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
   7425 ; SKX-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [7:0.50]
   7426 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   7427 ;
   7428 ; SKX-LABEL: test_paddusw:
   7429 ; SKX:       # %bb.0:
   7430 ; SKX-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7431 ; SKX-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7432 ; SKX-NEXT:    retq # sched: [7:1.00]
   7433 ;
   7434 ; BTVER2-SSE-LABEL: test_paddusw:
   7435 ; BTVER2-SSE:       # %bb.0:
   7436 ; BTVER2-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.50]
   7437 ; BTVER2-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [6:1.00]
   7438 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   7439 ;
   7440 ; BTVER2-LABEL: test_paddusw:
   7441 ; BTVER2:       # %bb.0:
   7442 ; BTVER2-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7443 ; BTVER2-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   7444 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   7445 ;
   7446 ; ZNVER1-SSE-LABEL: test_paddusw:
   7447 ; ZNVER1-SSE:       # %bb.0:
   7448 ; ZNVER1-SSE-NEXT:    paddusw %xmm1, %xmm0 # sched: [1:0.25]
   7449 ; ZNVER1-SSE-NEXT:    paddusw (%rdi), %xmm0 # sched: [8:0.50]
   7450 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   7451 ;
   7452 ; ZNVER1-LABEL: test_paddusw:
   7453 ; ZNVER1:       # %bb.0:
   7454 ; ZNVER1-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   7455 ; ZNVER1-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   7456 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   7457   %1 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1)
   7458   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   7459   %3 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %1, <8 x i16> %2)
   7460   ret <8 x i16> %3
   7461 }
   7462 declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
   7463 
   ; test_paddw: scheduling-annotation test for a plain <8 x i16> add.
   ; The IR adds %a0 and %a1, then adds a 16-byte-aligned load from %a2 to that
   ; sum. Every CPU prefix below expects one register-register paddw/vpaddw and
   ; one paddw/vpaddw with the load folded as (%rdi), each followed by the
   ; "# sched: [...]" annotation printed under -print-schedule.
   ; The ATOM prefix additionally expects four nop instructions before retq.
   ; Check lines are autogenerated (see the NOTE on the first line of the file);
   ; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
   7464 define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
   7465 ; GENERIC-LABEL: test_paddw:
   7466 ; GENERIC:       # %bb.0:
   7467 ; GENERIC-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   7468 ; GENERIC-NEXT:    paddw (%rdi), %xmm0 # sched: [7:0.50]
   7469 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7470 ;
   7471 ; ATOM-LABEL: test_paddw:
   7472 ; ATOM:       # %bb.0:
   7473 ; ATOM-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   7474 ; ATOM-NEXT:    paddw (%rdi), %xmm0 # sched: [1:1.00]
   7475 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7476 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7477 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7478 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7479 ; ATOM-NEXT:    retq # sched: [79:39.50]
   7480 ;
   7481 ; SLM-LABEL: test_paddw:
   7482 ; SLM:       # %bb.0:
   7483 ; SLM-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   7484 ; SLM-NEXT:    paddw (%rdi), %xmm0 # sched: [4:1.00]
   7485 ; SLM-NEXT:    retq # sched: [4:1.00]
   7486 ;
   7487 ; SANDY-SSE-LABEL: test_paddw:
   7488 ; SANDY-SSE:       # %bb.0:
   7489 ; SANDY-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   7490 ; SANDY-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [7:0.50]
   7491 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   7492 ;
   7493 ; SANDY-LABEL: test_paddw:
   7494 ; SANDY:       # %bb.0:
   7495 ; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7496 ; SANDY-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7497 ; SANDY-NEXT:    retq # sched: [1:1.00]
   7498 ;
   7499 ; HASWELL-SSE-LABEL: test_paddw:
   7500 ; HASWELL-SSE:       # %bb.0:
   7501 ; HASWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   7502 ; HASWELL-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [7:0.50]
   7503 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   7504 ;
   7505 ; HASWELL-LABEL: test_paddw:
   7506 ; HASWELL:       # %bb.0:
   7507 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7508 ; HASWELL-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7509 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   7510 ;
   7511 ; BROADWELL-SSE-LABEL: test_paddw:
   7512 ; BROADWELL-SSE:       # %bb.0:
   7513 ; BROADWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   7514 ; BROADWELL-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [6:0.50]
   7515 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   7516 ;
   7517 ; BROADWELL-LABEL: test_paddw:
   7518 ; BROADWELL:       # %bb.0:
   7519 ; BROADWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7520 ; BROADWELL-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   7521 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   7522 ;
   7523 ; SKYLAKE-SSE-LABEL: test_paddw:
   7524 ; SKYLAKE-SSE:       # %bb.0:
   7525 ; SKYLAKE-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
   7526 ; SKYLAKE-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [7:0.50]
   7527 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   7528 ;
   7529 ; SKYLAKE-LABEL: test_paddw:
   7530 ; SKYLAKE:       # %bb.0:
   7531 ; SKYLAKE-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   7532 ; SKYLAKE-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7533 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   7534 ;
   7535 ; SKX-SSE-LABEL: test_paddw:
   7536 ; SKX-SSE:       # %bb.0:
   7537 ; SKX-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
   7538 ; SKX-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [7:0.50]
   7539 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   7540 ;
   7541 ; SKX-LABEL: test_paddw:
   7542 ; SKX:       # %bb.0:
   7543 ; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   7544 ; SKX-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7545 ; SKX-NEXT:    retq # sched: [7:1.00]
   7546 ;
   7547 ; BTVER2-SSE-LABEL: test_paddw:
   7548 ; BTVER2-SSE:       # %bb.0:
   7549 ; BTVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   7550 ; BTVER2-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [6:1.00]
   7551 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   7552 ;
   7553 ; BTVER2-LABEL: test_paddw:
   7554 ; BTVER2:       # %bb.0:
   7555 ; BTVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7556 ; BTVER2-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   7557 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   7558 ;
   7559 ; ZNVER1-SSE-LABEL: test_paddw:
   7560 ; ZNVER1-SSE:       # %bb.0:
   7561 ; ZNVER1-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.25]
   7562 ; ZNVER1-SSE-NEXT:    paddw (%rdi), %xmm0 # sched: [8:0.50]
   7563 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   7564 ;
   7565 ; ZNVER1-LABEL: test_paddw:
   7566 ; ZNVER1:       # %bb.0:
   7567 ; ZNVER1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   7568 ; ZNVER1-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   7569 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   7570   %1 = add <8 x i16> %a0, %a1
   7571   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   7572   %3 = add <8 x i16> %1, %2
   7573   ret <8 x i16> %3
   7574 }
   7575 
   ; test_pand: scheduling-annotation test for a <2 x i64> bitwise and.
   ; The IR ands %a0 with %a1, ands that with a 16-byte-aligned load from %a2,
   ; and finally adds %a1 to the result. Every CPU prefix below expects a
   ; register-register pand/vpand, a pand/vpand with the load folded as (%rdi),
   ; and a trailing paddq/vpaddq, each followed by the "# sched: [...]"
   ; annotation printed under -print-schedule.
   ; NOTE(review): the trailing add is presumably there to keep the logic ops in
   ; the integer domain (pand rather than a floating-point and) - confirm.
   ; Check lines are autogenerated (see the NOTE on the first line of the file);
   ; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
   7576 define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
   7577 ; GENERIC-LABEL: test_pand:
   7578 ; GENERIC:       # %bb.0:
   7579 ; GENERIC-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
   7580 ; GENERIC-NEXT:    pand (%rdi), %xmm0 # sched: [7:0.50]
   7581 ; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   7582 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7583 ;
   7584 ; ATOM-LABEL: test_pand:
   7585 ; ATOM:       # %bb.0:
   7586 ; ATOM-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.50]
   7587 ; ATOM-NEXT:    pand (%rdi), %xmm0 # sched: [1:1.00]
   7588 ; ATOM-NEXT:    paddq %xmm1, %xmm0 # sched: [2:1.00]
   7589 ; ATOM-NEXT:    retq # sched: [79:39.50]
   7590 ;
   7591 ; SLM-LABEL: test_pand:
   7592 ; SLM:       # %bb.0:
   7593 ; SLM-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.50]
   7594 ; SLM-NEXT:    pand (%rdi), %xmm0 # sched: [4:1.00]
   7595 ; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   7596 ; SLM-NEXT:    retq # sched: [4:1.00]
   7597 ;
   7598 ; SANDY-SSE-LABEL: test_pand:
   7599 ; SANDY-SSE:       # %bb.0:
   7600 ; SANDY-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
   7601 ; SANDY-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [7:0.50]
   7602 ; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   7603 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   7604 ;
   7605 ; SANDY-LABEL: test_pand:
   7606 ; SANDY:       # %bb.0:
   7607 ; SANDY-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   7608 ; SANDY-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7609 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7610 ; SANDY-NEXT:    retq # sched: [1:1.00]
   7611 ;
   7612 ; HASWELL-SSE-LABEL: test_pand:
   7613 ; HASWELL-SSE:       # %bb.0:
   7614 ; HASWELL-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
   7615 ; HASWELL-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [7:0.50]
   7616 ; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   7617 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   7618 ;
   7619 ; HASWELL-LABEL: test_pand:
   7620 ; HASWELL:       # %bb.0:
   7621 ; HASWELL-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   7622 ; HASWELL-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7623 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7624 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   7625 ;
   7626 ; BROADWELL-SSE-LABEL: test_pand:
   7627 ; BROADWELL-SSE:       # %bb.0:
   7628 ; BROADWELL-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
   7629 ; BROADWELL-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [6:0.50]
   7630 ; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   7631 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   7632 ;
   7633 ; BROADWELL-LABEL: test_pand:
   7634 ; BROADWELL:       # %bb.0:
   7635 ; BROADWELL-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   7636 ; BROADWELL-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   7637 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7638 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   7639 ;
   7640 ; SKYLAKE-SSE-LABEL: test_pand:
   7641 ; SKYLAKE-SSE:       # %bb.0:
   7642 ; SKYLAKE-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
   7643 ; SKYLAKE-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [7:0.50]
   7644 ; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   7645 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   7646 ;
   7647 ; SKYLAKE-LABEL: test_pand:
   7648 ; SKYLAKE:       # %bb.0:
   7649 ; SKYLAKE-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   7650 ; SKYLAKE-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7651 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   7652 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   7653 ;
   7654 ; SKX-SSE-LABEL: test_pand:
   7655 ; SKX-SSE:       # %bb.0:
   7656 ; SKX-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.33]
   7657 ; SKX-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [7:0.50]
   7658 ; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   7659 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   7660 ;
   7661 ; SKX-LABEL: test_pand:
   7662 ; SKX:       # %bb.0:
   7663 ; SKX-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   7664 ; SKX-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7665 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   7666 ; SKX-NEXT:    retq # sched: [7:1.00]
   7667 ;
   7668 ; BTVER2-SSE-LABEL: test_pand:
   7669 ; BTVER2-SSE:       # %bb.0:
   7670 ; BTVER2-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.50]
   7671 ; BTVER2-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [6:1.00]
   7672 ; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   7673 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   7674 ;
   7675 ; BTVER2-LABEL: test_pand:
   7676 ; BTVER2:       # %bb.0:
   7677 ; BTVER2-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7678 ; BTVER2-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   7679 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7680 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   7681 ;
   7682 ; ZNVER1-SSE-LABEL: test_pand:
   7683 ; ZNVER1-SSE:       # %bb.0:
   7684 ; ZNVER1-SSE-NEXT:    pand %xmm1, %xmm0 # sched: [1:0.25]
   7685 ; ZNVER1-SSE-NEXT:    pand (%rdi), %xmm0 # sched: [8:0.50]
   7686 ; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
   7687 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   7688 ;
   7689 ; ZNVER1-LABEL: test_pand:
   7690 ; ZNVER1:       # %bb.0:
   7691 ; ZNVER1-NEXT:    vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   7692 ; ZNVER1-NEXT:    vpand (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   7693 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   7694 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   7695   %1 = and <2 x i64> %a0, %a1
   7696   %2 = load <2 x i64>, <2 x i64> *%a2, align 16
   7697   %3 = and <2 x i64> %1, %2
   7698   %4 = add <2 x i64> %3, %a1
   7699   ret <2 x i64> %4
   7700 }
   7701 
   ; test_pandn: scheduling-annotation test for the and-not pattern on <2 x i64>.
   ; The IR computes %2 = %a1 & ~%a0 (xor with all-ones, then and), then
   ; %5 = load(%a2) & ~%2 using a 16-byte-aligned load, and returns %2 + %5.
   ; Every CPU prefix below expects a register-register pandn/vpandn, a
   ; pandn/vpandn with the load folded as (%rdi), and a paddq/vpaddq, each
   ; followed by the "# sched: [...]" annotation printed under -print-schedule.
   ; The two-operand pandn sequences also expect a movdqa copy before the
   ; memory-form pandn and a movdqa back into %xmm0 after the paddq; the
   ; three-operand vpandn sequences write the second result to %xmm1 instead and
   ; contain no copies.
   ; NOTE(review): the final add is presumably there to keep the logic ops in
   ; the integer domain (pandn rather than a floating-point and-not) - confirm.
   ; Check lines are autogenerated (see the NOTE on the first line of the file);
   ; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
   7702 define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
   7703 ; GENERIC-LABEL: test_pandn:
   7704 ; GENERIC:       # %bb.0:
   7705 ; GENERIC-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
   7706 ; GENERIC-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
   7707 ; GENERIC-NEXT:    pandn (%rdi), %xmm1 # sched: [7:0.50]
   7708 ; GENERIC-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   7709 ; GENERIC-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
   7710 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7711 ;
   7712 ; ATOM-LABEL: test_pandn:
   7713 ; ATOM:       # %bb.0:
   7714 ; ATOM-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.50]
   7715 ; ATOM-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.50]
   7716 ; ATOM-NEXT:    pandn (%rdi), %xmm1 # sched: [1:1.00]
   7717 ; ATOM-NEXT:    paddq %xmm0, %xmm1 # sched: [2:1.00]
   7718 ; ATOM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   7719 ; ATOM-NEXT:    retq # sched: [79:39.50]
   7720 ;
   7721 ; SLM-LABEL: test_pandn:
   7722 ; SLM:       # %bb.0:
   7723 ; SLM-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.50]
   7724 ; SLM-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.50]
   7725 ; SLM-NEXT:    pandn (%rdi), %xmm1 # sched: [4:1.00]
   7726 ; SLM-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   7727 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   7728 ; SLM-NEXT:    retq # sched: [4:1.00]
   7729 ;
   7730 ; SANDY-SSE-LABEL: test_pandn:
   7731 ; SANDY-SSE:       # %bb.0:
   7732 ; SANDY-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
   7733 ; SANDY-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
   7734 ; SANDY-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [7:0.50]
   7735 ; SANDY-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   7736 ; SANDY-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
   7737 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   7738 ;
   7739 ; SANDY-LABEL: test_pandn:
   7740 ; SANDY:       # %bb.0:
   7741 ; SANDY-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   7742 ; SANDY-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
   7743 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7744 ; SANDY-NEXT:    retq # sched: [1:1.00]
   7745 ;
   7746 ; HASWELL-SSE-LABEL: test_pandn:
   7747 ; HASWELL-SSE:       # %bb.0:
   7748 ; HASWELL-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
   7749 ; HASWELL-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
   7750 ; HASWELL-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [7:0.50]
   7751 ; HASWELL-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   7752 ; HASWELL-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
   7753 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   7754 ;
   7755 ; HASWELL-LABEL: test_pandn:
   7756 ; HASWELL:       # %bb.0:
   7757 ; HASWELL-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   7758 ; HASWELL-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
   7759 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7760 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   7761 ;
   7762 ; BROADWELL-SSE-LABEL: test_pandn:
   7763 ; BROADWELL-SSE:       # %bb.0:
   7764 ; BROADWELL-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
   7765 ; BROADWELL-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
   7766 ; BROADWELL-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [6:0.50]
   7767 ; BROADWELL-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   7768 ; BROADWELL-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
   7769 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   7770 ;
   7771 ; BROADWELL-LABEL: test_pandn:
   7772 ; BROADWELL:       # %bb.0:
   7773 ; BROADWELL-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   7774 ; BROADWELL-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [6:0.50]
   7775 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7776 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   7777 ;
   7778 ; SKYLAKE-SSE-LABEL: test_pandn:
   7779 ; SKYLAKE-SSE:       # %bb.0:
   7780 ; SKYLAKE-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
   7781 ; SKYLAKE-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
   7782 ; SKYLAKE-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [7:0.50]
   7783 ; SKYLAKE-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.33]
   7784 ; SKYLAKE-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
   7785 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   7786 ;
   7787 ; SKYLAKE-LABEL: test_pandn:
   7788 ; SKYLAKE:       # %bb.0:
   7789 ; SKYLAKE-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   7790 ; SKYLAKE-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
   7791 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   7792 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   7793 ;
   7794 ; SKX-SSE-LABEL: test_pandn:
   7795 ; SKX-SSE:       # %bb.0:
   7796 ; SKX-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.33]
   7797 ; SKX-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.33]
   7798 ; SKX-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [7:0.50]
   7799 ; SKX-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.33]
   7800 ; SKX-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.33]
   7801 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   7802 ;
   7803 ; SKX-LABEL: test_pandn:
   7804 ; SKX:       # %bb.0:
   7805 ; SKX-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   7806 ; SKX-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
   7807 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   7808 ; SKX-NEXT:    retq # sched: [7:1.00]
   7809 ;
   7810 ; BTVER2-SSE-LABEL: test_pandn:
   7811 ; BTVER2-SSE:       # %bb.0:
   7812 ; BTVER2-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.50]
   7813 ; BTVER2-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.50]
   7814 ; BTVER2-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [6:1.00]
   7815 ; BTVER2-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.50]
   7816 ; BTVER2-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   7817 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   7818 ;
   7819 ; BTVER2-LABEL: test_pandn:
   7820 ; BTVER2:       # %bb.0:
   7821 ; BTVER2-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7822 ; BTVER2-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [6:1.00]
   7823 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7824 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   7825 ;
   7826 ; ZNVER1-SSE-LABEL: test_pandn:
   7827 ; ZNVER1-SSE:       # %bb.0:
   7828 ; ZNVER1-SSE-NEXT:    pandn %xmm1, %xmm0 # sched: [1:0.25]
   7829 ; ZNVER1-SSE-NEXT:    movdqa %xmm0, %xmm1 # sched: [1:0.25]
   7830 ; ZNVER1-SSE-NEXT:    pandn (%rdi), %xmm1 # sched: [8:0.50]
   7831 ; ZNVER1-SSE-NEXT:    paddq %xmm0, %xmm1 # sched: [1:0.25]
   7832 ; ZNVER1-SSE-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.25]
   7833 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   7834 ;
   7835 ; ZNVER1-LABEL: test_pandn:
   7836 ; ZNVER1:       # %bb.0:
   7837 ; ZNVER1-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   7838 ; ZNVER1-NEXT:    vpandn (%rdi), %xmm0, %xmm1 # sched: [8:0.50]
   7839 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   7840 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   7841   %1 = xor <2 x i64> %a0, <i64 -1, i64 -1>
   7842   %2 = and <2 x i64> %a1, %1
   7843   %3 = load <2 x i64>, <2 x i64> *%a2, align 16
   7844   %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
   7845   %5 = and <2 x i64> %3, %4
   7846   %6 = add <2 x i64> %2, %5
   7847   ret <2 x i64> %6
   7848 }
   7849 
   ; test_pavgb: scheduling-annotation test for the byte-average pattern.
   ; No intrinsic is called: the IR spells the operation out as zext to
   ; <16 x i16>, add, add 1, lshr by 1, trunc back to <16 x i8>. It does this
   ; once for %a0 and %a1, and once for that result and a 16-byte-aligned load
   ; from %a2. Every CPU prefix below expects each expanded sequence to be
   ; emitted as a single pavgb/vpavgb (register-register, then with the load
   ; folded as (%rdi)), each followed by the "# sched: [...]" annotation printed
   ; under -print-schedule. Keep the IR pattern exactly as written; the expected
   ; output depends on it being selected as pavgb.
   ; The ATOM prefix additionally expects four nop instructions before retq.
   ; Check lines are autogenerated (see the NOTE on the first line of the file);
   ; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
   7850 define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
   7851 ; GENERIC-LABEL: test_pavgb:
   7852 ; GENERIC:       # %bb.0:
   7853 ; GENERIC-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
   7854 ; GENERIC-NEXT:    pavgb (%rdi), %xmm0 # sched: [7:0.50]
   7855 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7856 ;
   7857 ; ATOM-LABEL: test_pavgb:
   7858 ; ATOM:       # %bb.0:
   7859 ; ATOM-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
   7860 ; ATOM-NEXT:    pavgb (%rdi), %xmm0 # sched: [1:1.00]
   7861 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7862 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7863 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7864 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7865 ; ATOM-NEXT:    retq # sched: [79:39.50]
   7866 ;
   7867 ; SLM-LABEL: test_pavgb:
   7868 ; SLM:       # %bb.0:
   7869 ; SLM-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
   7870 ; SLM-NEXT:    pavgb (%rdi), %xmm0 # sched: [4:1.00]
   7871 ; SLM-NEXT:    retq # sched: [4:1.00]
   7872 ;
   7873 ; SANDY-SSE-LABEL: test_pavgb:
   7874 ; SANDY-SSE:       # %bb.0:
   7875 ; SANDY-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
   7876 ; SANDY-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [7:0.50]
   7877 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   7878 ;
   7879 ; SANDY-LABEL: test_pavgb:
   7880 ; SANDY:       # %bb.0:
   7881 ; SANDY-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7882 ; SANDY-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7883 ; SANDY-NEXT:    retq # sched: [1:1.00]
   7884 ;
   7885 ; HASWELL-SSE-LABEL: test_pavgb:
   7886 ; HASWELL-SSE:       # %bb.0:
   7887 ; HASWELL-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
   7888 ; HASWELL-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [7:0.50]
   7889 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   7890 ;
   7891 ; HASWELL-LABEL: test_pavgb:
   7892 ; HASWELL:       # %bb.0:
   7893 ; HASWELL-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7894 ; HASWELL-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7895 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   7896 ;
   7897 ; BROADWELL-SSE-LABEL: test_pavgb:
   7898 ; BROADWELL-SSE:       # %bb.0:
   7899 ; BROADWELL-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
   7900 ; BROADWELL-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [6:0.50]
   7901 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   7902 ;
   7903 ; BROADWELL-LABEL: test_pavgb:
   7904 ; BROADWELL:       # %bb.0:
   7905 ; BROADWELL-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7906 ; BROADWELL-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   7907 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   7908 ;
   7909 ; SKYLAKE-SSE-LABEL: test_pavgb:
   7910 ; SKYLAKE-SSE:       # %bb.0:
   7911 ; SKYLAKE-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
   7912 ; SKYLAKE-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [7:0.50]
   7913 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   7914 ;
   7915 ; SKYLAKE-LABEL: test_pavgb:
   7916 ; SKYLAKE:       # %bb.0:
   7917 ; SKYLAKE-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7918 ; SKYLAKE-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7919 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   7920 ;
   7921 ; SKX-SSE-LABEL: test_pavgb:
   7922 ; SKX-SSE:       # %bb.0:
   7923 ; SKX-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
   7924 ; SKX-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [7:0.50]
   7925 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   7926 ;
   7927 ; SKX-LABEL: test_pavgb:
   7928 ; SKX:       # %bb.0:
   7929 ; SKX-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7930 ; SKX-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   7931 ; SKX-NEXT:    retq # sched: [7:1.00]
   7932 ;
   7933 ; BTVER2-SSE-LABEL: test_pavgb:
   7934 ; BTVER2-SSE:       # %bb.0:
   7935 ; BTVER2-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.50]
   7936 ; BTVER2-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [6:1.00]
   7937 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   7938 ;
   7939 ; BTVER2-LABEL: test_pavgb:
   7940 ; BTVER2:       # %bb.0:
   7941 ; BTVER2-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   7942 ; BTVER2-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   7943 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   7944 ;
   7945 ; ZNVER1-SSE-LABEL: test_pavgb:
   7946 ; ZNVER1-SSE:       # %bb.0:
   7947 ; ZNVER1-SSE-NEXT:    pavgb %xmm1, %xmm0 # sched: [1:0.25]
   7948 ; ZNVER1-SSE-NEXT:    pavgb (%rdi), %xmm0 # sched: [8:0.50]
   7949 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   7950 ;
   7951 ; ZNVER1-LABEL: test_pavgb:
   7952 ; ZNVER1:       # %bb.0:
   7953 ; ZNVER1-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   7954 ; ZNVER1-NEXT:    vpavgb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   7955 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   7956   %1 = zext <16 x i8> %a0 to <16 x i16>
   7957   %2 = zext <16 x i8> %a1 to <16 x i16>
   7958   %3 = add <16 x i16> %1, %2
   7959   %4 = add <16 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   7960   %5 = lshr <16 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   7961   %6 = trunc <16 x i16> %5 to <16 x i8>
   7962   %7 = load <16 x i8>, <16 x i8> *%a2, align 16
   7963   %8 = zext <16 x i8> %6 to <16 x i16>
   7964   %9 = zext <16 x i8> %7 to <16 x i16>
   7965   %10 = add <16 x i16> %8, %9
   7966   %11 = add <16 x i16> %10, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   7967   %12 = lshr <16 x i16> %11, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   7968   %13 = trunc <16 x i16> %12 to <16 x i8>
   7969   ret <16 x i8> %13
   7970 }
   7971 
   7972 define <8 x i16> @test_pavgw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
   7973 ; GENERIC-LABEL: test_pavgw:
   7974 ; GENERIC:       # %bb.0:
   7975 ; GENERIC-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
   7976 ; GENERIC-NEXT:    pavgw (%rdi), %xmm0 # sched: [7:0.50]
   7977 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   7978 ;
   7979 ; ATOM-LABEL: test_pavgw:
   7980 ; ATOM:       # %bb.0:
   7981 ; ATOM-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
   7982 ; ATOM-NEXT:    pavgw (%rdi), %xmm0 # sched: [1:1.00]
   7983 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7984 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7985 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7986 ; ATOM-NEXT:    nop # sched: [1:0.50]
   7987 ; ATOM-NEXT:    retq # sched: [79:39.50]
   7988 ;
   7989 ; SLM-LABEL: test_pavgw:
   7990 ; SLM:       # %bb.0:
   7991 ; SLM-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
   7992 ; SLM-NEXT:    pavgw (%rdi), %xmm0 # sched: [4:1.00]
   7993 ; SLM-NEXT:    retq # sched: [4:1.00]
   7994 ;
   7995 ; SANDY-SSE-LABEL: test_pavgw:
   7996 ; SANDY-SSE:       # %bb.0:
   7997 ; SANDY-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
   7998 ; SANDY-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [7:0.50]
   7999 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   8000 ;
   8001 ; SANDY-LABEL: test_pavgw:
   8002 ; SANDY:       # %bb.0:
   8003 ; SANDY-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   8004 ; SANDY-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8005 ; SANDY-NEXT:    retq # sched: [1:1.00]
   8006 ;
   8007 ; HASWELL-SSE-LABEL: test_pavgw:
   8008 ; HASWELL-SSE:       # %bb.0:
   8009 ; HASWELL-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
   8010 ; HASWELL-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [7:0.50]
   8011 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   8012 ;
   8013 ; HASWELL-LABEL: test_pavgw:
   8014 ; HASWELL:       # %bb.0:
   8015 ; HASWELL-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   8016 ; HASWELL-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8017 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   8018 ;
   8019 ; BROADWELL-SSE-LABEL: test_pavgw:
   8020 ; BROADWELL-SSE:       # %bb.0:
   8021 ; BROADWELL-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
   8022 ; BROADWELL-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [6:0.50]
   8023 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   8024 ;
   8025 ; BROADWELL-LABEL: test_pavgw:
   8026 ; BROADWELL:       # %bb.0:
   8027 ; BROADWELL-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   8028 ; BROADWELL-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   8029 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   8030 ;
   8031 ; SKYLAKE-SSE-LABEL: test_pavgw:
   8032 ; SKYLAKE-SSE:       # %bb.0:
   8033 ; SKYLAKE-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
   8034 ; SKYLAKE-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [7:0.50]
   8035 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   8036 ;
   8037 ; SKYLAKE-LABEL: test_pavgw:
   8038 ; SKYLAKE:       # %bb.0:
   8039 ; SKYLAKE-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   8040 ; SKYLAKE-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8041 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   8042 ;
   8043 ; SKX-SSE-LABEL: test_pavgw:
   8044 ; SKX-SSE:       # %bb.0:
   8045 ; SKX-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
   8046 ; SKX-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [7:0.50]
   8047 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   8048 ;
   8049 ; SKX-LABEL: test_pavgw:
   8050 ; SKX:       # %bb.0:
   8051 ; SKX-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   8052 ; SKX-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8053 ; SKX-NEXT:    retq # sched: [7:1.00]
   8054 ;
   8055 ; BTVER2-SSE-LABEL: test_pavgw:
   8056 ; BTVER2-SSE:       # %bb.0:
   8057 ; BTVER2-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.50]
   8058 ; BTVER2-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [6:1.00]
   8059 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   8060 ;
   8061 ; BTVER2-LABEL: test_pavgw:
   8062 ; BTVER2:       # %bb.0:
   8063 ; BTVER2-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   8064 ; BTVER2-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   8065 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   8066 ;
   8067 ; ZNVER1-SSE-LABEL: test_pavgw:
   8068 ; ZNVER1-SSE:       # %bb.0:
   8069 ; ZNVER1-SSE-NEXT:    pavgw %xmm1, %xmm0 # sched: [1:0.25]
   8070 ; ZNVER1-SSE-NEXT:    pavgw (%rdi), %xmm0 # sched: [8:0.50]
   8071 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   8072 ;
   8073 ; ZNVER1-LABEL: test_pavgw:
   8074 ; ZNVER1:       # %bb.0:
   8075 ; ZNVER1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   8076 ; ZNVER1-NEXT:    vpavgw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   8077 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   8078   %1 = zext <8 x i16> %a0 to <8 x i32>
   8079   %2 = zext <8 x i16> %a1 to <8 x i32>
   8080   %3 = add <8 x i32> %1, %2
   8081   %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   8082   %5 = lshr <8 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   8083   %6 = trunc <8 x i32> %5 to <8 x i16>
   8084   %7 = load <8 x i16>, <8 x i16> *%a2, align 16
   8085   %8 = zext <8 x i16> %6 to <8 x i32>
   8086   %9 = zext <8 x i16> %7 to <8 x i32>
   8087   %10 = add <8 x i32> %8, %9
   8088   %11 = add <8 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   8089   %12 = lshr <8 x i32> %11, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   8090   %13 = trunc <8 x i32> %12 to <8 x i16>
   8091   ret <8 x i16> %13
   8092 }
   8093 
   8094 define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
        ; Scheduling test for the byte-wise equality compare (pcmpeqb / vpcmpeqb).
        ; The IR compares %a0 against %a1 and against a 16-byte-aligned load from
        ; %a2, ORs the two <16 x i1> masks and sign-extends the result, so each
        ; run is expected to emit one register-form and one memory-form compare
        ; followed by por/vpor. The assertions below pin the "# sched" annotation
        ; printed by -print-schedule for every instruction; the atom expectations
        ; additionally contain two nops ahead of retq. These lines are
        ; autogenerated: regenerate them with utils/update_llc_test_checks.py
        ; instead of editing them by hand.
   8095 ; GENERIC-LABEL: test_pcmpeqb:
   8096 ; GENERIC:       # %bb.0:
   8097 ; GENERIC-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
   8098 ; GENERIC-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
   8099 ; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8100 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8101 ;
   8102 ; ATOM-LABEL: test_pcmpeqb:
   8103 ; ATOM:       # %bb.0:
   8104 ; ATOM-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
   8105 ; ATOM-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [1:1.00]
   8106 ; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
   8107 ; ATOM-NEXT:    nop # sched: [1:0.50]
   8108 ; ATOM-NEXT:    nop # sched: [1:0.50]
   8109 ; ATOM-NEXT:    retq # sched: [79:39.50]
   8110 ;
   8111 ; SLM-LABEL: test_pcmpeqb:
   8112 ; SLM:       # %bb.0:
   8113 ; SLM-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
   8114 ; SLM-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [4:1.00]
   8115 ; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
   8116 ; SLM-NEXT:    retq # sched: [4:1.00]
   8117 ;
   8118 ; SANDY-SSE-LABEL: test_pcmpeqb:
   8119 ; SANDY-SSE:       # %bb.0:
   8120 ; SANDY-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
   8121 ; SANDY-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
   8122 ; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8123 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   8124 ;
   8125 ; SANDY-LABEL: test_pcmpeqb:
   8126 ; SANDY:       # %bb.0:
   8127 ; SANDY-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8128 ; SANDY-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8129 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8130 ; SANDY-NEXT:    retq # sched: [1:1.00]
   8131 ;
   8132 ; HASWELL-SSE-LABEL: test_pcmpeqb:
   8133 ; HASWELL-SSE:       # %bb.0:
   8134 ; HASWELL-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
   8135 ; HASWELL-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
   8136 ; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8137 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   8138 ;
   8139 ; HASWELL-LABEL: test_pcmpeqb:
   8140 ; HASWELL:       # %bb.0:
   8141 ; HASWELL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8142 ; HASWELL-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8143 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8144 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   8145 ;
   8146 ; BROADWELL-SSE-LABEL: test_pcmpeqb:
   8147 ; BROADWELL-SSE:       # %bb.0:
   8148 ; BROADWELL-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
   8149 ; BROADWELL-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [6:0.50]
   8150 ; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8151 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   8152 ;
   8153 ; BROADWELL-LABEL: test_pcmpeqb:
   8154 ; BROADWELL:       # %bb.0:
   8155 ; BROADWELL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8156 ; BROADWELL-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   8157 ; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8158 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   8159 ;
   8160 ; SKYLAKE-SSE-LABEL: test_pcmpeqb:
   8161 ; SKYLAKE-SSE:       # %bb.0:
   8162 ; SKYLAKE-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
   8163 ; SKYLAKE-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
   8164 ; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8165 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   8166 ;
   8167 ; SKYLAKE-LABEL: test_pcmpeqb:
   8168 ; SKYLAKE:       # %bb.0:
   8169 ; SKYLAKE-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8170 ; SKYLAKE-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8171 ; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8172 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   8173 ;
   8174 ; SKX-SSE-LABEL: test_pcmpeqb:
   8175 ; SKX-SSE:       # %bb.0:
   8176 ; SKX-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
   8177 ; SKX-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
   8178 ; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8179 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   8180 ;
   8181 ; SKX-LABEL: test_pcmpeqb:
   8182 ; SKX:       # %bb.0:
   8183 ; SKX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8184 ; SKX-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8185 ; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8186 ; SKX-NEXT:    retq # sched: [7:1.00]
   8187 ;
   8188 ; BTVER2-SSE-LABEL: test_pcmpeqb:
   8189 ; BTVER2-SSE:       # %bb.0:
   8190 ; BTVER2-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
   8191 ; BTVER2-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [6:1.00]
   8192 ; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
   8193 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   8194 ;
   8195 ; BTVER2-LABEL: test_pcmpeqb:
   8196 ; BTVER2:       # %bb.0:
   8197 ; BTVER2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8198 ; BTVER2-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   8199 ; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
   8200 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   8201 ;
   8202 ; ZNVER1-SSE-LABEL: test_pcmpeqb:
   8203 ; ZNVER1-SSE:       # %bb.0:
   8204 ; ZNVER1-SSE-NEXT:    pcmpeqb %xmm0, %xmm1 # sched: [1:0.25]
   8205 ; ZNVER1-SSE-NEXT:    pcmpeqb (%rdi), %xmm0 # sched: [8:0.50]
   8206 ; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
   8207 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   8208 ;
   8209 ; ZNVER1-LABEL: test_pcmpeqb:
   8210 ; ZNVER1:       # %bb.0:
   8211 ; ZNVER1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
   8212 ; ZNVER1-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   8213 ; ZNVER1-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
   8214 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register/register compare of the two vector arguments.
   8215   %1 = icmp eq <16 x i8> %a0, %a1
          ; Aligned load feeding the second compare; the assertions above
          ; expect it to appear as the (%rdi) memory operand of the compare.
   8216   %2 = load <16 x i8>, <16 x i8> *%a2, align 16
   8217   %3 = icmp eq <16 x i8> %a0, %2
          ; Merge both masks so neither compare is dead, then widen the
          ; <16 x i1> mask back to the <16 x i8> return type.
   8218   %4 = or <16 x i1> %1, %3
   8219   %5 = sext <16 x i1> %4 to <16 x i8>
   8220   ret <16 x i8> %5
   8221 }
   8222 
   8223 define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
        ; Scheduling test for the 32-bit-lane equality compare (pcmpeqd /
        ; vpcmpeqd). The IR compares %a0 against %a1 and against a
        ; 16-byte-aligned load from %a2, ORs the two <4 x i1> masks and
        ; sign-extends the result, so each run is expected to emit one
        ; register-form and one memory-form compare followed by por/vpor.
        ; The assertions below pin the "# sched" annotation printed by
        ; -print-schedule for every instruction; the atom expectations
        ; additionally contain two nops ahead of retq. These lines are
        ; autogenerated: regenerate them with utils/update_llc_test_checks.py
        ; instead of editing them by hand.
   8224 ; GENERIC-LABEL: test_pcmpeqd:
   8225 ; GENERIC:       # %bb.0:
   8226 ; GENERIC-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
   8227 ; GENERIC-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
   8228 ; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8229 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8230 ;
   8231 ; ATOM-LABEL: test_pcmpeqd:
   8232 ; ATOM:       # %bb.0:
   8233 ; ATOM-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
   8234 ; ATOM-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [1:1.00]
   8235 ; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
   8236 ; ATOM-NEXT:    nop # sched: [1:0.50]
   8237 ; ATOM-NEXT:    nop # sched: [1:0.50]
   8238 ; ATOM-NEXT:    retq # sched: [79:39.50]
   8239 ;
   8240 ; SLM-LABEL: test_pcmpeqd:
   8241 ; SLM:       # %bb.0:
   8242 ; SLM-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
   8243 ; SLM-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [4:1.00]
   8244 ; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
   8245 ; SLM-NEXT:    retq # sched: [4:1.00]
   8246 ;
   8247 ; SANDY-SSE-LABEL: test_pcmpeqd:
   8248 ; SANDY-SSE:       # %bb.0:
   8249 ; SANDY-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
   8250 ; SANDY-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
   8251 ; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8252 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   8253 ;
   8254 ; SANDY-LABEL: test_pcmpeqd:
   8255 ; SANDY:       # %bb.0:
   8256 ; SANDY-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8257 ; SANDY-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8258 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8259 ; SANDY-NEXT:    retq # sched: [1:1.00]
   8260 ;
   8261 ; HASWELL-SSE-LABEL: test_pcmpeqd:
   8262 ; HASWELL-SSE:       # %bb.0:
   8263 ; HASWELL-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
   8264 ; HASWELL-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
   8265 ; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8266 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   8267 ;
   8268 ; HASWELL-LABEL: test_pcmpeqd:
   8269 ; HASWELL:       # %bb.0:
   8270 ; HASWELL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8271 ; HASWELL-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8272 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8273 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   8274 ;
   8275 ; BROADWELL-SSE-LABEL: test_pcmpeqd:
   8276 ; BROADWELL-SSE:       # %bb.0:
   8277 ; BROADWELL-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
   8278 ; BROADWELL-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [6:0.50]
   8279 ; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8280 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   8281 ;
   8282 ; BROADWELL-LABEL: test_pcmpeqd:
   8283 ; BROADWELL:       # %bb.0:
   8284 ; BROADWELL-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8285 ; BROADWELL-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   8286 ; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8287 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   8288 ;
   8289 ; SKYLAKE-SSE-LABEL: test_pcmpeqd:
   8290 ; SKYLAKE-SSE:       # %bb.0:
   8291 ; SKYLAKE-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
   8292 ; SKYLAKE-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
   8293 ; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8294 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   8295 ;
   8296 ; SKYLAKE-LABEL: test_pcmpeqd:
   8297 ; SKYLAKE:       # %bb.0:
   8298 ; SKYLAKE-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8299 ; SKYLAKE-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8300 ; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8301 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   8302 ;
   8303 ; SKX-SSE-LABEL: test_pcmpeqd:
   8304 ; SKX-SSE:       # %bb.0:
   8305 ; SKX-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
   8306 ; SKX-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
   8307 ; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8308 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   8309 ;
   8310 ; SKX-LABEL: test_pcmpeqd:
   8311 ; SKX:       # %bb.0:
   8312 ; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8313 ; SKX-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8314 ; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8315 ; SKX-NEXT:    retq # sched: [7:1.00]
   8316 ;
   8317 ; BTVER2-SSE-LABEL: test_pcmpeqd:
   8318 ; BTVER2-SSE:       # %bb.0:
   8319 ; BTVER2-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
   8320 ; BTVER2-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [6:1.00]
   8321 ; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
   8322 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   8323 ;
   8324 ; BTVER2-LABEL: test_pcmpeqd:
   8325 ; BTVER2:       # %bb.0:
   8326 ; BTVER2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8327 ; BTVER2-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   8328 ; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
   8329 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   8330 ;
   8331 ; ZNVER1-SSE-LABEL: test_pcmpeqd:
   8332 ; ZNVER1-SSE:       # %bb.0:
   8333 ; ZNVER1-SSE-NEXT:    pcmpeqd %xmm0, %xmm1 # sched: [1:0.25]
   8334 ; ZNVER1-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [8:0.50]
   8335 ; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
   8336 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   8337 ;
   8338 ; ZNVER1-LABEL: test_pcmpeqd:
   8339 ; ZNVER1:       # %bb.0:
   8340 ; ZNVER1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
   8341 ; ZNVER1-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   8342 ; ZNVER1-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
   8343 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register/register compare of the two vector arguments.
   8344   %1 = icmp eq <4 x i32> %a0, %a1
          ; Aligned load feeding the second compare; the assertions above
          ; expect it to appear as the (%rdi) memory operand of the compare.
   8345   %2 = load <4 x i32>, <4 x i32> *%a2, align 16
   8346   %3 = icmp eq <4 x i32> %a0, %2
          ; Merge both masks so neither compare is dead, then widen the
          ; <4 x i1> mask back to the <4 x i32> return type.
   8347   %4 = or <4 x i1> %1, %3
   8348   %5 = sext <4 x i1> %4 to <4 x i32>
   8349   ret <4 x i32> %5
   8350 }
   8351 
   8352 define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
        ; Scheduling test for the 16-bit-lane equality compare (pcmpeqw /
        ; vpcmpeqw). The IR compares %a0 against %a1 and against a
        ; 16-byte-aligned load from %a2, ORs the two <8 x i1> masks and
        ; sign-extends the result, so each run is expected to emit one
        ; register-form and one memory-form compare followed by por/vpor.
        ; The assertions below pin the "# sched" annotation printed by
        ; -print-schedule for every instruction; the atom expectations
        ; additionally contain two nops ahead of retq. These lines are
        ; autogenerated: regenerate them with utils/update_llc_test_checks.py
        ; instead of editing them by hand.
   8353 ; GENERIC-LABEL: test_pcmpeqw:
   8354 ; GENERIC:       # %bb.0:
   8355 ; GENERIC-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
   8356 ; GENERIC-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
   8357 ; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8358 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8359 ;
   8360 ; ATOM-LABEL: test_pcmpeqw:
   8361 ; ATOM:       # %bb.0:
   8362 ; ATOM-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
   8363 ; ATOM-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [1:1.00]
   8364 ; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
   8365 ; ATOM-NEXT:    nop # sched: [1:0.50]
   8366 ; ATOM-NEXT:    nop # sched: [1:0.50]
   8367 ; ATOM-NEXT:    retq # sched: [79:39.50]
   8368 ;
   8369 ; SLM-LABEL: test_pcmpeqw:
   8370 ; SLM:       # %bb.0:
   8371 ; SLM-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
   8372 ; SLM-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [4:1.00]
   8373 ; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
   8374 ; SLM-NEXT:    retq # sched: [4:1.00]
   8375 ;
   8376 ; SANDY-SSE-LABEL: test_pcmpeqw:
   8377 ; SANDY-SSE:       # %bb.0:
   8378 ; SANDY-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
   8379 ; SANDY-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
   8380 ; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8381 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   8382 ;
   8383 ; SANDY-LABEL: test_pcmpeqw:
   8384 ; SANDY:       # %bb.0:
   8385 ; SANDY-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8386 ; SANDY-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8387 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8388 ; SANDY-NEXT:    retq # sched: [1:1.00]
   8389 ;
   8390 ; HASWELL-SSE-LABEL: test_pcmpeqw:
   8391 ; HASWELL-SSE:       # %bb.0:
   8392 ; HASWELL-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
   8393 ; HASWELL-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
   8394 ; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8395 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   8396 ;
   8397 ; HASWELL-LABEL: test_pcmpeqw:
   8398 ; HASWELL:       # %bb.0:
   8399 ; HASWELL-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8400 ; HASWELL-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8401 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8402 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   8403 ;
   8404 ; BROADWELL-SSE-LABEL: test_pcmpeqw:
   8405 ; BROADWELL-SSE:       # %bb.0:
   8406 ; BROADWELL-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
   8407 ; BROADWELL-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [6:0.50]
   8408 ; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8409 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   8410 ;
   8411 ; BROADWELL-LABEL: test_pcmpeqw:
   8412 ; BROADWELL:       # %bb.0:
   8413 ; BROADWELL-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8414 ; BROADWELL-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   8415 ; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8416 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   8417 ;
   8418 ; SKYLAKE-SSE-LABEL: test_pcmpeqw:
   8419 ; SKYLAKE-SSE:       # %bb.0:
   8420 ; SKYLAKE-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
   8421 ; SKYLAKE-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
   8422 ; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8423 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   8424 ;
   8425 ; SKYLAKE-LABEL: test_pcmpeqw:
   8426 ; SKYLAKE:       # %bb.0:
   8427 ; SKYLAKE-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8428 ; SKYLAKE-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8429 ; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8430 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   8431 ;
   8432 ; SKX-SSE-LABEL: test_pcmpeqw:
   8433 ; SKX-SSE:       # %bb.0:
   8434 ; SKX-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
   8435 ; SKX-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
   8436 ; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   8437 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   8438 ;
   8439 ; SKX-LABEL: test_pcmpeqw:
   8440 ; SKX:       # %bb.0:
   8441 ; SKX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8442 ; SKX-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8443 ; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8444 ; SKX-NEXT:    retq # sched: [7:1.00]
   8445 ;
   8446 ; BTVER2-SSE-LABEL: test_pcmpeqw:
   8447 ; BTVER2-SSE:       # %bb.0:
   8448 ; BTVER2-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
   8449 ; BTVER2-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [6:1.00]
   8450 ; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
   8451 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   8452 ;
   8453 ; BTVER2-LABEL: test_pcmpeqw:
   8454 ; BTVER2:       # %bb.0:
   8455 ; BTVER2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8456 ; BTVER2-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   8457 ; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
   8458 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   8459 ;
   8460 ; ZNVER1-SSE-LABEL: test_pcmpeqw:
   8461 ; ZNVER1-SSE:       # %bb.0:
   8462 ; ZNVER1-SSE-NEXT:    pcmpeqw %xmm0, %xmm1 # sched: [1:0.25]
   8463 ; ZNVER1-SSE-NEXT:    pcmpeqw (%rdi), %xmm0 # sched: [8:0.50]
   8464 ; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
   8465 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   8466 ;
   8467 ; ZNVER1-LABEL: test_pcmpeqw:
   8468 ; ZNVER1:       # %bb.0:
   8469 ; ZNVER1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
   8470 ; ZNVER1-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   8471 ; ZNVER1-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
   8472 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register/register compare of the two vector arguments.
   8473   %1 = icmp eq <8 x i16> %a0, %a1
          ; Aligned load feeding the second compare; the assertions above
          ; expect it to appear as the (%rdi) memory operand of the compare.
   8474   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   8475   %3 = icmp eq <8 x i16> %a0, %2
          ; Merge both masks so neither compare is dead, then widen the
          ; <8 x i1> mask back to the <8 x i16> return type.
   8476   %4 = or <8 x i1> %1, %3
   8477   %5 = sext <8 x i1> %4 to <8 x i16>
   8478   ret <8 x i16> %5
   8479 }
   8480 
   8481 define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
        ; Scheduling test for the signed byte-wise greater-than compare
        ; (pcmpgtb / vpcmpgtb). The IR evaluates %a0 > %a1 and %a0 > load(%a2)
        ; with a 16-byte-aligned load, ORs the two <16 x i1> masks and
        ; sign-extends the result. The assertions below pin the "# sched"
        ; annotation printed by -print-schedule for every instruction.
        ;
        ; Points worth knowing when reading the expectations:
        ;  * the non-VEX expectations start with a movdqa copy of %xmm0 into
        ;    %xmm2 and run the register-form compare on that copy, while the
        ;    vpcmpgtb expectations need no copy;
        ;  * the atom, slm, btver2-sse and znver1-sse expectations place the
        ;    memory-form compare before the register-form one, the others
        ;    place it after.
        ; These lines are autogenerated: regenerate them with
        ; utils/update_llc_test_checks.py instead of editing them by hand.
   8482 ; GENERIC-LABEL: test_pcmpgtb:
   8483 ; GENERIC:       # %bb.0:
   8484 ; GENERIC-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8485 ; GENERIC-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
   8486 ; GENERIC-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
   8487 ; GENERIC-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8488 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8489 ;
   8490 ; ATOM-LABEL: test_pcmpgtb:
   8491 ; ATOM:       # %bb.0:
   8492 ; ATOM-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
   8493 ; ATOM-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [1:1.00]
   8494 ; ATOM-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
   8495 ; ATOM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
   8496 ; ATOM-NEXT:    retq # sched: [79:39.50]
   8497 ;
   8498 ; SLM-LABEL: test_pcmpgtb:
   8499 ; SLM:       # %bb.0:
   8500 ; SLM-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
   8501 ; SLM-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [4:1.00]
   8502 ; SLM-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
   8503 ; SLM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
   8504 ; SLM-NEXT:    retq # sched: [4:1.00]
   8505 ;
   8506 ; SANDY-SSE-LABEL: test_pcmpgtb:
   8507 ; SANDY-SSE:       # %bb.0:
   8508 ; SANDY-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8509 ; SANDY-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
   8510 ; SANDY-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
   8511 ; SANDY-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8512 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   8513 ;
   8514 ; SANDY-LABEL: test_pcmpgtb:
   8515 ; SANDY:       # %bb.0:
   8516 ; SANDY-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8517 ; SANDY-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8518 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8519 ; SANDY-NEXT:    retq # sched: [1:1.00]
   8520 ;
   8521 ; HASWELL-SSE-LABEL: test_pcmpgtb:
   8522 ; HASWELL-SSE:       # %bb.0:
   8523 ; HASWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8524 ; HASWELL-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
   8525 ; HASWELL-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
   8526 ; HASWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8527 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   8528 ;
   8529 ; HASWELL-LABEL: test_pcmpgtb:
   8530 ; HASWELL:       # %bb.0:
   8531 ; HASWELL-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8532 ; HASWELL-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8533 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8534 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   8535 ;
   8536 ; BROADWELL-SSE-LABEL: test_pcmpgtb:
   8537 ; BROADWELL-SSE:       # %bb.0:
   8538 ; BROADWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8539 ; BROADWELL-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
   8540 ; BROADWELL-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [6:0.50]
   8541 ; BROADWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8542 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   8543 ;
   8544 ; BROADWELL-LABEL: test_pcmpgtb:
   8545 ; BROADWELL:       # %bb.0:
   8546 ; BROADWELL-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8547 ; BROADWELL-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   8548 ; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8549 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   8550 ;
   8551 ; SKYLAKE-SSE-LABEL: test_pcmpgtb:
   8552 ; SKYLAKE-SSE:       # %bb.0:
   8553 ; SKYLAKE-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8554 ; SKYLAKE-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
   8555 ; SKYLAKE-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
   8556 ; SKYLAKE-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8557 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   8558 ;
   8559 ; SKYLAKE-LABEL: test_pcmpgtb:
   8560 ; SKYLAKE:       # %bb.0:
   8561 ; SKYLAKE-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8562 ; SKYLAKE-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8563 ; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8564 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   8565 ;
   8566 ; SKX-SSE-LABEL: test_pcmpgtb:
   8567 ; SKX-SSE:       # %bb.0:
   8568 ; SKX-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8569 ; SKX-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
   8570 ; SKX-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
   8571 ; SKX-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8572 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   8573 ;
   8574 ; SKX-LABEL: test_pcmpgtb:
   8575 ; SKX:       # %bb.0:
   8576 ; SKX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8577 ; SKX-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8578 ; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8579 ; SKX-NEXT:    retq # sched: [7:1.00]
   8580 ;
   8581 ; BTVER2-SSE-LABEL: test_pcmpgtb:
   8582 ; BTVER2-SSE:       # %bb.0:
   8583 ; BTVER2-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
   8584 ; BTVER2-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [6:1.00]
   8585 ; BTVER2-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
   8586 ; BTVER2-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
   8587 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   8588 ;
   8589 ; BTVER2-LABEL: test_pcmpgtb:
   8590 ; BTVER2:       # %bb.0:
   8591 ; BTVER2-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8592 ; BTVER2-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   8593 ; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
   8594 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   8595 ;
   8596 ; ZNVER1-SSE-LABEL: test_pcmpgtb:
   8597 ; ZNVER1-SSE:       # %bb.0:
   8598 ; ZNVER1-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.25]
   8599 ; ZNVER1-SSE-NEXT:    pcmpgtb (%rdi), %xmm0 # sched: [8:0.50]
   8600 ; ZNVER1-SSE-NEXT:    pcmpgtb %xmm1, %xmm2 # sched: [1:0.25]
   8601 ; ZNVER1-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.25]
   8602 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   8603 ;
   8604 ; ZNVER1-LABEL: test_pcmpgtb:
   8605 ; ZNVER1:       # %bb.0:
   8606 ; ZNVER1-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
   8607 ; ZNVER1-NEXT:    vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   8608 ; ZNVER1-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
   8609 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register/register signed compare of the two vector arguments.
   8610   %1 = icmp sgt <16 x i8> %a0, %a1
          ; Aligned load feeding the second compare; the assertions above
          ; expect it to appear as the (%rdi) memory operand of the compare.
   8611   %2 = load <16 x i8>, <16 x i8> *%a2, align 16
   8612   %3 = icmp sgt <16 x i8> %a0, %2
          ; Merge both masks so neither compare is dead, then widen the
          ; <16 x i1> mask back to the <16 x i8> return type.
   8613   %4 = or <16 x i1> %1, %3
   8614   %5 = sext <16 x i1> %4 to <16 x i8>
   8615   ret <16 x i8> %5
   8616 }
   8617 
   8618 define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
   8619 ; GENERIC-LABEL: test_pcmpgtd:
   8620 ; GENERIC:       # %bb.0:
   8621 ; GENERIC-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8622 ; GENERIC-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
   8623 ; GENERIC-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
   8624 ; GENERIC-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8625 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8626 ;
   8627 ; ATOM-LABEL: test_pcmpgtd:
   8628 ; ATOM:       # %bb.0:
   8629 ; ATOM-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
   8630 ; ATOM-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [1:1.00]
   8631 ; ATOM-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
   8632 ; ATOM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
   8633 ; ATOM-NEXT:    retq # sched: [79:39.50]
   8634 ;
   8635 ; SLM-LABEL: test_pcmpgtd:
   8636 ; SLM:       # %bb.0:
   8637 ; SLM-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
   8638 ; SLM-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [4:1.00]
   8639 ; SLM-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
   8640 ; SLM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
   8641 ; SLM-NEXT:    retq # sched: [4:1.00]
   8642 ;
   8643 ; SANDY-SSE-LABEL: test_pcmpgtd:
   8644 ; SANDY-SSE:       # %bb.0:
   8645 ; SANDY-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8646 ; SANDY-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
   8647 ; SANDY-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
   8648 ; SANDY-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8649 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   8650 ;
   8651 ; SANDY-LABEL: test_pcmpgtd:
   8652 ; SANDY:       # %bb.0:
   8653 ; SANDY-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8654 ; SANDY-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8655 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8656 ; SANDY-NEXT:    retq # sched: [1:1.00]
   8657 ;
   8658 ; HASWELL-SSE-LABEL: test_pcmpgtd:
   8659 ; HASWELL-SSE:       # %bb.0:
   8660 ; HASWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8661 ; HASWELL-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
   8662 ; HASWELL-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
   8663 ; HASWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8664 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   8665 ;
   8666 ; HASWELL-LABEL: test_pcmpgtd:
   8667 ; HASWELL:       # %bb.0:
   8668 ; HASWELL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8669 ; HASWELL-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8670 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8671 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   8672 ;
   8673 ; BROADWELL-SSE-LABEL: test_pcmpgtd:
   8674 ; BROADWELL-SSE:       # %bb.0:
   8675 ; BROADWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8676 ; BROADWELL-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
   8677 ; BROADWELL-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [6:0.50]
   8678 ; BROADWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8679 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   8680 ;
   8681 ; BROADWELL-LABEL: test_pcmpgtd:
   8682 ; BROADWELL:       # %bb.0:
   8683 ; BROADWELL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8684 ; BROADWELL-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   8685 ; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8686 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   8687 ;
   8688 ; SKYLAKE-SSE-LABEL: test_pcmpgtd:
   8689 ; SKYLAKE-SSE:       # %bb.0:
   8690 ; SKYLAKE-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8691 ; SKYLAKE-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
   8692 ; SKYLAKE-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
   8693 ; SKYLAKE-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8694 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   8695 ;
   8696 ; SKYLAKE-LABEL: test_pcmpgtd:
   8697 ; SKYLAKE:       # %bb.0:
   8698 ; SKYLAKE-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8699 ; SKYLAKE-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8700 ; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8701 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   8702 ;
   8703 ; SKX-SSE-LABEL: test_pcmpgtd:
   8704 ; SKX-SSE:       # %bb.0:
   8705 ; SKX-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8706 ; SKX-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
   8707 ; SKX-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
   8708 ; SKX-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8709 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   8710 ;
   8711 ; SKX-LABEL: test_pcmpgtd:
   8712 ; SKX:       # %bb.0:
   8713 ; SKX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8714 ; SKX-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8715 ; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8716 ; SKX-NEXT:    retq # sched: [7:1.00]
   8717 ;
   8718 ; BTVER2-SSE-LABEL: test_pcmpgtd:
   8719 ; BTVER2-SSE:       # %bb.0:
   8720 ; BTVER2-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
   8721 ; BTVER2-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [6:1.00]
   8722 ; BTVER2-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
   8723 ; BTVER2-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
   8724 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   8725 ;
   8726 ; BTVER2-LABEL: test_pcmpgtd:
   8727 ; BTVER2:       # %bb.0:
   8728 ; BTVER2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8729 ; BTVER2-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   8730 ; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
   8731 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   8732 ;
   8733 ; ZNVER1-SSE-LABEL: test_pcmpgtd:
   8734 ; ZNVER1-SSE:       # %bb.0:
   8735 ; ZNVER1-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.25]
   8736 ; ZNVER1-SSE-NEXT:    pcmpeqd (%rdi), %xmm0 # sched: [8:0.50]
   8737 ; ZNVER1-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.25]
   8738 ; ZNVER1-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.25]
   8739 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   8740 ;
   8741 ; ZNVER1-LABEL: test_pcmpgtd:
   8742 ; ZNVER1:       # %bb.0:
   8743 ; ZNVER1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
   8744 ; ZNVER1-NEXT:    vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   8745 ; ZNVER1-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
   8746 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   8747   %1 = icmp sgt <4 x i32> %a0, %a1
   8748   %2 = load <4 x i32>, <4 x i32> *%a2, align 16
   8749   %3 = icmp eq <4 x i32> %a0, %2
   8750   %4 = or <4 x i1> %1, %3
   8751   %5 = sext <4 x i1> %4 to <4 x i32>
   8752   ret <4 x i32> %5
   8753 }
   8754 
   8755 define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
        ; Schedule test for the signed 16-bit greater-than compare. %a0 is compared
        ; against the register operand %a1 and against a vector loaded from %a2, so
        ; the assertions below cover both the register and the memory form of
        ; (v)pcmpgtw for every -mcpu configuration in the RUN lines.
        ; The assertions are autogenerated by utils/update_llc_test_checks.py;
        ; regenerate them with that script instead of editing them by hand.
   8756 ; GENERIC-LABEL: test_pcmpgtw:
   8757 ; GENERIC:       # %bb.0:
   8758 ; GENERIC-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8759 ; GENERIC-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
   8760 ; GENERIC-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
   8761 ; GENERIC-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8762 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8763 ;
   8764 ; ATOM-LABEL: test_pcmpgtw:
   8765 ; ATOM:       # %bb.0:
   8766 ; ATOM-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
   8767 ; ATOM-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [1:1.00]
   8768 ; ATOM-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
   8769 ; ATOM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
   8770 ; ATOM-NEXT:    retq # sched: [79:39.50]
   8771 ;
   8772 ; SLM-LABEL: test_pcmpgtw:
   8773 ; SLM:       # %bb.0:
   8774 ; SLM-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
   8775 ; SLM-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [4:1.00]
   8776 ; SLM-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
   8777 ; SLM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
   8778 ; SLM-NEXT:    retq # sched: [4:1.00]
   8779 ;
   8780 ; SANDY-SSE-LABEL: test_pcmpgtw:
   8781 ; SANDY-SSE:       # %bb.0:
   8782 ; SANDY-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8783 ; SANDY-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
   8784 ; SANDY-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
   8785 ; SANDY-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8786 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   8787 ;
   8788 ; SANDY-LABEL: test_pcmpgtw:
   8789 ; SANDY:       # %bb.0:
   8790 ; SANDY-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8791 ; SANDY-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8792 ; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8793 ; SANDY-NEXT:    retq # sched: [1:1.00]
   8794 ;
   8795 ; HASWELL-SSE-LABEL: test_pcmpgtw:
   8796 ; HASWELL-SSE:       # %bb.0:
   8797 ; HASWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8798 ; HASWELL-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
   8799 ; HASWELL-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
   8800 ; HASWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8801 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   8802 ;
   8803 ; HASWELL-LABEL: test_pcmpgtw:
   8804 ; HASWELL:       # %bb.0:
   8805 ; HASWELL-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8806 ; HASWELL-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8807 ; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8808 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   8809 ;
   8810 ; BROADWELL-SSE-LABEL: test_pcmpgtw:
   8811 ; BROADWELL-SSE:       # %bb.0:
   8812 ; BROADWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8813 ; BROADWELL-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
   8814 ; BROADWELL-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [6:0.50]
   8815 ; BROADWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8816 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   8817 ;
   8818 ; BROADWELL-LABEL: test_pcmpgtw:
   8819 ; BROADWELL:       # %bb.0:
   8820 ; BROADWELL-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8821 ; BROADWELL-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   8822 ; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8823 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   8824 ;
   8825 ; SKYLAKE-SSE-LABEL: test_pcmpgtw:
   8826 ; SKYLAKE-SSE:       # %bb.0:
   8827 ; SKYLAKE-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8828 ; SKYLAKE-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
   8829 ; SKYLAKE-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
   8830 ; SKYLAKE-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8831 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   8832 ;
   8833 ; SKYLAKE-LABEL: test_pcmpgtw:
   8834 ; SKYLAKE:       # %bb.0:
   8835 ; SKYLAKE-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8836 ; SKYLAKE-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8837 ; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8838 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   8839 ;
   8840 ; SKX-SSE-LABEL: test_pcmpgtw:
   8841 ; SKX-SSE:       # %bb.0:
   8842 ; SKX-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
   8843 ; SKX-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
   8844 ; SKX-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
   8845 ; SKX-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
   8846 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   8847 ;
   8848 ; SKX-LABEL: test_pcmpgtw:
   8849 ; SKX:       # %bb.0:
   8850 ; SKX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8851 ; SKX-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   8852 ; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
   8853 ; SKX-NEXT:    retq # sched: [7:1.00]
   8854 ;
   8855 ; BTVER2-SSE-LABEL: test_pcmpgtw:
   8856 ; BTVER2-SSE:       # %bb.0:
   8857 ; BTVER2-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
   8858 ; BTVER2-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [6:1.00]
   8859 ; BTVER2-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
   8860 ; BTVER2-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
   8861 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   8862 ;
   8863 ; BTVER2-LABEL: test_pcmpgtw:
   8864 ; BTVER2:       # %bb.0:
   8865 ; BTVER2-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
   8866 ; BTVER2-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   8867 ; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
   8868 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   8869 ;
   8870 ; ZNVER1-SSE-LABEL: test_pcmpgtw:
   8871 ; ZNVER1-SSE:       # %bb.0:
   8872 ; ZNVER1-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.25]
   8873 ; ZNVER1-SSE-NEXT:    pcmpgtw (%rdi), %xmm0 # sched: [8:0.50]
   8874 ; ZNVER1-SSE-NEXT:    pcmpgtw %xmm1, %xmm2 # sched: [1:0.25]
   8875 ; ZNVER1-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.25]
   8876 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   8877 ;
   8878 ; ZNVER1-LABEL: test_pcmpgtw:
   8879 ; ZNVER1:       # %bb.0:
   8880 ; ZNVER1-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
   8881 ; ZNVER1-NEXT:    vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   8882 ; ZNVER1-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
   8883 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Register compare, then a compare against the loaded vector; the two
        ; <8 x i1> masks are OR'd and sign-extended back to <8 x i16>.
   8884   %1 = icmp sgt <8 x i16> %a0, %a1
   8885   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   8886   %3 = icmp sgt <8 x i16> %a0, %2
   8887   %4 = or <8 x i1> %1, %3
   8888   %5 = sext <8 x i1> %4 to <8 x i16>
   8889   ret <8 x i16> %5
   8890 }
   8891 
   8892 define i16 @test_pextrw(<8 x i16> %a0) {
        ; Schedule test for word extraction. Lane 6 of %a0 is returned as an i16,
        ; and the assertions below expect a single (v)pextrw $6 into %eax for every
        ; -mcpu configuration in the RUN lines. Only the register form is covered.
        ; The assertions are autogenerated by utils/update_llc_test_checks.py;
        ; regenerate them with that script instead of editing them by hand.
   8893 ; GENERIC-LABEL: test_pextrw:
   8894 ; GENERIC:       # %bb.0:
   8895 ; GENERIC-NEXT:    pextrw $6, %xmm0, %eax # sched: [3:1.00]
   8896 ; GENERIC-NEXT:    # kill: def $ax killed $ax killed $eax
   8897 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   8898 ;
   8899 ; ATOM-LABEL: test_pextrw:
   8900 ; ATOM:       # %bb.0:
   8901 ; ATOM-NEXT:    pextrw $6, %xmm0, %eax # sched: [4:2.00]
   8902 ; ATOM-NEXT:    # kill: def $ax killed $ax killed $eax
   8903 ; ATOM-NEXT:    retq # sched: [79:39.50]
   8904 ;
   8905 ; SLM-LABEL: test_pextrw:
   8906 ; SLM:       # %bb.0:
   8907 ; SLM-NEXT:    pextrw $6, %xmm0, %eax # sched: [1:1.00]
   8908 ; SLM-NEXT:    # kill: def $ax killed $ax killed $eax
   8909 ; SLM-NEXT:    retq # sched: [4:1.00]
   8910 ;
   8911 ; SANDY-SSE-LABEL: test_pextrw:
   8912 ; SANDY-SSE:       # %bb.0:
   8913 ; SANDY-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [3:1.00]
   8914 ; SANDY-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
   8915 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   8916 ;
   8917 ; SANDY-LABEL: test_pextrw:
   8918 ; SANDY:       # %bb.0:
   8919 ; SANDY-NEXT:    vpextrw $6, %xmm0, %eax # sched: [3:1.00]
   8920 ; SANDY-NEXT:    # kill: def $ax killed $ax killed $eax
   8921 ; SANDY-NEXT:    retq # sched: [1:1.00]
   8922 ;
   8923 ; HASWELL-SSE-LABEL: test_pextrw:
   8924 ; HASWELL-SSE:       # %bb.0:
   8925 ; HASWELL-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [2:1.00]
   8926 ; HASWELL-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
   8927 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   8928 ;
   8929 ; HASWELL-LABEL: test_pextrw:
   8930 ; HASWELL:       # %bb.0:
   8931 ; HASWELL-NEXT:    vpextrw $6, %xmm0, %eax # sched: [2:1.00]
   8932 ; HASWELL-NEXT:    # kill: def $ax killed $ax killed $eax
   8933 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   8934 ;
   8935 ; BROADWELL-SSE-LABEL: test_pextrw:
   8936 ; BROADWELL-SSE:       # %bb.0:
   8937 ; BROADWELL-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [2:1.00]
   8938 ; BROADWELL-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
   8939 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   8940 ;
   8941 ; BROADWELL-LABEL: test_pextrw:
   8942 ; BROADWELL:       # %bb.0:
   8943 ; BROADWELL-NEXT:    vpextrw $6, %xmm0, %eax # sched: [2:1.00]
   8944 ; BROADWELL-NEXT:    # kill: def $ax killed $ax killed $eax
   8945 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   8946 ;
   8947 ; SKYLAKE-SSE-LABEL: test_pextrw:
   8948 ; SKYLAKE-SSE:       # %bb.0:
   8949 ; SKYLAKE-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [3:1.00]
   8950 ; SKYLAKE-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
   8951 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   8952 ;
   8953 ; SKYLAKE-LABEL: test_pextrw:
   8954 ; SKYLAKE:       # %bb.0:
   8955 ; SKYLAKE-NEXT:    vpextrw $6, %xmm0, %eax # sched: [3:1.00]
   8956 ; SKYLAKE-NEXT:    # kill: def $ax killed $ax killed $eax
   8957 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   8958 ;
   8959 ; SKX-SSE-LABEL: test_pextrw:
   8960 ; SKX-SSE:       # %bb.0:
   8961 ; SKX-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [3:1.00]
   8962 ; SKX-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
   8963 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   8964 ;
   8965 ; SKX-LABEL: test_pextrw:
   8966 ; SKX:       # %bb.0:
   8967 ; SKX-NEXT:    vpextrw $6, %xmm0, %eax # sched: [3:1.00]
   8968 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
   8969 ; SKX-NEXT:    retq # sched: [7:1.00]
   8970 ;
   8971 ; BTVER2-SSE-LABEL: test_pextrw:
   8972 ; BTVER2-SSE:       # %bb.0:
   8973 ; BTVER2-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [3:1.00]
   8974 ; BTVER2-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
   8975 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   8976 ;
   8977 ; BTVER2-LABEL: test_pextrw:
   8978 ; BTVER2:       # %bb.0:
   8979 ; BTVER2-NEXT:    vpextrw $6, %xmm0, %eax # sched: [3:1.00]
   8980 ; BTVER2-NEXT:    # kill: def $ax killed $ax killed $eax
   8981 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   8982 ;
   8983 ; ZNVER1-SSE-LABEL: test_pextrw:
   8984 ; ZNVER1-SSE:       # %bb.0:
   8985 ; ZNVER1-SSE-NEXT:    pextrw $6, %xmm0, %eax # sched: [2:2.00]
   8986 ; ZNVER1-SSE-NEXT:    # kill: def $ax killed $ax killed $eax
   8987 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   8988 ;
   8989 ; ZNVER1-LABEL: test_pextrw:
   8990 ; ZNVER1:       # %bb.0:
   8991 ; ZNVER1-NEXT:    vpextrw $6, %xmm0, %eax # sched: [2:2.00]
   8992 ; ZNVER1-NEXT:    # kill: def $ax killed $ax killed $eax
   8993 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; The constant lane index 6 is what appears as the $6 immediate above.
   8994   %1 = extractelement <8 x i16> %a0, i32 6
   8995   ret i16 %1
   8996 }
   8997 
   8998 define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) {
        ; Schedule test for word insertion. %a1 is inserted into lane 1 from a
        ; register and an i16 loaded from %a2 is inserted into lane 3, so the
        ; assertions below cover both the register and the memory form of (v)pinsrw
        ; for every -mcpu configuration in the RUN lines. The Atom assertions
        ; additionally expect four nops ahead of the return.
        ; The assertions are autogenerated by utils/update_llc_test_checks.py;
        ; regenerate them with that script instead of editing them by hand.
   8999 ; GENERIC-LABEL: test_pinsrw:
   9000 ; GENERIC:       # %bb.0:
   9001 ; GENERIC-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:1.00]
   9002 ; GENERIC-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50]
   9003 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   9004 ;
   9005 ; ATOM-LABEL: test_pinsrw:
   9006 ; ATOM:       # %bb.0:
   9007 ; ATOM-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [1:1.00]
   9008 ; ATOM-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [1:1.00]
   9009 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9010 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9011 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9012 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9013 ; ATOM-NEXT:    retq # sched: [79:39.50]
   9014 ;
   9015 ; SLM-LABEL: test_pinsrw:
   9016 ; SLM:       # %bb.0:
   9017 ; SLM-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [1:1.00]
   9018 ; SLM-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00]
   9019 ; SLM-NEXT:    retq # sched: [4:1.00]
   9020 ;
   9021 ; SANDY-SSE-LABEL: test_pinsrw:
   9022 ; SANDY-SSE:       # %bb.0:
   9023 ; SANDY-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:1.00]
   9024 ; SANDY-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50]
   9025 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   9026 ;
   9027 ; SANDY-LABEL: test_pinsrw:
   9028 ; SANDY:       # %bb.0:
   9029 ; SANDY-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
   9030 ; SANDY-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
   9031 ; SANDY-NEXT:    retq # sched: [1:1.00]
   9032 ;
   9033 ; HASWELL-SSE-LABEL: test_pinsrw:
   9034 ; HASWELL-SSE:       # %bb.0:
   9035 ; HASWELL-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
   9036 ; HASWELL-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
   9037 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9038 ;
   9039 ; HASWELL-LABEL: test_pinsrw:
   9040 ; HASWELL:       # %bb.0:
   9041 ; HASWELL-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
   9042 ; HASWELL-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
   9043 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   9044 ;
   9045 ; BROADWELL-SSE-LABEL: test_pinsrw:
   9046 ; BROADWELL-SSE:       # %bb.0:
   9047 ; BROADWELL-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
   9048 ; BROADWELL-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
   9049 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9050 ;
   9051 ; BROADWELL-LABEL: test_pinsrw:
   9052 ; BROADWELL:       # %bb.0:
   9053 ; BROADWELL-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
   9054 ; BROADWELL-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
   9055 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   9056 ;
   9057 ; SKYLAKE-SSE-LABEL: test_pinsrw:
   9058 ; SKYLAKE-SSE:       # %bb.0:
   9059 ; SKYLAKE-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
   9060 ; SKYLAKE-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
   9061 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   9062 ;
   9063 ; SKYLAKE-LABEL: test_pinsrw:
   9064 ; SKYLAKE:       # %bb.0:
   9065 ; SKYLAKE-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
   9066 ; SKYLAKE-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
   9067 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   9068 ;
   9069 ; SKX-SSE-LABEL: test_pinsrw:
   9070 ; SKX-SSE:       # %bb.0:
   9071 ; SKX-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
   9072 ; SKX-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
   9073 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   9074 ;
   9075 ; SKX-LABEL: test_pinsrw:
   9076 ; SKX:       # %bb.0:
   9077 ; SKX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
   9078 ; SKX-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
   9079 ; SKX-NEXT:    retq # sched: [7:1.00]
   9080 ;
   9081 ; BTVER2-SSE-LABEL: test_pinsrw:
   9082 ; BTVER2-SSE:       # %bb.0:
   9083 ; BTVER2-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [7:0.50]
   9084 ; BTVER2-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00]
   9085 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   9086 ;
   9087 ; BTVER2-LABEL: test_pinsrw:
   9088 ; BTVER2:       # %bb.0:
   9089 ; BTVER2-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [7:0.50]
   9090 ; BTVER2-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00]
   9091 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   9092 ;
   9093 ; ZNVER1-SSE-LABEL: test_pinsrw:
   9094 ; ZNVER1-SSE:       # %bb.0:
   9095 ; ZNVER1-SSE-NEXT:    pinsrw $1, %edi, %xmm0 # sched: [1:0.25]
   9096 ; ZNVER1-SSE-NEXT:    pinsrw $3, (%rsi), %xmm0 # sched: [8:0.50]
   9097 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   9098 ;
   9099 ; ZNVER1-LABEL: test_pinsrw:
   9100 ; ZNVER1:       # %bb.0:
   9101 ; ZNVER1-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
   9102 ; ZNVER1-NEXT:    vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [8:0.50]
   9103 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Lane 1 takes the register argument; lane 3 takes the value loaded from
        ; %a2. These indices are the $1 and $3 immediates in the assertions above.
   9104   %1 = insertelement <8 x i16> %a0, i16 %a1, i32 1
   9105   %2 = load i16, i16 *%a2
   9106   %3 = insertelement <8 x i16> %1, i16 %2, i32 3
   9107   ret <8 x i16> %3
   9108 }
   9109 
   9110 define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
        ; Schedule test for the llvm.x86.sse2.pmadd.wd intrinsic. It is applied
        ; once to two register operands and once with an operand loaded from %a2,
        ; so the assertions below cover both the register and the memory form of
        ; (v)pmaddwd for every -mcpu configuration in the RUN lines.
        ; The assertions are autogenerated by utils/update_llc_test_checks.py;
        ; regenerate them with that script instead of editing them by hand.
   9111 ; GENERIC-LABEL: test_pmaddwd:
   9112 ; GENERIC:       # %bb.0:
   9113 ; GENERIC-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
   9114 ; GENERIC-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
   9115 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   9116 ;
   9117 ; ATOM-LABEL: test_pmaddwd:
   9118 ; ATOM:       # %bb.0:
   9119 ; ATOM-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [5:5.00]
   9120 ; ATOM-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [5:5.00]
   9121 ; ATOM-NEXT:    retq # sched: [79:39.50]
   9122 ;
   9123 ; SLM-LABEL: test_pmaddwd:
   9124 ; SLM:       # %bb.0:
   9125 ; SLM-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [4:1.00]
   9126 ; SLM-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [7:1.00]
   9127 ; SLM-NEXT:    retq # sched: [4:1.00]
   9128 ;
   9129 ; SANDY-SSE-LABEL: test_pmaddwd:
   9130 ; SANDY-SSE:       # %bb.0:
   9131 ; SANDY-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
   9132 ; SANDY-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
   9133 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   9134 ;
   9135 ; SANDY-LABEL: test_pmaddwd:
   9136 ; SANDY:       # %bb.0:
   9137 ; SANDY-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   9138 ; SANDY-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   9139 ; SANDY-NEXT:    retq # sched: [1:1.00]
   9140 ;
   9141 ; HASWELL-SSE-LABEL: test_pmaddwd:
   9142 ; HASWELL-SSE:       # %bb.0:
   9143 ; HASWELL-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
   9144 ; HASWELL-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
   9145 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9146 ;
   9147 ; HASWELL-LABEL: test_pmaddwd:
   9148 ; HASWELL:       # %bb.0:
   9149 ; HASWELL-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   9150 ; HASWELL-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   9151 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   9152 ;
   9153 ; BROADWELL-SSE-LABEL: test_pmaddwd:
   9154 ; BROADWELL-SSE:       # %bb.0:
   9155 ; BROADWELL-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
   9156 ; BROADWELL-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [10:1.00]
   9157 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9158 ;
   9159 ; BROADWELL-LABEL: test_pmaddwd:
   9160 ; BROADWELL:       # %bb.0:
   9161 ; BROADWELL-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   9162 ; BROADWELL-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   9163 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   9164 ;
   9165 ; SKYLAKE-SSE-LABEL: test_pmaddwd:
   9166 ; SKYLAKE-SSE:       # %bb.0:
   9167 ; SKYLAKE-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [4:0.50]
   9168 ; SKYLAKE-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [10:0.50]
   9169 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   9170 ;
   9171 ; SKYLAKE-LABEL: test_pmaddwd:
   9172 ; SKYLAKE:       # %bb.0:
   9173 ; SKYLAKE-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   9174 ; SKYLAKE-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   9175 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   9176 ;
   9177 ; SKX-SSE-LABEL: test_pmaddwd:
   9178 ; SKX-SSE:       # %bb.0:
   9179 ; SKX-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [4:0.50]
   9180 ; SKX-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [10:0.50]
   9181 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   9182 ;
   9183 ; SKX-LABEL: test_pmaddwd:
   9184 ; SKX:       # %bb.0:
   9185 ; SKX-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   9186 ; SKX-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   9187 ; SKX-NEXT:    retq # sched: [7:1.00]
   9188 ;
   9189 ; BTVER2-SSE-LABEL: test_pmaddwd:
   9190 ; BTVER2-SSE:       # %bb.0:
   9191 ; BTVER2-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [2:1.00]
   9192 ; BTVER2-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [7:1.00]
   9193 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   9194 ;
   9195 ; BTVER2-LABEL: test_pmaddwd:
   9196 ; BTVER2:       # %bb.0:
   9197 ; BTVER2-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   9198 ; BTVER2-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   9199 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   9200 ;
   9201 ; ZNVER1-SSE-LABEL: test_pmaddwd:
   9202 ; ZNVER1-SSE:       # %bb.0:
   9203 ; ZNVER1-SSE-NEXT:    pmaddwd %xmm1, %xmm0 # sched: [4:1.00]
   9204 ; ZNVER1-SSE-NEXT:    pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
   9205 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   9206 ;
   9207 ; ZNVER1-LABEL: test_pmaddwd:
   9208 ; ZNVER1:       # %bb.0:
   9209 ; ZNVER1-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
   9210 ; ZNVER1-NEXT:    vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   9211 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; The first result is <4 x i32>, so it is bitcast back to <8 x i16> before
        ; being passed to the second call together with the loaded vector.
   9212   %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
   9213   %2 = bitcast <4 x i32> %1 to <8 x i16>
   9214   %3 = load <8 x i16>, <8 x i16> *%a2, align 16
   9215   %4 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %2, <8 x i16> %3)
   9216   ret <4 x i32> %4
   9217 }
   9218 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
   9219 
   9220 define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
        ; Schedule test for the llvm.x86.sse2.pmaxs.w intrinsic. It is applied once
        ; to two register operands and once with an operand loaded from %a2, so the
        ; assertions below cover both the register and the memory form of (v)pmaxsw
        ; for every -mcpu configuration in the RUN lines. The Atom assertions
        ; additionally expect four nops ahead of the return.
        ; The assertions are autogenerated by utils/update_llc_test_checks.py;
        ; regenerate them with that script instead of editing them by hand.
   9221 ; GENERIC-LABEL: test_pmaxsw:
   9222 ; GENERIC:       # %bb.0:
   9223 ; GENERIC-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
   9224 ; GENERIC-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
   9225 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   9226 ;
   9227 ; ATOM-LABEL: test_pmaxsw:
   9228 ; ATOM:       # %bb.0:
   9229 ; ATOM-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
   9230 ; ATOM-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [1:1.00]
   9231 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9232 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9233 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9234 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9235 ; ATOM-NEXT:    retq # sched: [79:39.50]
   9236 ;
   9237 ; SLM-LABEL: test_pmaxsw:
   9238 ; SLM:       # %bb.0:
   9239 ; SLM-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
   9240 ; SLM-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [4:1.00]
   9241 ; SLM-NEXT:    retq # sched: [4:1.00]
   9242 ;
   9243 ; SANDY-SSE-LABEL: test_pmaxsw:
   9244 ; SANDY-SSE:       # %bb.0:
   9245 ; SANDY-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
   9246 ; SANDY-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
   9247 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   9248 ;
   9249 ; SANDY-LABEL: test_pmaxsw:
   9250 ; SANDY:       # %bb.0:
   9251 ; SANDY-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9252 ; SANDY-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   9253 ; SANDY-NEXT:    retq # sched: [1:1.00]
   9254 ;
   9255 ; HASWELL-SSE-LABEL: test_pmaxsw:
   9256 ; HASWELL-SSE:       # %bb.0:
   9257 ; HASWELL-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
   9258 ; HASWELL-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
   9259 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9260 ;
   9261 ; HASWELL-LABEL: test_pmaxsw:
   9262 ; HASWELL:       # %bb.0:
   9263 ; HASWELL-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9264 ; HASWELL-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   9265 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   9266 ;
   9267 ; BROADWELL-SSE-LABEL: test_pmaxsw:
   9268 ; BROADWELL-SSE:       # %bb.0:
   9269 ; BROADWELL-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
   9270 ; BROADWELL-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [6:0.50]
   9271 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9272 ;
   9273 ; BROADWELL-LABEL: test_pmaxsw:
   9274 ; BROADWELL:       # %bb.0:
   9275 ; BROADWELL-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9276 ; BROADWELL-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   9277 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   9278 ;
   9279 ; SKYLAKE-SSE-LABEL: test_pmaxsw:
   9280 ; SKYLAKE-SSE:       # %bb.0:
   9281 ; SKYLAKE-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
   9282 ; SKYLAKE-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
   9283 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   9284 ;
   9285 ; SKYLAKE-LABEL: test_pmaxsw:
   9286 ; SKYLAKE:       # %bb.0:
   9287 ; SKYLAKE-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9288 ; SKYLAKE-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   9289 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   9290 ;
   9291 ; SKX-SSE-LABEL: test_pmaxsw:
   9292 ; SKX-SSE:       # %bb.0:
   9293 ; SKX-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
   9294 ; SKX-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
   9295 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   9296 ;
   9297 ; SKX-LABEL: test_pmaxsw:
   9298 ; SKX:       # %bb.0:
   9299 ; SKX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9300 ; SKX-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   9301 ; SKX-NEXT:    retq # sched: [7:1.00]
   9302 ;
   9303 ; BTVER2-SSE-LABEL: test_pmaxsw:
   9304 ; BTVER2-SSE:       # %bb.0:
   9305 ; BTVER2-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
   9306 ; BTVER2-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [6:1.00]
   9307 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   9308 ;
   9309 ; BTVER2-LABEL: test_pmaxsw:
   9310 ; BTVER2:       # %bb.0:
   9311 ; BTVER2-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9312 ; BTVER2-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   9313 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   9314 ;
   9315 ; ZNVER1-SSE-LABEL: test_pmaxsw:
   9316 ; ZNVER1-SSE:       # %bb.0:
   9317 ; ZNVER1-SSE-NEXT:    pmaxsw %xmm1, %xmm0 # sched: [1:0.25]
   9318 ; ZNVER1-SSE-NEXT:    pmaxsw (%rdi), %xmm0 # sched: [8:0.50]
   9319 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   9320 ;
   9321 ; ZNVER1-LABEL: test_pmaxsw:
   9322 ; ZNVER1:       # %bb.0:
   9323 ; ZNVER1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   9324 ; ZNVER1-NEXT:    vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   9325 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; The result of the register-register call is combined with the loaded
        ; vector by a second call to the same intrinsic.
   9326   %1 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
   9327   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   9328   %3 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %1, <8 x i16> %2)
   9329   ret <8 x i16> %3
   9330 }
   9331 declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
   9332 
   9333 define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
        ; Scheduling test for pmaxub / vpmaxub. The IR at the bottom applies the
        ; @llvm.x86.sse2.pmaxu.b intrinsic twice: once to the two vector
        ; arguments and once to that result plus a vector loaded from %a2.
        ; Every per-CPU assertion group below therefore expects one
        ; register-register instruction followed by one with a (%rdi) memory
        ; operand, each carrying the "# sched" annotation printed by llc. The
        ; Atom group additionally expects nop instructions before retq.
   9334 ; GENERIC-LABEL: test_pmaxub:
   9335 ; GENERIC:       # %bb.0:
   9336 ; GENERIC-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
   9337 ; GENERIC-NEXT:    pmaxub (%rdi), %xmm0 # sched: [7:0.50]
   9338 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   9339 ;
   9340 ; ATOM-LABEL: test_pmaxub:
   9341 ; ATOM:       # %bb.0:
   9342 ; ATOM-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
   9343 ; ATOM-NEXT:    pmaxub (%rdi), %xmm0 # sched: [1:1.00]
   9344 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9345 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9346 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9347 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9348 ; ATOM-NEXT:    retq # sched: [79:39.50]
   9349 ;
   9350 ; SLM-LABEL: test_pmaxub:
   9351 ; SLM:       # %bb.0:
   9352 ; SLM-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
   9353 ; SLM-NEXT:    pmaxub (%rdi), %xmm0 # sched: [4:1.00]
   9354 ; SLM-NEXT:    retq # sched: [4:1.00]
   9355 ;
   9356 ; SANDY-SSE-LABEL: test_pmaxub:
   9357 ; SANDY-SSE:       # %bb.0:
   9358 ; SANDY-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
   9359 ; SANDY-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [7:0.50]
   9360 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   9361 ;
   9362 ; SANDY-LABEL: test_pmaxub:
   9363 ; SANDY:       # %bb.0:
   9364 ; SANDY-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9365 ; SANDY-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   9366 ; SANDY-NEXT:    retq # sched: [1:1.00]
   9367 ;
   9368 ; HASWELL-SSE-LABEL: test_pmaxub:
   9369 ; HASWELL-SSE:       # %bb.0:
   9370 ; HASWELL-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
   9371 ; HASWELL-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [7:0.50]
   9372 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9373 ;
   9374 ; HASWELL-LABEL: test_pmaxub:
   9375 ; HASWELL:       # %bb.0:
   9376 ; HASWELL-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9377 ; HASWELL-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   9378 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   9379 ;
   9380 ; BROADWELL-SSE-LABEL: test_pmaxub:
   9381 ; BROADWELL-SSE:       # %bb.0:
   9382 ; BROADWELL-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
   9383 ; BROADWELL-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [6:0.50]
   9384 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9385 ;
   9386 ; BROADWELL-LABEL: test_pmaxub:
   9387 ; BROADWELL:       # %bb.0:
   9388 ; BROADWELL-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9389 ; BROADWELL-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   9390 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   9391 ;
   9392 ; SKYLAKE-SSE-LABEL: test_pmaxub:
   9393 ; SKYLAKE-SSE:       # %bb.0:
   9394 ; SKYLAKE-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
   9395 ; SKYLAKE-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [7:0.50]
   9396 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   9397 ;
   9398 ; SKYLAKE-LABEL: test_pmaxub:
   9399 ; SKYLAKE:       # %bb.0:
   9400 ; SKYLAKE-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9401 ; SKYLAKE-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   9402 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   9403 ;
   9404 ; SKX-SSE-LABEL: test_pmaxub:
   9405 ; SKX-SSE:       # %bb.0:
   9406 ; SKX-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
   9407 ; SKX-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [7:0.50]
   9408 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   9409 ;
   9410 ; SKX-LABEL: test_pmaxub:
   9411 ; SKX:       # %bb.0:
   9412 ; SKX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9413 ; SKX-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   9414 ; SKX-NEXT:    retq # sched: [7:1.00]
   9415 ;
   9416 ; BTVER2-SSE-LABEL: test_pmaxub:
   9417 ; BTVER2-SSE:       # %bb.0:
   9418 ; BTVER2-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.50]
   9419 ; BTVER2-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [6:1.00]
   9420 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   9421 ;
   9422 ; BTVER2-LABEL: test_pmaxub:
   9423 ; BTVER2:       # %bb.0:
   9424 ; BTVER2-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9425 ; BTVER2-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   9426 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   9427 ;
   9428 ; ZNVER1-SSE-LABEL: test_pmaxub:
   9429 ; ZNVER1-SSE:       # %bb.0:
   9430 ; ZNVER1-SSE-NEXT:    pmaxub %xmm1, %xmm0 # sched: [1:0.25]
   9431 ; ZNVER1-SSE-NEXT:    pmaxub (%rdi), %xmm0 # sched: [8:0.50]
   9432 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   9433 ;
   9434 ; ZNVER1-LABEL: test_pmaxub:
   9435 ; ZNVER1:       # %bb.0:
   9436 ; ZNVER1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   9437 ; ZNVER1-NEXT:    vpmaxub (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   9438 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 is the max of the two arguments; %3 combines it with the
        ; 16-byte-aligned load %2 so the memory-operand form is exercised too.
   9439   %1 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
   9440   %2 = load <16 x i8>, <16 x i8> *%a2, align 16
   9441   %3 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %1, <16 x i8> %2)
   9442   ret <16 x i8> %3
   9443 }
   9444 declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
   9445 
   9446 define <8 x i16> @test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
        ; Scheduling test for pminsw / vpminsw. The IR at the bottom applies the
        ; @llvm.x86.sse2.pmins.w intrinsic twice: once to the two vector
        ; arguments and once to that result plus a vector loaded from %a2.
        ; Every per-CPU assertion group below therefore expects one
        ; register-register instruction followed by one with a (%rdi) memory
        ; operand, each carrying the "# sched" annotation printed by llc. The
        ; Atom group additionally expects nop instructions before retq.
   9447 ; GENERIC-LABEL: test_pminsw:
   9448 ; GENERIC:       # %bb.0:
   9449 ; GENERIC-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
   9450 ; GENERIC-NEXT:    pminsw (%rdi), %xmm0 # sched: [7:0.50]
   9451 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   9452 ;
   9453 ; ATOM-LABEL: test_pminsw:
   9454 ; ATOM:       # %bb.0:
   9455 ; ATOM-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
   9456 ; ATOM-NEXT:    pminsw (%rdi), %xmm0 # sched: [1:1.00]
   9457 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9458 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9459 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9460 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9461 ; ATOM-NEXT:    retq # sched: [79:39.50]
   9462 ;
   9463 ; SLM-LABEL: test_pminsw:
   9464 ; SLM:       # %bb.0:
   9465 ; SLM-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
   9466 ; SLM-NEXT:    pminsw (%rdi), %xmm0 # sched: [4:1.00]
   9467 ; SLM-NEXT:    retq # sched: [4:1.00]
   9468 ;
   9469 ; SANDY-SSE-LABEL: test_pminsw:
   9470 ; SANDY-SSE:       # %bb.0:
   9471 ; SANDY-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
   9472 ; SANDY-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [7:0.50]
   9473 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   9474 ;
   9475 ; SANDY-LABEL: test_pminsw:
   9476 ; SANDY:       # %bb.0:
   9477 ; SANDY-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9478 ; SANDY-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   9479 ; SANDY-NEXT:    retq # sched: [1:1.00]
   9480 ;
   9481 ; HASWELL-SSE-LABEL: test_pminsw:
   9482 ; HASWELL-SSE:       # %bb.0:
   9483 ; HASWELL-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
   9484 ; HASWELL-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [7:0.50]
   9485 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9486 ;
   9487 ; HASWELL-LABEL: test_pminsw:
   9488 ; HASWELL:       # %bb.0:
   9489 ; HASWELL-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9490 ; HASWELL-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   9491 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   9492 ;
   9493 ; BROADWELL-SSE-LABEL: test_pminsw:
   9494 ; BROADWELL-SSE:       # %bb.0:
   9495 ; BROADWELL-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
   9496 ; BROADWELL-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [6:0.50]
   9497 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9498 ;
   9499 ; BROADWELL-LABEL: test_pminsw:
   9500 ; BROADWELL:       # %bb.0:
   9501 ; BROADWELL-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9502 ; BROADWELL-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   9503 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   9504 ;
   9505 ; SKYLAKE-SSE-LABEL: test_pminsw:
   9506 ; SKYLAKE-SSE:       # %bb.0:
   9507 ; SKYLAKE-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
   9508 ; SKYLAKE-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [7:0.50]
   9509 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   9510 ;
   9511 ; SKYLAKE-LABEL: test_pminsw:
   9512 ; SKYLAKE:       # %bb.0:
   9513 ; SKYLAKE-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9514 ; SKYLAKE-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   9515 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   9516 ;
   9517 ; SKX-SSE-LABEL: test_pminsw:
   9518 ; SKX-SSE:       # %bb.0:
   9519 ; SKX-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
   9520 ; SKX-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [7:0.50]
   9521 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   9522 ;
   9523 ; SKX-LABEL: test_pminsw:
   9524 ; SKX:       # %bb.0:
   9525 ; SKX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9526 ; SKX-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   9527 ; SKX-NEXT:    retq # sched: [7:1.00]
   9528 ;
   9529 ; BTVER2-SSE-LABEL: test_pminsw:
   9530 ; BTVER2-SSE:       # %bb.0:
   9531 ; BTVER2-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.50]
   9532 ; BTVER2-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [6:1.00]
   9533 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   9534 ;
   9535 ; BTVER2-LABEL: test_pminsw:
   9536 ; BTVER2:       # %bb.0:
   9537 ; BTVER2-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9538 ; BTVER2-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   9539 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   9540 ;
   9541 ; ZNVER1-SSE-LABEL: test_pminsw:
   9542 ; ZNVER1-SSE:       # %bb.0:
   9543 ; ZNVER1-SSE-NEXT:    pminsw %xmm1, %xmm0 # sched: [1:0.25]
   9544 ; ZNVER1-SSE-NEXT:    pminsw (%rdi), %xmm0 # sched: [8:0.50]
   9545 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   9546 ;
   9547 ; ZNVER1-LABEL: test_pminsw:
   9548 ; ZNVER1:       # %bb.0:
   9549 ; ZNVER1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   9550 ; ZNVER1-NEXT:    vpminsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   9551 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 is the min of the two arguments; %3 combines it with the
        ; 16-byte-aligned load %2 so the memory-operand form is exercised too.
   9552   %1 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
   9553   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   9554   %3 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %1, <8 x i16> %2)
   9555   ret <8 x i16> %3
   9556 }
   9557 declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
   9558 
   9559 define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
        ; Scheduling test for pminub / vpminub. The IR at the bottom applies the
        ; @llvm.x86.sse2.pminu.b intrinsic twice: once to the two vector
        ; arguments and once to that result plus a vector loaded from %a2.
        ; Every per-CPU assertion group below therefore expects one
        ; register-register instruction followed by one with a (%rdi) memory
        ; operand, each carrying the "# sched" annotation printed by llc. The
        ; Atom group additionally expects nop instructions before retq.
   9560 ; GENERIC-LABEL: test_pminub:
   9561 ; GENERIC:       # %bb.0:
   9562 ; GENERIC-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
   9563 ; GENERIC-NEXT:    pminub (%rdi), %xmm0 # sched: [7:0.50]
   9564 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   9565 ;
   9566 ; ATOM-LABEL: test_pminub:
   9567 ; ATOM:       # %bb.0:
   9568 ; ATOM-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
   9569 ; ATOM-NEXT:    pminub (%rdi), %xmm0 # sched: [1:1.00]
   9570 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9571 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9572 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9573 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9574 ; ATOM-NEXT:    retq # sched: [79:39.50]
   9575 ;
   9576 ; SLM-LABEL: test_pminub:
   9577 ; SLM:       # %bb.0:
   9578 ; SLM-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
   9579 ; SLM-NEXT:    pminub (%rdi), %xmm0 # sched: [4:1.00]
   9580 ; SLM-NEXT:    retq # sched: [4:1.00]
   9581 ;
   9582 ; SANDY-SSE-LABEL: test_pminub:
   9583 ; SANDY-SSE:       # %bb.0:
   9584 ; SANDY-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
   9585 ; SANDY-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [7:0.50]
   9586 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   9587 ;
   9588 ; SANDY-LABEL: test_pminub:
   9589 ; SANDY:       # %bb.0:
   9590 ; SANDY-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9591 ; SANDY-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   9592 ; SANDY-NEXT:    retq # sched: [1:1.00]
   9593 ;
   9594 ; HASWELL-SSE-LABEL: test_pminub:
   9595 ; HASWELL-SSE:       # %bb.0:
   9596 ; HASWELL-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
   9597 ; HASWELL-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [7:0.50]
   9598 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9599 ;
   9600 ; HASWELL-LABEL: test_pminub:
   9601 ; HASWELL:       # %bb.0:
   9602 ; HASWELL-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9603 ; HASWELL-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   9604 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   9605 ;
   9606 ; BROADWELL-SSE-LABEL: test_pminub:
   9607 ; BROADWELL-SSE:       # %bb.0:
   9608 ; BROADWELL-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
   9609 ; BROADWELL-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [6:0.50]
   9610 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9611 ;
   9612 ; BROADWELL-LABEL: test_pminub:
   9613 ; BROADWELL:       # %bb.0:
   9614 ; BROADWELL-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9615 ; BROADWELL-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   9616 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   9617 ;
   9618 ; SKYLAKE-SSE-LABEL: test_pminub:
   9619 ; SKYLAKE-SSE:       # %bb.0:
   9620 ; SKYLAKE-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
   9621 ; SKYLAKE-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [7:0.50]
   9622 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   9623 ;
   9624 ; SKYLAKE-LABEL: test_pminub:
   9625 ; SKYLAKE:       # %bb.0:
   9626 ; SKYLAKE-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9627 ; SKYLAKE-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   9628 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   9629 ;
   9630 ; SKX-SSE-LABEL: test_pminub:
   9631 ; SKX-SSE:       # %bb.0:
   9632 ; SKX-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
   9633 ; SKX-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [7:0.50]
   9634 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   9635 ;
   9636 ; SKX-LABEL: test_pminub:
   9637 ; SKX:       # %bb.0:
   9638 ; SKX-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9639 ; SKX-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   9640 ; SKX-NEXT:    retq # sched: [7:1.00]
   9641 ;
   9642 ; BTVER2-SSE-LABEL: test_pminub:
   9643 ; BTVER2-SSE:       # %bb.0:
   9644 ; BTVER2-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.50]
   9645 ; BTVER2-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [6:1.00]
   9646 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   9647 ;
   9648 ; BTVER2-LABEL: test_pminub:
   9649 ; BTVER2:       # %bb.0:
   9650 ; BTVER2-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   9651 ; BTVER2-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   9652 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   9653 ;
   9654 ; ZNVER1-SSE-LABEL: test_pminub:
   9655 ; ZNVER1-SSE:       # %bb.0:
   9656 ; ZNVER1-SSE-NEXT:    pminub %xmm1, %xmm0 # sched: [1:0.25]
   9657 ; ZNVER1-SSE-NEXT:    pminub (%rdi), %xmm0 # sched: [8:0.50]
   9658 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   9659 ;
   9660 ; ZNVER1-LABEL: test_pminub:
   9661 ; ZNVER1:       # %bb.0:
   9662 ; ZNVER1-NEXT:    vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   9663 ; ZNVER1-NEXT:    vpminub (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   9664 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 is the min of the two arguments; %3 combines it with the
        ; 16-byte-aligned load %2 so the memory-operand form is exercised too.
   9665   %1 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
   9666   %2 = load <16 x i8>, <16 x i8> *%a2, align 16
   9667   %3 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %1, <16 x i8> %2)
   9668   ret <16 x i8> %3
   9669 }
   9670 declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
   9671 
   9672 define i32 @test_pmovmskb(<16 x i8> %a0) {
        ; Scheduling test for pmovmskb / vpmovmskb. The IR at the bottom makes a
        ; single call to @llvm.x86.sse2.pmovmskb.128 on the vector argument and
        ; returns its i32 result, so every per-CPU assertion group below expects
        ; one instruction from %xmm0 to %eax (with its "# sched" annotation)
        ; followed by retq. There is no memory-operand variant in this test.
        ; The Atom group additionally expects nop instructions before retq.
   9673 ; GENERIC-LABEL: test_pmovmskb:
   9674 ; GENERIC:       # %bb.0:
   9675 ; GENERIC-NEXT:    pmovmskb %xmm0, %eax # sched: [2:1.00]
   9676 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   9677 ;
   9678 ; ATOM-LABEL: test_pmovmskb:
   9679 ; ATOM:       # %bb.0:
   9680 ; ATOM-NEXT:    pmovmskb %xmm0, %eax # sched: [3:3.00]
   9681 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9682 ; ATOM-NEXT:    nop # sched: [1:0.50]
   9683 ; ATOM-NEXT:    retq # sched: [79:39.50]
   9684 ;
   9685 ; SLM-LABEL: test_pmovmskb:
   9686 ; SLM:       # %bb.0:
   9687 ; SLM-NEXT:    pmovmskb %xmm0, %eax # sched: [4:1.00]
   9688 ; SLM-NEXT:    retq # sched: [4:1.00]
   9689 ;
   9690 ; SANDY-SSE-LABEL: test_pmovmskb:
   9691 ; SANDY-SSE:       # %bb.0:
   9692 ; SANDY-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [2:1.00]
   9693 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   9694 ;
   9695 ; SANDY-LABEL: test_pmovmskb:
   9696 ; SANDY:       # %bb.0:
   9697 ; SANDY-NEXT:    vpmovmskb %xmm0, %eax # sched: [2:1.00]
   9698 ; SANDY-NEXT:    retq # sched: [1:1.00]
   9699 ;
   9700 ; HASWELL-SSE-LABEL: test_pmovmskb:
   9701 ; HASWELL-SSE:       # %bb.0:
   9702 ; HASWELL-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [3:1.00]
   9703 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9704 ;
   9705 ; HASWELL-LABEL: test_pmovmskb:
   9706 ; HASWELL:       # %bb.0:
   9707 ; HASWELL-NEXT:    vpmovmskb %xmm0, %eax # sched: [3:1.00]
   9708 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   9709 ;
   9710 ; BROADWELL-SSE-LABEL: test_pmovmskb:
   9711 ; BROADWELL-SSE:       # %bb.0:
   9712 ; BROADWELL-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [3:1.00]
   9713 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9714 ;
   9715 ; BROADWELL-LABEL: test_pmovmskb:
   9716 ; BROADWELL:       # %bb.0:
   9717 ; BROADWELL-NEXT:    vpmovmskb %xmm0, %eax # sched: [3:1.00]
   9718 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   9719 ;
   9720 ; SKYLAKE-SSE-LABEL: test_pmovmskb:
   9721 ; SKYLAKE-SSE:       # %bb.0:
   9722 ; SKYLAKE-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [2:1.00]
   9723 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   9724 ;
   9725 ; SKYLAKE-LABEL: test_pmovmskb:
   9726 ; SKYLAKE:       # %bb.0:
   9727 ; SKYLAKE-NEXT:    vpmovmskb %xmm0, %eax # sched: [2:1.00]
   9728 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   9729 ;
   9730 ; SKX-SSE-LABEL: test_pmovmskb:
   9731 ; SKX-SSE:       # %bb.0:
   9732 ; SKX-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [2:1.00]
   9733 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   9734 ;
   9735 ; SKX-LABEL: test_pmovmskb:
   9736 ; SKX:       # %bb.0:
   9737 ; SKX-NEXT:    vpmovmskb %xmm0, %eax # sched: [2:1.00]
   9738 ; SKX-NEXT:    retq # sched: [7:1.00]
   9739 ;
   9740 ; BTVER2-SSE-LABEL: test_pmovmskb:
   9741 ; BTVER2-SSE:       # %bb.0:
   9742 ; BTVER2-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [3:1.00]
   9743 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   9744 ;
   9745 ; BTVER2-LABEL: test_pmovmskb:
   9746 ; BTVER2:       # %bb.0:
   9747 ; BTVER2-NEXT:    vpmovmskb %xmm0, %eax # sched: [3:1.00]
   9748 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   9749 ;
   9750 ; ZNVER1-SSE-LABEL: test_pmovmskb:
   9751 ; ZNVER1-SSE:       # %bb.0:
   9752 ; ZNVER1-SSE-NEXT:    pmovmskb %xmm0, %eax # sched: [1:1.00]
   9753 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   9754 ;
   9755 ; ZNVER1-LABEL: test_pmovmskb:
   9756 ; ZNVER1:       # %bb.0:
   9757 ; ZNVER1-NEXT:    vpmovmskb %xmm0, %eax # sched: [1:1.00]
   9758 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; The intrinsic's i32 result is returned directly.
   9759   %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
   9760   ret i32 %1
   9761 }
   9762 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
   9763 
   9764 define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
        ; Scheduling test for pmulhuw / vpmulhuw. The IR at the bottom applies
        ; the @llvm.x86.sse2.pmulhu.w intrinsic twice: once to the two vector
        ; arguments and once to that result plus a vector loaded from %a2.
        ; Every per-CPU assertion group below therefore expects one
        ; register-register instruction followed by one with a (%rdi) memory
        ; operand, each carrying the "# sched" annotation printed by llc.
   9765 ; GENERIC-LABEL: test_pmulhuw:
   9766 ; GENERIC:       # %bb.0:
   9767 ; GENERIC-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
   9768 ; GENERIC-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
   9769 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   9770 ;
   9771 ; ATOM-LABEL: test_pmulhuw:
   9772 ; ATOM:       # %bb.0:
   9773 ; ATOM-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [5:5.00]
   9774 ; ATOM-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [5:5.00]
   9775 ; ATOM-NEXT:    retq # sched: [79:39.50]
   9776 ;
   9777 ; SLM-LABEL: test_pmulhuw:
   9778 ; SLM:       # %bb.0:
   9779 ; SLM-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [4:1.00]
   9780 ; SLM-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [7:1.00]
   9781 ; SLM-NEXT:    retq # sched: [4:1.00]
   9782 ;
   9783 ; SANDY-SSE-LABEL: test_pmulhuw:
   9784 ; SANDY-SSE:       # %bb.0:
   9785 ; SANDY-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
   9786 ; SANDY-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
   9787 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   9788 ;
   9789 ; SANDY-LABEL: test_pmulhuw:
   9790 ; SANDY:       # %bb.0:
   9791 ; SANDY-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   9792 ; SANDY-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   9793 ; SANDY-NEXT:    retq # sched: [1:1.00]
   9794 ;
   9795 ; HASWELL-SSE-LABEL: test_pmulhuw:
   9796 ; HASWELL-SSE:       # %bb.0:
   9797 ; HASWELL-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
   9798 ; HASWELL-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
   9799 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9800 ;
   9801 ; HASWELL-LABEL: test_pmulhuw:
   9802 ; HASWELL:       # %bb.0:
   9803 ; HASWELL-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   9804 ; HASWELL-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   9805 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   9806 ;
   9807 ; BROADWELL-SSE-LABEL: test_pmulhuw:
   9808 ; BROADWELL-SSE:       # %bb.0:
   9809 ; BROADWELL-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
   9810 ; BROADWELL-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [10:1.00]
   9811 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9812 ;
   9813 ; BROADWELL-LABEL: test_pmulhuw:
   9814 ; BROADWELL:       # %bb.0:
   9815 ; BROADWELL-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   9816 ; BROADWELL-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   9817 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   9818 ;
   9819 ; SKYLAKE-SSE-LABEL: test_pmulhuw:
   9820 ; SKYLAKE-SSE:       # %bb.0:
   9821 ; SKYLAKE-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [4:0.50]
   9822 ; SKYLAKE-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [10:0.50]
   9823 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   9824 ;
   9825 ; SKYLAKE-LABEL: test_pmulhuw:
   9826 ; SKYLAKE:       # %bb.0:
   9827 ; SKYLAKE-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   9828 ; SKYLAKE-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   9829 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   9830 ;
   9831 ; SKX-SSE-LABEL: test_pmulhuw:
   9832 ; SKX-SSE:       # %bb.0:
   9833 ; SKX-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [4:0.50]
   9834 ; SKX-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [10:0.50]
   9835 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   9836 ;
   9837 ; SKX-LABEL: test_pmulhuw:
   9838 ; SKX:       # %bb.0:
   9839 ; SKX-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   9840 ; SKX-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   9841 ; SKX-NEXT:    retq # sched: [7:1.00]
   9842 ;
   9843 ; BTVER2-SSE-LABEL: test_pmulhuw:
   9844 ; BTVER2-SSE:       # %bb.0:
   9845 ; BTVER2-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [2:1.00]
   9846 ; BTVER2-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [7:1.00]
   9847 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   9848 ;
   9849 ; BTVER2-LABEL: test_pmulhuw:
   9850 ; BTVER2:       # %bb.0:
   9851 ; BTVER2-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   9852 ; BTVER2-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   9853 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   9854 ;
   9855 ; ZNVER1-SSE-LABEL: test_pmulhuw:
   9856 ; ZNVER1-SSE:       # %bb.0:
   9857 ; ZNVER1-SSE-NEXT:    pmulhuw %xmm1, %xmm0 # sched: [4:1.00]
   9858 ; ZNVER1-SSE-NEXT:    pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
   9859 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   9860 ;
   9861 ; ZNVER1-LABEL: test_pmulhuw:
   9862 ; ZNVER1:       # %bb.0:
   9863 ; ZNVER1-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
   9864 ; ZNVER1-NEXT:    vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   9865 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 multiplies the two arguments; %3 multiplies that result with the
        ; 16-byte-aligned load %2 so the memory-operand form is exercised too.
   9866   %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
   9867   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   9868   %3 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %1, <8 x i16> %2)
   9869   ret <8 x i16> %3
   9870 }
   9871 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
   9872 
   9873 define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
        ; Scheduling test for pmulhw / vpmulhw. The IR at the bottom applies the
        ; @llvm.x86.sse2.pmulh.w intrinsic twice: once to the two vector
        ; arguments and once to that result plus a vector loaded from %a2.
        ; Every per-CPU assertion group below therefore expects one
        ; register-register instruction followed by one with a (%rdi) memory
        ; operand, each carrying the "# sched" annotation printed by llc.
   9874 ; GENERIC-LABEL: test_pmulhw:
   9875 ; GENERIC:       # %bb.0:
   9876 ; GENERIC-NEXT:    pmulhw %xmm1, %xmm0 # sched: [5:1.00]
   9877 ; GENERIC-NEXT:    pmulhw (%rdi), %xmm0 # sched: [11:1.00]
   9878 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   9879 ;
   9880 ; ATOM-LABEL: test_pmulhw:
   9881 ; ATOM:       # %bb.0:
   9882 ; ATOM-NEXT:    pmulhw %xmm1, %xmm0 # sched: [5:5.00]
   9883 ; ATOM-NEXT:    pmulhw (%rdi), %xmm0 # sched: [5:5.00]
   9884 ; ATOM-NEXT:    retq # sched: [79:39.50]
   9885 ;
   9886 ; SLM-LABEL: test_pmulhw:
   9887 ; SLM:       # %bb.0:
   9888 ; SLM-NEXT:    pmulhw %xmm1, %xmm0 # sched: [4:1.00]
   9889 ; SLM-NEXT:    pmulhw (%rdi), %xmm0 # sched: [7:1.00]
   9890 ; SLM-NEXT:    retq # sched: [4:1.00]
   9891 ;
   9892 ; SANDY-SSE-LABEL: test_pmulhw:
   9893 ; SANDY-SSE:       # %bb.0:
   9894 ; SANDY-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [5:1.00]
   9895 ; SANDY-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [11:1.00]
   9896 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   9897 ;
   9898 ; SANDY-LABEL: test_pmulhw:
   9899 ; SANDY:       # %bb.0:
   9900 ; SANDY-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   9901 ; SANDY-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   9902 ; SANDY-NEXT:    retq # sched: [1:1.00]
   9903 ;
   9904 ; HASWELL-SSE-LABEL: test_pmulhw:
   9905 ; HASWELL-SSE:       # %bb.0:
   9906 ; HASWELL-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [5:1.00]
   9907 ; HASWELL-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [11:1.00]
   9908 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9909 ;
   9910 ; HASWELL-LABEL: test_pmulhw:
   9911 ; HASWELL:       # %bb.0:
   9912 ; HASWELL-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   9913 ; HASWELL-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   9914 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   9915 ;
   9916 ; BROADWELL-SSE-LABEL: test_pmulhw:
   9917 ; BROADWELL-SSE:       # %bb.0:
   9918 ; BROADWELL-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [5:1.00]
   9919 ; BROADWELL-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [10:1.00]
   9920 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   9921 ;
   9922 ; BROADWELL-LABEL: test_pmulhw:
   9923 ; BROADWELL:       # %bb.0:
   9924 ; BROADWELL-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   9925 ; BROADWELL-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   9926 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   9927 ;
   9928 ; SKYLAKE-SSE-LABEL: test_pmulhw:
   9929 ; SKYLAKE-SSE:       # %bb.0:
   9930 ; SKYLAKE-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [4:0.50]
   9931 ; SKYLAKE-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [10:0.50]
   9932 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   9933 ;
   9934 ; SKYLAKE-LABEL: test_pmulhw:
   9935 ; SKYLAKE:       # %bb.0:
   9936 ; SKYLAKE-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   9937 ; SKYLAKE-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   9938 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   9939 ;
   9940 ; SKX-SSE-LABEL: test_pmulhw:
   9941 ; SKX-SSE:       # %bb.0:
   9942 ; SKX-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [4:0.50]
   9943 ; SKX-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [10:0.50]
   9944 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   9945 ;
   9946 ; SKX-LABEL: test_pmulhw:
   9947 ; SKX:       # %bb.0:
   9948 ; SKX-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   9949 ; SKX-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   9950 ; SKX-NEXT:    retq # sched: [7:1.00]
   9951 ;
   9952 ; BTVER2-SSE-LABEL: test_pmulhw:
   9953 ; BTVER2-SSE:       # %bb.0:
   9954 ; BTVER2-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [2:1.00]
   9955 ; BTVER2-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [7:1.00]
   9956 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   9957 ;
   9958 ; BTVER2-LABEL: test_pmulhw:
   9959 ; BTVER2:       # %bb.0:
   9960 ; BTVER2-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   9961 ; BTVER2-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   9962 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   9963 ;
   9964 ; ZNVER1-SSE-LABEL: test_pmulhw:
   9965 ; ZNVER1-SSE:       # %bb.0:
   9966 ; ZNVER1-SSE-NEXT:    pmulhw %xmm1, %xmm0 # sched: [4:1.00]
   9967 ; ZNVER1-SSE-NEXT:    pmulhw (%rdi), %xmm0 # sched: [11:1.00]
   9968 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   9969 ;
   9970 ; ZNVER1-LABEL: test_pmulhw:
   9971 ; ZNVER1:       # %bb.0:
   9972 ; ZNVER1-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
   9973 ; ZNVER1-NEXT:    vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   9974 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 multiplies the two arguments; %3 multiplies that result with the
        ; 16-byte-aligned load %2 so the memory-operand form is exercised too.
   9975   %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
   9976   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   9977   %3 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %1, <8 x i16> %2)
   9978   ret <8 x i16> %3
   9979 }
   9980 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
   9981 
   9982 define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
        ; Scheduling test for pmullw / vpmullw. Unlike the neighbouring tests
        ; this one uses no intrinsic: the IR at the bottom is two plain
        ; <8 x i16> `mul` instructions, the second taking a vector loaded from
        ; %a2. Every per-CPU assertion group below expects one
        ; register-register instruction followed by one with a (%rdi) memory
        ; operand, each carrying the "# sched" annotation printed by llc.
   9983 ; GENERIC-LABEL: test_pmullw:
   9984 ; GENERIC:       # %bb.0:
   9985 ; GENERIC-NEXT:    pmullw %xmm1, %xmm0 # sched: [5:1.00]
   9986 ; GENERIC-NEXT:    pmullw (%rdi), %xmm0 # sched: [11:1.00]
   9987 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   9988 ;
   9989 ; ATOM-LABEL: test_pmullw:
   9990 ; ATOM:       # %bb.0:
   9991 ; ATOM-NEXT:    pmullw %xmm1, %xmm0 # sched: [5:5.00]
   9992 ; ATOM-NEXT:    pmullw (%rdi), %xmm0 # sched: [5:5.00]
   9993 ; ATOM-NEXT:    retq # sched: [79:39.50]
   9994 ;
   9995 ; SLM-LABEL: test_pmullw:
   9996 ; SLM:       # %bb.0:
   9997 ; SLM-NEXT:    pmullw %xmm1, %xmm0 # sched: [4:1.00]
   9998 ; SLM-NEXT:    pmullw (%rdi), %xmm0 # sched: [7:1.00]
   9999 ; SLM-NEXT:    retq # sched: [4:1.00]
   10000 ;
   10001 ; SANDY-SSE-LABEL: test_pmullw:
   10002 ; SANDY-SSE:       # %bb.0:
   10003 ; SANDY-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [5:1.00]
   10004 ; SANDY-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [11:1.00]
   10005 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   10006 ;
   10007 ; SANDY-LABEL: test_pmullw:
   10008 ; SANDY:       # %bb.0:
   10009 ; SANDY-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   10010 ; SANDY-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   10011 ; SANDY-NEXT:    retq # sched: [1:1.00]
   10012 ;
   10013 ; HASWELL-SSE-LABEL: test_pmullw:
   10014 ; HASWELL-SSE:       # %bb.0:
   10015 ; HASWELL-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [5:1.00]
   10016 ; HASWELL-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [11:1.00]
   10017 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10018 ;
   10019 ; HASWELL-LABEL: test_pmullw:
   10020 ; HASWELL:       # %bb.0:
   10021 ; HASWELL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   10022 ; HASWELL-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   10023 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   10024 ;
   10025 ; BROADWELL-SSE-LABEL: test_pmullw:
   10026 ; BROADWELL-SSE:       # %bb.0:
   10027 ; BROADWELL-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [5:1.00]
   10028 ; BROADWELL-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [10:1.00]
   10029 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10030 ;
   10031 ; BROADWELL-LABEL: test_pmullw:
   10032 ; BROADWELL:       # %bb.0:
   10033 ; BROADWELL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   10034 ; BROADWELL-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   10035 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   10036 ;
   10037 ; SKYLAKE-SSE-LABEL: test_pmullw:
   10038 ; SKYLAKE-SSE:       # %bb.0:
   10039 ; SKYLAKE-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [4:0.50]
   10040 ; SKYLAKE-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [10:0.50]
   10041 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   10042 ;
   10043 ; SKYLAKE-LABEL: test_pmullw:
   10044 ; SKYLAKE:       # %bb.0:
   10045 ; SKYLAKE-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   10046 ; SKYLAKE-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   10047 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   10048 ;
   10049 ; SKX-SSE-LABEL: test_pmullw:
   10050 ; SKX-SSE:       # %bb.0:
   10051 ; SKX-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [4:0.50]
   10052 ; SKX-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [10:0.50]
   10053 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   10054 ;
   10055 ; SKX-LABEL: test_pmullw:
   10056 ; SKX:       # %bb.0:
   10057 ; SKX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   10058 ; SKX-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   10059 ; SKX-NEXT:    retq # sched: [7:1.00]
   10060 ;
   10061 ; BTVER2-SSE-LABEL: test_pmullw:
   10062 ; BTVER2-SSE:       # %bb.0:
   10063 ; BTVER2-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [2:1.00]
   10064 ; BTVER2-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [7:1.00]
   10065 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   10066 ;
   10067 ; BTVER2-LABEL: test_pmullw:
   10068 ; BTVER2:       # %bb.0:
   10069 ; BTVER2-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   10070 ; BTVER2-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   10071 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   10072 ;
   10073 ; ZNVER1-SSE-LABEL: test_pmullw:
   10074 ; ZNVER1-SSE:       # %bb.0:
   10075 ; ZNVER1-SSE-NEXT:    pmullw %xmm1, %xmm0 # sched: [4:1.00]
   10076 ; ZNVER1-SSE-NEXT:    pmullw (%rdi), %xmm0 # sched: [11:1.00]
   10077 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   10078 ;
   10079 ; ZNVER1-LABEL: test_pmullw:
   10080 ; ZNVER1:       # %bb.0:
   10081 ; ZNVER1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
   10082 ; ZNVER1-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   10083 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
         ; %1 multiplies the two arguments; %3 multiplies that product with the
         ; 16-byte-aligned load %2 so the memory-operand form is exercised too.
   10084   %1 = mul <8 x i16> %a0, %a1
   10085   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   10086   %3 = mul <8 x i16> %1, %2
   10087   ret <8 x i16> %3
   10088 }
   10089 
   10090 define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
   10091 ; GENERIC-LABEL: test_pmuludq:
   10092 ; GENERIC:       # %bb.0:
   10093 ; GENERIC-NEXT:    pmuludq %xmm1, %xmm0 # sched: [5:1.00]
   10094 ; GENERIC-NEXT:    pmuludq (%rdi), %xmm0 # sched: [11:1.00]
   10095 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   10096 ;
   10097 ; ATOM-LABEL: test_pmuludq:
   10098 ; ATOM:       # %bb.0:
   10099 ; ATOM-NEXT:    pmuludq %xmm1, %xmm0 # sched: [5:5.00]
   10100 ; ATOM-NEXT:    pmuludq (%rdi), %xmm0 # sched: [5:5.00]
   10101 ; ATOM-NEXT:    retq # sched: [79:39.50]
   10102 ;
   10103 ; SLM-LABEL: test_pmuludq:
   10104 ; SLM:       # %bb.0:
   10105 ; SLM-NEXT:    pmuludq %xmm1, %xmm0 # sched: [4:1.00]
   10106 ; SLM-NEXT:    pmuludq (%rdi), %xmm0 # sched: [7:1.00]
   10107 ; SLM-NEXT:    retq # sched: [4:1.00]
   10108 ;
   10109 ; SANDY-SSE-LABEL: test_pmuludq:
   10110 ; SANDY-SSE:       # %bb.0:
   10111 ; SANDY-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [5:1.00]
   10112 ; SANDY-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [11:1.00]
   10113 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   10114 ;
   10115 ; SANDY-LABEL: test_pmuludq:
   10116 ; SANDY:       # %bb.0:
   10117 ; SANDY-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   10118 ; SANDY-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   10119 ; SANDY-NEXT:    retq # sched: [1:1.00]
   10120 ;
   10121 ; HASWELL-SSE-LABEL: test_pmuludq:
   10122 ; HASWELL-SSE:       # %bb.0:
   10123 ; HASWELL-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [5:1.00]
   10124 ; HASWELL-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [11:1.00]
   10125 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10126 ;
   10127 ; HASWELL-LABEL: test_pmuludq:
   10128 ; HASWELL:       # %bb.0:
   10129 ; HASWELL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   10130 ; HASWELL-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   10131 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   10132 ;
   10133 ; BROADWELL-SSE-LABEL: test_pmuludq:
   10134 ; BROADWELL-SSE:       # %bb.0:
   10135 ; BROADWELL-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [5:1.00]
   10136 ; BROADWELL-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [10:1.00]
   10137 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10138 ;
   10139 ; BROADWELL-LABEL: test_pmuludq:
   10140 ; BROADWELL:       # %bb.0:
   10141 ; BROADWELL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   10142 ; BROADWELL-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   10143 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   10144 ;
   10145 ; SKYLAKE-SSE-LABEL: test_pmuludq:
   10146 ; SKYLAKE-SSE:       # %bb.0:
   10147 ; SKYLAKE-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [4:0.50]
   10148 ; SKYLAKE-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [10:0.50]
   10149 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   10150 ;
   10151 ; SKYLAKE-LABEL: test_pmuludq:
   10152 ; SKYLAKE:       # %bb.0:
   10153 ; SKYLAKE-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   10154 ; SKYLAKE-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   10155 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   10156 ;
   10157 ; SKX-SSE-LABEL: test_pmuludq:
   10158 ; SKX-SSE:       # %bb.0:
   10159 ; SKX-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [4:0.50]
   10160 ; SKX-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [10:0.50]
   10161 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   10162 ;
   10163 ; SKX-LABEL: test_pmuludq:
   10164 ; SKX:       # %bb.0:
   10165 ; SKX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   10166 ; SKX-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   10167 ; SKX-NEXT:    retq # sched: [7:1.00]
   10168 ;
   10169 ; BTVER2-SSE-LABEL: test_pmuludq:
   10170 ; BTVER2-SSE:       # %bb.0:
   10171 ; BTVER2-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [2:1.00]
   10172 ; BTVER2-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [7:1.00]
   10173 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   10174 ;
   10175 ; BTVER2-LABEL: test_pmuludq:
   10176 ; BTVER2:       # %bb.0:
   10177 ; BTVER2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   10178 ; BTVER2-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   10179 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   10180 ;
   10181 ; ZNVER1-SSE-LABEL: test_pmuludq:
   10182 ; ZNVER1-SSE:       # %bb.0:
   10183 ; ZNVER1-SSE-NEXT:    pmuludq %xmm1, %xmm0 # sched: [4:1.00]
   10184 ; ZNVER1-SSE-NEXT:    pmuludq (%rdi), %xmm0 # sched: [11:1.00]
   10185 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   10186 ;
   10187 ; ZNVER1-LABEL: test_pmuludq:
   10188 ; ZNVER1:       # %bb.0:
   10189 ; ZNVER1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
   10190 ; ZNVER1-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   10191 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
         ; Scheduling test for the pmulu.dq intrinsic, covering both operand forms
         ; that appear in the check blocks above.
         ; The first call multiplies %a0 by %a1 (the register/register pmuludq).
         ; Its <2 x i64> result is bitcast back to <4 x i32> so it can be an operand
         ; of the second call, whose other operand is loaded from %a2 (the pmuludq
         ; with the (%rdi) memory operand).
   10192   %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
   10193   %2 = bitcast <2 x i64> %1 to <4 x i32>
   10194   %3 = load <4 x i32>, <4 x i32> *%a2, align 16
   10195   %4 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %2, <4 x i32> %3)
   10196   ret <2 x i64> %4
   10197 }
         ; Declaration of the intrinsic that test_pmuludq calls twice.
   10198 declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
   10199 
   10200 define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
   10201 ; GENERIC-LABEL: test_por:
   10202 ; GENERIC:       # %bb.0:
   10203 ; GENERIC-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   10204 ; GENERIC-NEXT:    por (%rdi), %xmm0 # sched: [7:0.50]
   10205 ; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   10206 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   10207 ;
   10208 ; ATOM-LABEL: test_por:
   10209 ; ATOM:       # %bb.0:
   10210 ; ATOM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
   10211 ; ATOM-NEXT:    por (%rdi), %xmm0 # sched: [1:1.00]
   10212 ; ATOM-NEXT:    paddq %xmm1, %xmm0 # sched: [2:1.00]
   10213 ; ATOM-NEXT:    retq # sched: [79:39.50]
   10214 ;
   10215 ; SLM-LABEL: test_por:
   10216 ; SLM:       # %bb.0:
   10217 ; SLM-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
   10218 ; SLM-NEXT:    por (%rdi), %xmm0 # sched: [4:1.00]
   10219 ; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   10220 ; SLM-NEXT:    retq # sched: [4:1.00]
   10221 ;
   10222 ; SANDY-SSE-LABEL: test_por:
   10223 ; SANDY-SSE:       # %bb.0:
   10224 ; SANDY-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   10225 ; SANDY-SSE-NEXT:    por (%rdi), %xmm0 # sched: [7:0.50]
   10226 ; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   10227 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   10228 ;
   10229 ; SANDY-LABEL: test_por:
   10230 ; SANDY:       # %bb.0:
   10231 ; SANDY-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   10232 ; SANDY-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   10233 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10234 ; SANDY-NEXT:    retq # sched: [1:1.00]
   10235 ;
   10236 ; HASWELL-SSE-LABEL: test_por:
   10237 ; HASWELL-SSE:       # %bb.0:
   10238 ; HASWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   10239 ; HASWELL-SSE-NEXT:    por (%rdi), %xmm0 # sched: [7:0.50]
   10240 ; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   10241 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10242 ;
   10243 ; HASWELL-LABEL: test_por:
   10244 ; HASWELL:       # %bb.0:
   10245 ; HASWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   10246 ; HASWELL-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   10247 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10248 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   10249 ;
   10250 ; BROADWELL-SSE-LABEL: test_por:
   10251 ; BROADWELL-SSE:       # %bb.0:
   10252 ; BROADWELL-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   10253 ; BROADWELL-SSE-NEXT:    por (%rdi), %xmm0 # sched: [6:0.50]
   10254 ; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   10255 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10256 ;
   10257 ; BROADWELL-LABEL: test_por:
   10258 ; BROADWELL:       # %bb.0:
   10259 ; BROADWELL-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   10260 ; BROADWELL-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   10261 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10262 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   10263 ;
   10264 ; SKYLAKE-SSE-LABEL: test_por:
   10265 ; SKYLAKE-SSE:       # %bb.0:
   10266 ; SKYLAKE-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   10267 ; SKYLAKE-SSE-NEXT:    por (%rdi), %xmm0 # sched: [7:0.50]
   10268 ; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   10269 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   10270 ;
   10271 ; SKYLAKE-LABEL: test_por:
   10272 ; SKYLAKE:       # %bb.0:
   10273 ; SKYLAKE-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   10274 ; SKYLAKE-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   10275 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   10276 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   10277 ;
   10278 ; SKX-SSE-LABEL: test_por:
   10279 ; SKX-SSE:       # %bb.0:
   10280 ; SKX-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.33]
   10281 ; SKX-SSE-NEXT:    por (%rdi), %xmm0 # sched: [7:0.50]
   10282 ; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   10283 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   10284 ;
   10285 ; SKX-LABEL: test_por:
   10286 ; SKX:       # %bb.0:
   10287 ; SKX-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   10288 ; SKX-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   10289 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   10290 ; SKX-NEXT:    retq # sched: [7:1.00]
   10291 ;
   10292 ; BTVER2-SSE-LABEL: test_por:
   10293 ; BTVER2-SSE:       # %bb.0:
   10294 ; BTVER2-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.50]
   10295 ; BTVER2-SSE-NEXT:    por (%rdi), %xmm0 # sched: [6:1.00]
   10296 ; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   10297 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   10298 ;
   10299 ; BTVER2-LABEL: test_por:
   10300 ; BTVER2:       # %bb.0:
   10301 ; BTVER2-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10302 ; BTVER2-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   10303 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10304 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   10305 ;
   10306 ; ZNVER1-SSE-LABEL: test_por:
   10307 ; ZNVER1-SSE:       # %bb.0:
   10308 ; ZNVER1-SSE-NEXT:    por %xmm1, %xmm0 # sched: [1:0.25]
   10309 ; ZNVER1-SSE-NEXT:    por (%rdi), %xmm0 # sched: [8:0.50]
   10310 ; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
   10311 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   10312 ;
   10313 ; ZNVER1-LABEL: test_por:
   10314 ; ZNVER1:       # %bb.0:
   10315 ; ZNVER1-NEXT:    vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   10316 ; ZNVER1-NEXT:    vpor (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   10317 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   10318 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
         ; Scheduling test for por/vpor in both operand forms.
         ; %a0 is ORed with %a1 in registers, then the result is ORed with the
         ; <2 x i64> value loaded from %a2 (the por with the (%rdi) operand above).
         ; The trailing add of %a1 is the paddq/vpaddq that follows the two ORs in
         ; every check block.
         ; NOTE(review) presumably the add is there to keep the ORs in the integer
         ; domain so that por is emitted rather than a float-domain OR - confirm.
   10319   %1 = or <2 x i64> %a0, %a1
   10320   %2 = load <2 x i64>, <2 x i64> *%a2, align 16
   10321   %3 = or <2 x i64> %1, %2
   10322   %4 = add <2 x i64> %3, %a1
   10323   ret <2 x i64> %4
   10324 }
   10325 
   10326 define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
   10327 ; GENERIC-LABEL: test_psadbw:
   10328 ; GENERIC:       # %bb.0:
   10329 ; GENERIC-NEXT:    psadbw %xmm1, %xmm0 # sched: [5:1.00]
   10330 ; GENERIC-NEXT:    psadbw (%rdi), %xmm0 # sched: [11:1.00]
   10331 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   10332 ;
   10333 ; ATOM-LABEL: test_psadbw:
   10334 ; ATOM:       # %bb.0:
   10335 ; ATOM-NEXT:    psadbw %xmm1, %xmm0 # sched: [5:5.00]
   10336 ; ATOM-NEXT:    psadbw (%rdi), %xmm0 # sched: [5:5.00]
   10337 ; ATOM-NEXT:    retq # sched: [79:39.50]
   10338 ;
   10339 ; SLM-LABEL: test_psadbw:
   10340 ; SLM:       # %bb.0:
   10341 ; SLM-NEXT:    psadbw %xmm1, %xmm0 # sched: [4:1.00]
   10342 ; SLM-NEXT:    psadbw (%rdi), %xmm0 # sched: [7:1.00]
   10343 ; SLM-NEXT:    retq # sched: [4:1.00]
   10344 ;
   10345 ; SANDY-SSE-LABEL: test_psadbw:
   10346 ; SANDY-SSE:       # %bb.0:
   10347 ; SANDY-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [5:1.00]
   10348 ; SANDY-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [11:1.00]
   10349 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   10350 ;
   10351 ; SANDY-LABEL: test_psadbw:
   10352 ; SANDY:       # %bb.0:
   10353 ; SANDY-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   10354 ; SANDY-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   10355 ; SANDY-NEXT:    retq # sched: [1:1.00]
   10356 ;
   10357 ; HASWELL-SSE-LABEL: test_psadbw:
   10358 ; HASWELL-SSE:       # %bb.0:
   10359 ; HASWELL-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [5:1.00]
   10360 ; HASWELL-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [11:1.00]
   10361 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10362 ;
   10363 ; HASWELL-LABEL: test_psadbw:
   10364 ; HASWELL:       # %bb.0:
   10365 ; HASWELL-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   10366 ; HASWELL-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   10367 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   10368 ;
   10369 ; BROADWELL-SSE-LABEL: test_psadbw:
   10370 ; BROADWELL-SSE:       # %bb.0:
   10371 ; BROADWELL-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [5:1.00]
   10372 ; BROADWELL-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [10:1.00]
   10373 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10374 ;
   10375 ; BROADWELL-LABEL: test_psadbw:
   10376 ; BROADWELL:       # %bb.0:
   10377 ; BROADWELL-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   10378 ; BROADWELL-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   10379 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   10380 ;
   10381 ; SKYLAKE-SSE-LABEL: test_psadbw:
   10382 ; SKYLAKE-SSE:       # %bb.0:
   10383 ; SKYLAKE-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [3:1.00]
   10384 ; SKYLAKE-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [9:1.00]
   10385 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   10386 ;
   10387 ; SKYLAKE-LABEL: test_psadbw:
   10388 ; SKYLAKE:       # %bb.0:
   10389 ; SKYLAKE-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   10390 ; SKYLAKE-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   10391 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   10392 ;
   10393 ; SKX-SSE-LABEL: test_psadbw:
   10394 ; SKX-SSE:       # %bb.0:
   10395 ; SKX-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [3:1.00]
   10396 ; SKX-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [9:1.00]
   10397 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   10398 ;
   10399 ; SKX-LABEL: test_psadbw:
   10400 ; SKX:       # %bb.0:
   10401 ; SKX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   10402 ; SKX-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   10403 ; SKX-NEXT:    retq # sched: [7:1.00]
   10404 ;
   10405 ; BTVER2-SSE-LABEL: test_psadbw:
   10406 ; BTVER2-SSE:       # %bb.0:
   10407 ; BTVER2-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [2:0.50]
   10408 ; BTVER2-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [7:1.00]
   10409 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   10410 ;
   10411 ; BTVER2-LABEL: test_psadbw:
   10412 ; BTVER2:       # %bb.0:
   10413 ; BTVER2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
   10414 ; BTVER2-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   10415 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   10416 ;
   10417 ; ZNVER1-SSE-LABEL: test_psadbw:
   10418 ; ZNVER1-SSE:       # %bb.0:
   10419 ; ZNVER1-SSE-NEXT:    psadbw %xmm1, %xmm0 # sched: [3:1.00]
   10420 ; ZNVER1-SSE-NEXT:    psadbw (%rdi), %xmm0 # sched: [10:1.00]
   10421 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   10422 ;
   10423 ; ZNVER1-LABEL: test_psadbw:
   10424 ; ZNVER1:       # %bb.0:
   10425 ; ZNVER1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   10426 ; ZNVER1-NEXT:    vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   10427 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
         ; Scheduling test for the psad.bw intrinsic, covering both operand forms
         ; that appear in the check blocks above.
         ; The first call takes %a0 and %a1 (the register/register psadbw).
         ; Its <2 x i64> result is bitcast to <16 x i8> so it can be an operand of
         ; the second call, whose other operand is loaded from %a2 (the psadbw with
         ; the (%rdi) memory operand).
   10428   %1 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
   10429   %2 = bitcast <2 x i64> %1 to <16 x i8>
   10430   %3 = load <16 x i8>, <16 x i8> *%a2, align 16
   10431   %4 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %2, <16 x i8> %3)
   10432   ret <2 x i64> %4
   10433 }
         ; Declaration of the intrinsic that test_psadbw calls twice.
   10434 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
   10435 
   10436 define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) {
   10437 ; GENERIC-LABEL: test_pshufd:
   10438 ; GENERIC:       # %bb.0:
   10439 ; GENERIC-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
   10440 ; GENERIC-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50]
   10441 ; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   10442 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   10443 ;
   10444 ; ATOM-LABEL: test_pshufd:
   10445 ; ATOM:       # %bb.0:
   10446 ; ATOM-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
   10447 ; ATOM-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [1:1.00]
   10448 ; ATOM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   10449 ; ATOM-NEXT:    nop # sched: [1:0.50]
   10450 ; ATOM-NEXT:    nop # sched: [1:0.50]
   10451 ; ATOM-NEXT:    retq # sched: [79:39.50]
   10452 ;
   10453 ; SLM-LABEL: test_pshufd:
   10454 ; SLM:       # %bb.0:
   10455 ; SLM-NEXT:    pshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [4:1.00]
   10456 ; SLM-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
   10457 ; SLM-NEXT:    paddd %xmm0, %xmm1 # sched: [1:0.50]
   10458 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   10459 ; SLM-NEXT:    retq # sched: [4:1.00]
   10460 ;
   10461 ; SANDY-SSE-LABEL: test_pshufd:
   10462 ; SANDY-SSE:       # %bb.0:
   10463 ; SANDY-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
   10464 ; SANDY-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50]
   10465 ; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   10466 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   10467 ;
   10468 ; SANDY-LABEL: test_pshufd:
   10469 ; SANDY:       # %bb.0:
   10470 ; SANDY-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50]
   10471 ; SANDY-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50]
   10472 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10473 ; SANDY-NEXT:    retq # sched: [1:1.00]
   10474 ;
   10475 ; HASWELL-SSE-LABEL: test_pshufd:
   10476 ; HASWELL-SSE:       # %bb.0:
   10477 ; HASWELL-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
   10478 ; HASWELL-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
   10479 ; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   10480 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10481 ;
   10482 ; HASWELL-LABEL: test_pshufd:
   10483 ; HASWELL:       # %bb.0:
   10484 ; HASWELL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
   10485 ; HASWELL-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
   10486 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10487 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   10488 ;
   10489 ; BROADWELL-SSE-LABEL: test_pshufd:
   10490 ; BROADWELL-SSE:       # %bb.0:
   10491 ; BROADWELL-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
   10492 ; BROADWELL-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00]
   10493 ; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   10494 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10495 ;
   10496 ; BROADWELL-LABEL: test_pshufd:
   10497 ; BROADWELL:       # %bb.0:
   10498 ; BROADWELL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
   10499 ; BROADWELL-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
   10500 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10501 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   10502 ;
   10503 ; SKYLAKE-SSE-LABEL: test_pshufd:
   10504 ; SKYLAKE-SSE:       # %bb.0:
   10505 ; SKYLAKE-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
   10506 ; SKYLAKE-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
   10507 ; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   10508 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   10509 ;
   10510 ; SKYLAKE-LABEL: test_pshufd:
   10511 ; SKYLAKE:       # %bb.0:
   10512 ; SKYLAKE-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
   10513 ; SKYLAKE-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
   10514 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   10515 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   10516 ;
   10517 ; SKX-SSE-LABEL: test_pshufd:
   10518 ; SKX-SSE:       # %bb.0:
   10519 ; SKX-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
   10520 ; SKX-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
   10521 ; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   10522 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   10523 ;
   10524 ; SKX-LABEL: test_pshufd:
   10525 ; SKX:       # %bb.0:
   10526 ; SKX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
   10527 ; SKX-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
   10528 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   10529 ; SKX-NEXT:    retq # sched: [7:1.00]
   10530 ;
   10531 ; BTVER2-SSE-LABEL: test_pshufd:
   10532 ; BTVER2-SSE:       # %bb.0:
   10533 ; BTVER2-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
   10534 ; BTVER2-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00]
   10535 ; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   10536 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   10537 ;
   10538 ; BTVER2-LABEL: test_pshufd:
   10539 ; BTVER2:       # %bb.0:
   10540 ; BTVER2-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
   10541 ; BTVER2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50]
   10542 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10543 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   10544 ;
   10545 ; ZNVER1-SSE-LABEL: test_pshufd:
   10546 ; ZNVER1-SSE:       # %bb.0:
   10547 ; ZNVER1-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.25]
   10548 ; ZNVER1-SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [8:0.50]
   10549 ; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
   10550 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   10551 ;
   10552 ; ZNVER1-LABEL: test_pshufd:
   10553 ; ZNVER1:       # %bb.0:
   10554 ; ZNVER1-NEXT:    vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [8:0.50]
   10555 ; ZNVER1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.25]
   10556 ; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   10557 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
         ; Scheduling test for pshufd/vpshufd in both operand forms.
         ; %a0 is shuffled with mask <1,0,3,2> (the xmm0[1,0,3,2] form above) and the
         ; <4 x i32> value loaded from %a1 is shuffled with mask <3,2,1,0> (the
         ; mem[3,2,1,0] form above).
         ; The final add uses both shuffle results and is the paddd/vpaddd in the
         ; check blocks.
   10558   %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
   10559   %2 = load <4 x i32>, <4 x i32> *%a1, align 16
   10560   %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   10561   %4 = add <4 x i32> %1, %3
   10562   ret <4 x i32> %4
   10563 }
   10564 
   10565 define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) {
   10566 ; GENERIC-LABEL: test_pshufhw:
   10567 ; GENERIC:       # %bb.0:
   10568 ; GENERIC-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
   10569 ; GENERIC-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
   10570 ; GENERIC-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   10571 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   10572 ;
   10573 ; ATOM-LABEL: test_pshufhw:
   10574 ; ATOM:       # %bb.0:
   10575 ; ATOM-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
   10576 ; ATOM-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00]
   10577 ; ATOM-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   10578 ; ATOM-NEXT:    nop # sched: [1:0.50]
   10579 ; ATOM-NEXT:    nop # sched: [1:0.50]
   10580 ; ATOM-NEXT:    retq # sched: [79:39.50]
   10581 ;
   10582 ; SLM-LABEL: test_pshufhw:
   10583 ; SLM:       # %bb.0:
   10584 ; SLM-NEXT:    pshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [4:1.00]
   10585 ; SLM-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
   10586 ; SLM-NEXT:    paddw %xmm0, %xmm1 # sched: [1:0.50]
   10587 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   10588 ; SLM-NEXT:    retq # sched: [4:1.00]
   10589 ;
   10590 ; SANDY-SSE-LABEL: test_pshufhw:
   10591 ; SANDY-SSE:       # %bb.0:
   10592 ; SANDY-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
   10593 ; SANDY-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
   10594 ; SANDY-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   10595 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   10596 ;
   10597 ; SANDY-LABEL: test_pshufhw:
   10598 ; SANDY:       # %bb.0:
   10599 ; SANDY-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
   10600 ; SANDY-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
   10601 ; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10602 ; SANDY-NEXT:    retq # sched: [1:1.00]
   10603 ;
   10604 ; HASWELL-SSE-LABEL: test_pshufhw:
   10605 ; HASWELL-SSE:       # %bb.0:
   10606 ; HASWELL-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
   10607 ; HASWELL-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
   10608 ; HASWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   10609 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10610 ;
   10611 ; HASWELL-LABEL: test_pshufhw:
   10612 ; HASWELL:       # %bb.0:
   10613 ; HASWELL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
   10614 ; HASWELL-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
   10615 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10616 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   10617 ;
   10618 ; BROADWELL-SSE-LABEL: test_pshufhw:
   10619 ; BROADWELL-SSE:       # %bb.0:
   10620 ; BROADWELL-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
   10621 ; BROADWELL-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
   10622 ; BROADWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   10623 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10624 ;
   10625 ; BROADWELL-LABEL: test_pshufhw:
   10626 ; BROADWELL:       # %bb.0:
   10627 ; BROADWELL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
   10628 ; BROADWELL-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
   10629 ; BROADWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10630 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   10631 ;
   10632 ; SKYLAKE-SSE-LABEL: test_pshufhw:
   10633 ; SKYLAKE-SSE:       # %bb.0:
   10634 ; SKYLAKE-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
   10635 ; SKYLAKE-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
   10636 ; SKYLAKE-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
   10637 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   10638 ;
   10639 ; SKYLAKE-LABEL: test_pshufhw:
   10640 ; SKYLAKE:       # %bb.0:
   10641 ; SKYLAKE-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
   10642 ; SKYLAKE-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
   10643 ; SKYLAKE-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   10644 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   10645 ;
   10646 ; SKX-SSE-LABEL: test_pshufhw:
   10647 ; SKX-SSE:       # %bb.0:
   10648 ; SKX-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
   10649 ; SKX-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
   10650 ; SKX-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
   10651 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   10652 ;
   10653 ; SKX-LABEL: test_pshufhw:
   10654 ; SKX:       # %bb.0:
   10655 ; SKX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
   10656 ; SKX-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
   10657 ; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   10658 ; SKX-NEXT:    retq # sched: [7:1.00]
   10659 ;
   10660 ; BTVER2-SSE-LABEL: test_pshufhw:
   10661 ; BTVER2-SSE:       # %bb.0:
   10662 ; BTVER2-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
   10663 ; BTVER2-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
   10664 ; BTVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   10665 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   10666 ;
   10667 ; BTVER2-LABEL: test_pshufhw:
   10668 ; BTVER2:       # %bb.0:
   10669 ; BTVER2-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
   10670 ; BTVER2-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
   10671 ; BTVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10672 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   10673 ;
   10674 ; ZNVER1-SSE-LABEL: test_pshufhw:
   10675 ; ZNVER1-SSE:       # %bb.0:
   10676 ; ZNVER1-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25]
   10677 ; ZNVER1-SSE-NEXT:    pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50]
   10678 ; ZNVER1-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.25]
   10679 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   10680 ;
   10681 ; ZNVER1-LABEL: test_pshufhw:
   10682 ; ZNVER1:       # %bb.0:
   10683 ; ZNVER1-NEXT:    vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50]
   10684 ; ZNVER1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25]
   10685 ; ZNVER1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   10686 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
         ; Scheduling test for pshufhw/vpshufhw in both operand forms.
         ; Both masks keep elements 0-3 in place and permute only elements 4-7.
         ; %a0 uses <0,1,2,3,5,4,7,6> (the xmm0[...] form above) and the <8 x i16>
         ; value loaded from %a1 uses <0,1,2,3,7,6,5,4> (the mem[...] form above).
         ; The final add uses both shuffle results and is the paddw/vpaddw in the
         ; check blocks.
   10687   %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6>
   10688   %2 = load <8 x i16>, <8 x i16> *%a1, align 16
   10689   %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4>
   10690   %4 = add <8 x i16> %1, %3
   10691   ret <8 x i16> %4
   10692 }
   10693 
   10694 define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) {
   10695 ; GENERIC-LABEL: test_pshuflw:
   10696 ; GENERIC:       # %bb.0:
   10697 ; GENERIC-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
   10698 ; GENERIC-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
   10699 ; GENERIC-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   10700 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   10701 ;
   10702 ; ATOM-LABEL: test_pshuflw:
   10703 ; ATOM:       # %bb.0:
   10704 ; ATOM-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
   10705 ; ATOM-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00]
   10706 ; ATOM-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   10707 ; ATOM-NEXT:    nop # sched: [1:0.50]
   10708 ; ATOM-NEXT:    nop # sched: [1:0.50]
   10709 ; ATOM-NEXT:    retq # sched: [79:39.50]
   10710 ;
   10711 ; SLM-LABEL: test_pshuflw:
   10712 ; SLM:       # %bb.0:
   10713 ; SLM-NEXT:    pshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [4:1.00]
   10714 ; SLM-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
   10715 ; SLM-NEXT:    paddw %xmm0, %xmm1 # sched: [1:0.50]
   10716 ; SLM-NEXT:    movdqa %xmm1, %xmm0 # sched: [1:0.50]
   10717 ; SLM-NEXT:    retq # sched: [4:1.00]
   10718 ;
   10719 ; SANDY-SSE-LABEL: test_pshuflw:
   10720 ; SANDY-SSE:       # %bb.0:
   10721 ; SANDY-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
   10722 ; SANDY-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
   10723 ; SANDY-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   10724 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   10725 ;
   10726 ; SANDY-LABEL: test_pshuflw:
   10727 ; SANDY:       # %bb.0:
   10728 ; SANDY-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
   10729 ; SANDY-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
   10730 ; SANDY-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10731 ; SANDY-NEXT:    retq # sched: [1:1.00]
   10732 ;
   10733 ; HASWELL-SSE-LABEL: test_pshuflw:
   10734 ; HASWELL-SSE:       # %bb.0:
   10735 ; HASWELL-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
   10736 ; HASWELL-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
   10737 ; HASWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   10738 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10739 ;
   10740 ; HASWELL-LABEL: test_pshuflw:
   10741 ; HASWELL:       # %bb.0:
   10742 ; HASWELL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
   10743 ; HASWELL-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
   10744 ; HASWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10745 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   10746 ;
   10747 ; BROADWELL-SSE-LABEL: test_pshuflw:
   10748 ; BROADWELL-SSE:       # %bb.0:
   10749 ; BROADWELL-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
   10750 ; BROADWELL-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
   10751 ; BROADWELL-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   10752 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10753 ;
   10754 ; BROADWELL-LABEL: test_pshuflw:
   10755 ; BROADWELL:       # %bb.0:
   10756 ; BROADWELL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
   10757 ; BROADWELL-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
   10758 ; BROADWELL-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10759 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   10760 ;
   10761 ; SKYLAKE-SSE-LABEL: test_pshuflw:
   10762 ; SKYLAKE-SSE:       # %bb.0:
   10763 ; SKYLAKE-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
   10764 ; SKYLAKE-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
   10765 ; SKYLAKE-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
   10766 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   10767 ;
   10768 ; SKYLAKE-LABEL: test_pshuflw:
   10769 ; SKYLAKE:       # %bb.0:
   10770 ; SKYLAKE-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
   10771 ; SKYLAKE-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
   10772 ; SKYLAKE-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   10773 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   10774 ;
   10775 ; SKX-SSE-LABEL: test_pshuflw:
   10776 ; SKX-SSE:       # %bb.0:
   10777 ; SKX-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
   10778 ; SKX-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
   10779 ; SKX-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.33]
   10780 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   10781 ;
   10782 ; SKX-LABEL: test_pshuflw:
   10783 ; SKX:       # %bb.0:
   10784 ; SKX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
   10785 ; SKX-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
   10786 ; SKX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   10787 ; SKX-NEXT:    retq # sched: [7:1.00]
   10788 ;
   10789 ; BTVER2-SSE-LABEL: test_pshuflw:
   10790 ; BTVER2-SSE:       # %bb.0:
   10791 ; BTVER2-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
   10792 ; BTVER2-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
   10793 ; BTVER2-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.50]
   10794 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   10795 ;
   10796 ; BTVER2-LABEL: test_pshuflw:
   10797 ; BTVER2:       # %bb.0:
   10798 ; BTVER2-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
   10799 ; BTVER2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
   10800 ; BTVER2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10801 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   10802 ;
   10803 ; ZNVER1-SSE-LABEL: test_pshuflw:
   10804 ; ZNVER1-SSE:       # %bb.0:
   10805 ; ZNVER1-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25]
   10806 ; ZNVER1-SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50]
   10807 ; ZNVER1-SSE-NEXT:    paddw %xmm1, %xmm0 # sched: [1:0.25]
   10808 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   10809 ;
   10810 ; ZNVER1-LABEL: test_pshuflw:
   10811 ; ZNVER1:       # %bb.0:
   10812 ; ZNVER1-NEXT:    vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50]
   10813 ; ZNVER1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25]
   10814 ; ZNVER1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   10815 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
           ; Scheduling test for (v)pshuflw in its register-source and memory-source
           ; forms; the per-target assertions above are autogenerated.
           ; Register source: swap adjacent pairs within the low four words of %a0;
           ; the high four words (indices 4-7) keep their positions.
   10816   %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
           ; Memory source: load a vector through %a1 and reverse its low four words.
   10817   %2 = load <8 x i16>, <8 x i16> *%a1, align 16
   10818   %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
           ; Add the two shuffled vectors so that both shuffles feed the returned value.
   10819   %4 = add <8 x i16> %1, %3
   10820   ret <8 x i16> %4
   10821 }
   10822 
   10823 define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
   10824 ; GENERIC-LABEL: test_pslld:
   10825 ; GENERIC:       # %bb.0:
   10826 ; GENERIC-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
   10827 ; GENERIC-NEXT:    pslld (%rdi), %xmm0 # sched: [8:1.00]
   10828 ; GENERIC-NEXT:    pslld $2, %xmm0 # sched: [1:1.00]
   10829 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   10830 ;
   10831 ; ATOM-LABEL: test_pslld:
   10832 ; ATOM:       # %bb.0:
   10833 ; ATOM-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
   10834 ; ATOM-NEXT:    pslld (%rdi), %xmm0 # sched: [3:1.50]
   10835 ; ATOM-NEXT:    pslld $2, %xmm0 # sched: [1:0.50]
   10836 ; ATOM-NEXT:    retq # sched: [79:39.50]
   10837 ;
   10838 ; SLM-LABEL: test_pslld:
   10839 ; SLM:       # %bb.0:
   10840 ; SLM-NEXT:    pslld %xmm1, %xmm0 # sched: [1:1.00]
   10841 ; SLM-NEXT:    pslld (%rdi), %xmm0 # sched: [4:1.00]
   10842 ; SLM-NEXT:    pslld $2, %xmm0 # sched: [1:1.00]
   10843 ; SLM-NEXT:    retq # sched: [4:1.00]
   10844 ;
   10845 ; SANDY-SSE-LABEL: test_pslld:
   10846 ; SANDY-SSE:       # %bb.0:
   10847 ; SANDY-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
   10848 ; SANDY-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [8:1.00]
   10849 ; SANDY-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:1.00]
   10850 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   10851 ;
   10852 ; SANDY-LABEL: test_pslld:
   10853 ; SANDY:       # %bb.0:
   10854 ; SANDY-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   10855 ; SANDY-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   10856 ; SANDY-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
   10857 ; SANDY-NEXT:    retq # sched: [1:1.00]
   10858 ;
   10859 ; HASWELL-SSE-LABEL: test_pslld:
   10860 ; HASWELL-SSE:       # %bb.0:
   10861 ; HASWELL-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
   10862 ; HASWELL-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [8:1.00]
   10863 ; HASWELL-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:1.00]
   10864 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10865 ;
   10866 ; HASWELL-LABEL: test_pslld:
   10867 ; HASWELL:       # %bb.0:
   10868 ; HASWELL-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   10869 ; HASWELL-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   10870 ; HASWELL-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
   10871 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   10872 ;
   10873 ; BROADWELL-SSE-LABEL: test_pslld:
   10874 ; BROADWELL-SSE:       # %bb.0:
   10875 ; BROADWELL-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
   10876 ; BROADWELL-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [7:1.00]
   10877 ; BROADWELL-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:1.00]
   10878 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10879 ;
   10880 ; BROADWELL-LABEL: test_pslld:
   10881 ; BROADWELL:       # %bb.0:
   10882 ; BROADWELL-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   10883 ; BROADWELL-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   10884 ; BROADWELL-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
   10885 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   10886 ;
   10887 ; SKYLAKE-SSE-LABEL: test_pslld:
   10888 ; SKYLAKE-SSE:       # %bb.0:
   10889 ; SKYLAKE-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
   10890 ; SKYLAKE-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [7:0.50]
   10891 ; SKYLAKE-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:0.50]
   10892 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   10893 ;
   10894 ; SKYLAKE-LABEL: test_pslld:
   10895 ; SKYLAKE:       # %bb.0:
   10896 ; SKYLAKE-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   10897 ; SKYLAKE-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   10898 ; SKYLAKE-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
   10899 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   10900 ;
   10901 ; SKX-SSE-LABEL: test_pslld:
   10902 ; SKX-SSE:       # %bb.0:
   10903 ; SKX-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [2:1.00]
   10904 ; SKX-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [7:0.50]
   10905 ; SKX-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:0.50]
   10906 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   10907 ;
   10908 ; SKX-LABEL: test_pslld:
   10909 ; SKX:       # %bb.0:
   10910 ; SKX-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   10911 ; SKX-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   10912 ; SKX-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
   10913 ; SKX-NEXT:    retq # sched: [7:1.00]
   10914 ;
   10915 ; BTVER2-SSE-LABEL: test_pslld:
   10916 ; BTVER2-SSE:       # %bb.0:
   10917 ; BTVER2-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [1:0.50]
   10918 ; BTVER2-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [6:1.00]
   10919 ; BTVER2-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:0.50]
   10920 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   10921 ;
   10922 ; BTVER2-LABEL: test_pslld:
   10923 ; BTVER2:       # %bb.0:
   10924 ; BTVER2-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   10925 ; BTVER2-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   10926 ; BTVER2-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
   10927 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   10928 ;
   10929 ; ZNVER1-SSE-LABEL: test_pslld:
   10930 ; ZNVER1-SSE:       # %bb.0:
   10931 ; ZNVER1-SSE-NEXT:    pslld %xmm1, %xmm0 # sched: [1:1.00]
   10932 ; ZNVER1-SSE-NEXT:    pslld (%rdi), %xmm0 # sched: [8:1.00]
   10933 ; ZNVER1-SSE-NEXT:    pslld $2, %xmm0 # sched: [1:0.25]
   10934 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   10935 ;
   10936 ; ZNVER1-LABEL: test_pslld:
   10937 ; ZNVER1:       # %bb.0:
   10938 ; ZNVER1-NEXT:    vpslld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   10939 ; ZNVER1-NEXT:    vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   10940 ; ZNVER1-NEXT:    vpslld $2, %xmm0, %xmm0 # sched: [1:0.25]
   10941 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
           ; Scheduling test for (v)pslld with its count supplied three ways; the three
           ; calls are chained so each result feeds the next.
           ; Count in a register argument (%a1).
   10942   %1 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
           ; Count loaded from memory through %a2.
   10943   %2 = load <4 x i32>, <4 x i32> *%a2, align 16
   10944   %3 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %1, <4 x i32> %2)
           ; Immediate count of 2.
   10945   %4 = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %3, i32 2)
   10946   ret <4 x i32> %4
   10947 }
         ; Intrinsics called by test_pslld: vector-count and i32-count forms.
   10948 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
   10949 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
   10950 
   10951 define <4 x i32> @test_pslldq(<4 x i32> %a0) {
   10952 ; GENERIC-LABEL: test_pslldq:
   10953 ; GENERIC:       # %bb.0:
   10954 ; GENERIC-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
   10955 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   10956 ;
   10957 ; ATOM-LABEL: test_pslldq:
   10958 ; ATOM:       # %bb.0:
   10959 ; ATOM-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
   10960 ; ATOM-NEXT:    nop # sched: [1:0.50]
   10961 ; ATOM-NEXT:    nop # sched: [1:0.50]
   10962 ; ATOM-NEXT:    nop # sched: [1:0.50]
   10963 ; ATOM-NEXT:    nop # sched: [1:0.50]
   10964 ; ATOM-NEXT:    nop # sched: [1:0.50]
   10965 ; ATOM-NEXT:    nop # sched: [1:0.50]
   10966 ; ATOM-NEXT:    retq # sched: [79:39.50]
   10967 ;
   10968 ; SLM-LABEL: test_pslldq:
   10969 ; SLM:       # %bb.0:
   10970 ; SLM-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
   10971 ; SLM-NEXT:    retq # sched: [4:1.00]
   10972 ;
   10973 ; SANDY-SSE-LABEL: test_pslldq:
   10974 ; SANDY-SSE:       # %bb.0:
   10975 ; SANDY-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
   10976 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   10977 ;
   10978 ; SANDY-LABEL: test_pslldq:
   10979 ; SANDY:       # %bb.0:
   10980 ; SANDY-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
   10981 ; SANDY-NEXT:    retq # sched: [1:1.00]
   10982 ;
   10983 ; HASWELL-SSE-LABEL: test_pslldq:
   10984 ; HASWELL-SSE:       # %bb.0:
   10985 ; HASWELL-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
   10986 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10987 ;
   10988 ; HASWELL-LABEL: test_pslldq:
   10989 ; HASWELL:       # %bb.0:
   10990 ; HASWELL-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
   10991 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   10992 ;
   10993 ; BROADWELL-SSE-LABEL: test_pslldq:
   10994 ; BROADWELL-SSE:       # %bb.0:
   10995 ; BROADWELL-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
   10996 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   10997 ;
   10998 ; BROADWELL-LABEL: test_pslldq:
   10999 ; BROADWELL:       # %bb.0:
   11000 ; BROADWELL-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
   11001 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   11002 ;
   11003 ; SKYLAKE-SSE-LABEL: test_pslldq:
   11004 ; SKYLAKE-SSE:       # %bb.0:
   11005 ; SKYLAKE-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
   11006 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   11007 ;
   11008 ; SKYLAKE-LABEL: test_pslldq:
   11009 ; SKYLAKE:       # %bb.0:
   11010 ; SKYLAKE-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
   11011 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   11012 ;
   11013 ; SKX-SSE-LABEL: test_pslldq:
   11014 ; SKX-SSE:       # %bb.0:
   11015 ; SKX-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
   11016 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   11017 ;
   11018 ; SKX-LABEL: test_pslldq:
   11019 ; SKX:       # %bb.0:
   11020 ; SKX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
   11021 ; SKX-NEXT:    retq # sched: [7:1.00]
   11022 ;
   11023 ; BTVER2-SSE-LABEL: test_pslldq:
   11024 ; BTVER2-SSE:       # %bb.0:
   11025 ; BTVER2-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
   11026 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   11027 ;
   11028 ; BTVER2-LABEL: test_pslldq:
   11029 ; BTVER2:       # %bb.0:
   11030 ; BTVER2-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
   11031 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   11032 ;
   11033 ; ZNVER1-SSE-LABEL: test_pslldq:
   11034 ; ZNVER1-SSE:       # %bb.0:
   11035 ; ZNVER1-SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
   11036 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   11037 ;
   11038 ; ZNVER1-LABEL: test_pslldq:
   11039 ; ZNVER1:       # %bb.0:
   11040 ; ZNVER1-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
   11041 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
           ; Scheduling test for (v)pslldq, written as a shuffle rather than an intrinsic.
           ; Mask index 4 selects element 0 of the zero vector; indices 0,1,2 select
           ; %a0[0..2]. The result is %a0 moved up by one i32 lane with a zero lane
           ; inserted at the bottom, matching the 4 zero bytes in the assertions above.
   11042   %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
   11043   ret <4 x i32> %1
   11044 }
   11045 
   11046 define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
   11047 ; GENERIC-LABEL: test_psllq:
   11048 ; GENERIC:       # %bb.0:
   11049 ; GENERIC-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
   11050 ; GENERIC-NEXT:    psllq (%rdi), %xmm0 # sched: [8:1.00]
   11051 ; GENERIC-NEXT:    psllq $2, %xmm0 # sched: [1:1.00]
   11052 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   11053 ;
   11054 ; ATOM-LABEL: test_psllq:
   11055 ; ATOM:       # %bb.0:
   11056 ; ATOM-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
   11057 ; ATOM-NEXT:    psllq (%rdi), %xmm0 # sched: [3:1.50]
   11058 ; ATOM-NEXT:    psllq $2, %xmm0 # sched: [1:0.50]
   11059 ; ATOM-NEXT:    retq # sched: [79:39.50]
   11060 ;
   11061 ; SLM-LABEL: test_psllq:
   11062 ; SLM:       # %bb.0:
   11063 ; SLM-NEXT:    psllq %xmm1, %xmm0 # sched: [1:1.00]
   11064 ; SLM-NEXT:    psllq (%rdi), %xmm0 # sched: [4:1.00]
   11065 ; SLM-NEXT:    psllq $2, %xmm0 # sched: [1:1.00]
   11066 ; SLM-NEXT:    retq # sched: [4:1.00]
   11067 ;
   11068 ; SANDY-SSE-LABEL: test_psllq:
   11069 ; SANDY-SSE:       # %bb.0:
   11070 ; SANDY-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
   11071 ; SANDY-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [8:1.00]
   11072 ; SANDY-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:1.00]
   11073 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   11074 ;
   11075 ; SANDY-LABEL: test_psllq:
   11076 ; SANDY:       # %bb.0:
   11077 ; SANDY-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11078 ; SANDY-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11079 ; SANDY-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
   11080 ; SANDY-NEXT:    retq # sched: [1:1.00]
   11081 ;
   11082 ; HASWELL-SSE-LABEL: test_psllq:
   11083 ; HASWELL-SSE:       # %bb.0:
   11084 ; HASWELL-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
   11085 ; HASWELL-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [8:1.00]
   11086 ; HASWELL-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:1.00]
   11087 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   11088 ;
   11089 ; HASWELL-LABEL: test_psllq:
   11090 ; HASWELL:       # %bb.0:
   11091 ; HASWELL-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11092 ; HASWELL-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11093 ; HASWELL-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
   11094 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   11095 ;
   11096 ; BROADWELL-SSE-LABEL: test_psllq:
   11097 ; BROADWELL-SSE:       # %bb.0:
   11098 ; BROADWELL-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
   11099 ; BROADWELL-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [7:1.00]
   11100 ; BROADWELL-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:1.00]
   11101 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   11102 ;
   11103 ; BROADWELL-LABEL: test_psllq:
   11104 ; BROADWELL:       # %bb.0:
   11105 ; BROADWELL-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11106 ; BROADWELL-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   11107 ; BROADWELL-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
   11108 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   11109 ;
   11110 ; SKYLAKE-SSE-LABEL: test_psllq:
   11111 ; SKYLAKE-SSE:       # %bb.0:
   11112 ; SKYLAKE-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
   11113 ; SKYLAKE-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [7:0.50]
   11114 ; SKYLAKE-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:0.50]
   11115 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   11116 ;
   11117 ; SKYLAKE-LABEL: test_psllq:
   11118 ; SKYLAKE:       # %bb.0:
   11119 ; SKYLAKE-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11120 ; SKYLAKE-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   11121 ; SKYLAKE-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
   11122 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   11123 ;
   11124 ; SKX-SSE-LABEL: test_psllq:
   11125 ; SKX-SSE:       # %bb.0:
   11126 ; SKX-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [2:1.00]
   11127 ; SKX-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [7:0.50]
   11128 ; SKX-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:0.50]
   11129 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   11130 ;
   11131 ; SKX-LABEL: test_psllq:
   11132 ; SKX:       # %bb.0:
   11133 ; SKX-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11134 ; SKX-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   11135 ; SKX-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
   11136 ; SKX-NEXT:    retq # sched: [7:1.00]
   11137 ;
   11138 ; BTVER2-SSE-LABEL: test_psllq:
   11139 ; BTVER2-SSE:       # %bb.0:
   11140 ; BTVER2-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [1:0.50]
   11141 ; BTVER2-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [6:1.00]
   11142 ; BTVER2-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:0.50]
   11143 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   11144 ;
   11145 ; BTVER2-LABEL: test_psllq:
   11146 ; BTVER2:       # %bb.0:
   11147 ; BTVER2-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   11148 ; BTVER2-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   11149 ; BTVER2-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
   11150 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   11151 ;
   11152 ; ZNVER1-SSE-LABEL: test_psllq:
   11153 ; ZNVER1-SSE:       # %bb.0:
   11154 ; ZNVER1-SSE-NEXT:    psllq %xmm1, %xmm0 # sched: [1:1.00]
   11155 ; ZNVER1-SSE-NEXT:    psllq (%rdi), %xmm0 # sched: [8:1.00]
   11156 ; ZNVER1-SSE-NEXT:    psllq $2, %xmm0 # sched: [1:0.25]
   11157 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   11158 ;
   11159 ; ZNVER1-LABEL: test_psllq:
   11160 ; ZNVER1:       # %bb.0:
   11161 ; ZNVER1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   11162 ; ZNVER1-NEXT:    vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11163 ; ZNVER1-NEXT:    vpsllq $2, %xmm0, %xmm0 # sched: [1:0.25]
   11164 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
           ; Scheduling test for (v)psllq with its count supplied three ways; the three
           ; calls are chained so each result feeds the next.
           ; Count in a register argument (%a1).
   11165   %1 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
           ; Count loaded from memory through %a2.
   11166   %2 = load <2 x i64>, <2 x i64> *%a2, align 16
   11167   %3 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %1, <2 x i64> %2)
           ; Immediate count of 2.
   11168   %4 = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %3, i32 2)
   11169   ret <2 x i64> %4
   11170 }
         ; Intrinsics called by test_psllq: vector-count and i32-count forms.
   11171 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
   11172 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
   11173 
   11174 define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
   11175 ; GENERIC-LABEL: test_psllw:
   11176 ; GENERIC:       # %bb.0:
   11177 ; GENERIC-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
   11178 ; GENERIC-NEXT:    psllw (%rdi), %xmm0 # sched: [8:1.00]
   11179 ; GENERIC-NEXT:    psllw $2, %xmm0 # sched: [1:1.00]
   11180 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   11181 ;
   11182 ; ATOM-LABEL: test_psllw:
   11183 ; ATOM:       # %bb.0:
   11184 ; ATOM-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
   11185 ; ATOM-NEXT:    psllw (%rdi), %xmm0 # sched: [3:1.50]
   11186 ; ATOM-NEXT:    psllw $2, %xmm0 # sched: [1:0.50]
   11187 ; ATOM-NEXT:    retq # sched: [79:39.50]
   11188 ;
   11189 ; SLM-LABEL: test_psllw:
   11190 ; SLM:       # %bb.0:
   11191 ; SLM-NEXT:    psllw %xmm1, %xmm0 # sched: [1:1.00]
   11192 ; SLM-NEXT:    psllw (%rdi), %xmm0 # sched: [4:1.00]
   11193 ; SLM-NEXT:    psllw $2, %xmm0 # sched: [1:1.00]
   11194 ; SLM-NEXT:    retq # sched: [4:1.00]
   11195 ;
   11196 ; SANDY-SSE-LABEL: test_psllw:
   11197 ; SANDY-SSE:       # %bb.0:
   11198 ; SANDY-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
   11199 ; SANDY-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [8:1.00]
   11200 ; SANDY-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:1.00]
   11201 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   11202 ;
   11203 ; SANDY-LABEL: test_psllw:
   11204 ; SANDY:       # %bb.0:
   11205 ; SANDY-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11206 ; SANDY-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11207 ; SANDY-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
   11208 ; SANDY-NEXT:    retq # sched: [1:1.00]
   11209 ;
   11210 ; HASWELL-SSE-LABEL: test_psllw:
   11211 ; HASWELL-SSE:       # %bb.0:
   11212 ; HASWELL-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
   11213 ; HASWELL-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [8:1.00]
   11214 ; HASWELL-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:1.00]
   11215 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   11216 ;
   11217 ; HASWELL-LABEL: test_psllw:
   11218 ; HASWELL:       # %bb.0:
   11219 ; HASWELL-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11220 ; HASWELL-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11221 ; HASWELL-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
   11222 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   11223 ;
   11224 ; BROADWELL-SSE-LABEL: test_psllw:
   11225 ; BROADWELL-SSE:       # %bb.0:
   11226 ; BROADWELL-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
   11227 ; BROADWELL-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [7:1.00]
   11228 ; BROADWELL-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:1.00]
   11229 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   11230 ;
   11231 ; BROADWELL-LABEL: test_psllw:
   11232 ; BROADWELL:       # %bb.0:
   11233 ; BROADWELL-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11234 ; BROADWELL-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   11235 ; BROADWELL-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
   11236 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   11237 ;
   11238 ; SKYLAKE-SSE-LABEL: test_psllw:
   11239 ; SKYLAKE-SSE:       # %bb.0:
   11240 ; SKYLAKE-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
   11241 ; SKYLAKE-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [7:0.50]
   11242 ; SKYLAKE-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:0.50]
   11243 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   11244 ;
   11245 ; SKYLAKE-LABEL: test_psllw:
   11246 ; SKYLAKE:       # %bb.0:
   11247 ; SKYLAKE-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11248 ; SKYLAKE-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   11249 ; SKYLAKE-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
   11250 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   11251 ;
   11252 ; SKX-SSE-LABEL: test_psllw:
   11253 ; SKX-SSE:       # %bb.0:
   11254 ; SKX-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [2:1.00]
   11255 ; SKX-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [7:0.50]
   11256 ; SKX-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:0.50]
   11257 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   11258 ;
   11259 ; SKX-LABEL: test_psllw:
   11260 ; SKX:       # %bb.0:
   11261 ; SKX-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11262 ; SKX-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   11263 ; SKX-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
   11264 ; SKX-NEXT:    retq # sched: [7:1.00]
   11265 ;
   11266 ; BTVER2-SSE-LABEL: test_psllw:
   11267 ; BTVER2-SSE:       # %bb.0:
   11268 ; BTVER2-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [1:0.50]
   11269 ; BTVER2-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [6:1.00]
   11270 ; BTVER2-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:0.50]
   11271 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   11272 ;
   11273 ; BTVER2-LABEL: test_psllw:
   11274 ; BTVER2:       # %bb.0:
   11275 ; BTVER2-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   11276 ; BTVER2-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   11277 ; BTVER2-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
   11278 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   11279 ;
   11280 ; ZNVER1-SSE-LABEL: test_psllw:
   11281 ; ZNVER1-SSE:       # %bb.0:
   11282 ; ZNVER1-SSE-NEXT:    psllw %xmm1, %xmm0 # sched: [1:1.00]
   11283 ; ZNVER1-SSE-NEXT:    psllw (%rdi), %xmm0 # sched: [8:1.00]
   11284 ; ZNVER1-SSE-NEXT:    psllw $2, %xmm0 # sched: [1:0.25]
   11285 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   11286 ;
   11287 ; ZNVER1-LABEL: test_psllw:
   11288 ; ZNVER1:       # %bb.0:
   11289 ; ZNVER1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   11290 ; ZNVER1-NEXT:    vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11291 ; ZNVER1-NEXT:    vpsllw $2, %xmm0, %xmm0 # sched: [1:0.25]
   11292 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
           ; Scheduling test for (v)psllw with its count supplied three ways; the three
           ; calls are chained so each result feeds the next.
           ; Count in a register argument (%a1).
   11293   %1 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
           ; Count loaded from memory through %a2.
   11294   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   11295   %3 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %1, <8 x i16> %2)
           ; Immediate count of 2.
   11296   %4 = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %3, i32 2)
   11297   ret <8 x i16> %4
   11298 }
         ; Intrinsics called by test_psllw: vector-count and i32-count forms.
   11299 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
   11300 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
   11301 
   11302 define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
   11303 ; GENERIC-LABEL: test_psrad:
   11304 ; GENERIC:       # %bb.0:
   11305 ; GENERIC-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
   11306 ; GENERIC-NEXT:    psrad (%rdi), %xmm0 # sched: [8:1.00]
   11307 ; GENERIC-NEXT:    psrad $2, %xmm0 # sched: [1:1.00]
   11308 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   11309 ;
   11310 ; ATOM-LABEL: test_psrad:
   11311 ; ATOM:       # %bb.0:
   11312 ; ATOM-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
   11313 ; ATOM-NEXT:    psrad (%rdi), %xmm0 # sched: [3:1.50]
   11314 ; ATOM-NEXT:    psrad $2, %xmm0 # sched: [1:0.50]
   11315 ; ATOM-NEXT:    retq # sched: [79:39.50]
   11316 ;
   11317 ; SLM-LABEL: test_psrad:
   11318 ; SLM:       # %bb.0:
   11319 ; SLM-NEXT:    psrad %xmm1, %xmm0 # sched: [1:1.00]
   11320 ; SLM-NEXT:    psrad (%rdi), %xmm0 # sched: [4:1.00]
   11321 ; SLM-NEXT:    psrad $2, %xmm0 # sched: [1:1.00]
   11322 ; SLM-NEXT:    retq # sched: [4:1.00]
   11323 ;
   11324 ; SANDY-SSE-LABEL: test_psrad:
   11325 ; SANDY-SSE:       # %bb.0:
   11326 ; SANDY-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
   11327 ; SANDY-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [8:1.00]
   11328 ; SANDY-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:1.00]
   11329 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   11330 ;
   11331 ; SANDY-LABEL: test_psrad:
   11332 ; SANDY:       # %bb.0:
   11333 ; SANDY-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11334 ; SANDY-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11335 ; SANDY-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
   11336 ; SANDY-NEXT:    retq # sched: [1:1.00]
   11337 ;
   11338 ; HASWELL-SSE-LABEL: test_psrad:
   11339 ; HASWELL-SSE:       # %bb.0:
   11340 ; HASWELL-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
   11341 ; HASWELL-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [8:1.00]
   11342 ; HASWELL-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:1.00]
   11343 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   11344 ;
   11345 ; HASWELL-LABEL: test_psrad:
   11346 ; HASWELL:       # %bb.0:
   11347 ; HASWELL-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11348 ; HASWELL-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11349 ; HASWELL-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
   11350 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   11351 ;
   11352 ; BROADWELL-SSE-LABEL: test_psrad:
   11353 ; BROADWELL-SSE:       # %bb.0:
   11354 ; BROADWELL-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
   11355 ; BROADWELL-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [7:1.00]
   11356 ; BROADWELL-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:1.00]
   11357 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   11358 ;
   11359 ; BROADWELL-LABEL: test_psrad:
   11360 ; BROADWELL:       # %bb.0:
   11361 ; BROADWELL-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11362 ; BROADWELL-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   11363 ; BROADWELL-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
   11364 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   11365 ;
   11366 ; SKYLAKE-SSE-LABEL: test_psrad:
   11367 ; SKYLAKE-SSE:       # %bb.0:
   11368 ; SKYLAKE-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
   11369 ; SKYLAKE-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [7:0.50]
   11370 ; SKYLAKE-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:0.50]
   11371 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   11372 ;
   11373 ; SKYLAKE-LABEL: test_psrad:
   11374 ; SKYLAKE:       # %bb.0:
   11375 ; SKYLAKE-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11376 ; SKYLAKE-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   11377 ; SKYLAKE-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
   11378 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   11379 ;
   11380 ; SKX-SSE-LABEL: test_psrad:
   11381 ; SKX-SSE:       # %bb.0:
   11382 ; SKX-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [2:1.00]
   11383 ; SKX-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [7:0.50]
   11384 ; SKX-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:0.50]
   11385 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   11386 ;
   11387 ; SKX-LABEL: test_psrad:
   11388 ; SKX:       # %bb.0:
   11389 ; SKX-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11390 ; SKX-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   11391 ; SKX-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
   11392 ; SKX-NEXT:    retq # sched: [7:1.00]
   11393 ;
   11394 ; BTVER2-SSE-LABEL: test_psrad:
   11395 ; BTVER2-SSE:       # %bb.0:
   11396 ; BTVER2-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [1:0.50]
   11397 ; BTVER2-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [6:1.00]
   11398 ; BTVER2-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:0.50]
   11399 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   11400 ;
   11401 ; BTVER2-LABEL: test_psrad:
   11402 ; BTVER2:       # %bb.0:
   11403 ; BTVER2-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   11404 ; BTVER2-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   11405 ; BTVER2-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
   11406 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   11407 ;
   11408 ; ZNVER1-SSE-LABEL: test_psrad:
   11409 ; ZNVER1-SSE:       # %bb.0:
   11410 ; ZNVER1-SSE-NEXT:    psrad %xmm1, %xmm0 # sched: [1:1.00]
   11411 ; ZNVER1-SSE-NEXT:    psrad (%rdi), %xmm0 # sched: [8:1.00]
   11412 ; ZNVER1-SSE-NEXT:    psrad $2, %xmm0 # sched: [1:0.25]
   11413 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   11414 ;
   11415 ; ZNVER1-LABEL: test_psrad:
   11416 ; ZNVER1:       # %bb.0:
   11417 ; ZNVER1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   11418 ; ZNVER1-NEXT:    vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11419 ; ZNVER1-NEXT:    vpsrad $2, %xmm0, %xmm0 # sched: [1:0.25]
   11420 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
         ; Covers the three count-operand forms of the packed 32-bit arithmetic
         ; right shift. The shifts are chained (%1 feeds %3, %3 feeds %4) so that
         ; every one of them contributes to the returned value.
         ;   %1 - count in a register (%a1)      - expected as "psrad %xmm1, %xmm0"
         ;   %3 - count loaded from memory (%a2) - load folded, "psrad (%rdi), %xmm0"
         ;   %4 - immediate count of 2           - expected as "psrad $2, %xmm0"
         ; Several prefixes expect the three-operand vpsrad spelling instead.
         ; The assertions above are autogenerated (see the NOTE on the first line
         ; of the file) - regenerate them with utils/update_llc_test_checks.py
         ; rather than editing the expected schedule numbers by hand.
   11421   %1 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
   11422   %2 = load <4 x i32>, <4 x i32> *%a2, align 16
   11423   %3 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> %2)
   11424   %4 = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
   11425   ret <4 x i32> %4
   11426 }
   11427 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
   11428 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
   11429 
   11430 define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
   11431 ; GENERIC-LABEL: test_psraw:
   11432 ; GENERIC:       # %bb.0:
   11433 ; GENERIC-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
   11434 ; GENERIC-NEXT:    psraw (%rdi), %xmm0 # sched: [8:1.00]
   11435 ; GENERIC-NEXT:    psraw $2, %xmm0 # sched: [1:1.00]
   11436 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   11437 ;
   11438 ; ATOM-LABEL: test_psraw:
   11439 ; ATOM:       # %bb.0:
   11440 ; ATOM-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
   11441 ; ATOM-NEXT:    psraw (%rdi), %xmm0 # sched: [3:1.50]
   11442 ; ATOM-NEXT:    psraw $2, %xmm0 # sched: [1:0.50]
   11443 ; ATOM-NEXT:    retq # sched: [79:39.50]
   11444 ;
   11445 ; SLM-LABEL: test_psraw:
   11446 ; SLM:       # %bb.0:
   11447 ; SLM-NEXT:    psraw %xmm1, %xmm0 # sched: [1:1.00]
   11448 ; SLM-NEXT:    psraw (%rdi), %xmm0 # sched: [4:1.00]
   11449 ; SLM-NEXT:    psraw $2, %xmm0 # sched: [1:1.00]
   11450 ; SLM-NEXT:    retq # sched: [4:1.00]
   11451 ;
   11452 ; SANDY-SSE-LABEL: test_psraw:
   11453 ; SANDY-SSE:       # %bb.0:
   11454 ; SANDY-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
   11455 ; SANDY-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [8:1.00]
   11456 ; SANDY-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:1.00]
   11457 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   11458 ;
   11459 ; SANDY-LABEL: test_psraw:
   11460 ; SANDY:       # %bb.0:
   11461 ; SANDY-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11462 ; SANDY-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11463 ; SANDY-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
   11464 ; SANDY-NEXT:    retq # sched: [1:1.00]
   11465 ;
   11466 ; HASWELL-SSE-LABEL: test_psraw:
   11467 ; HASWELL-SSE:       # %bb.0:
   11468 ; HASWELL-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
   11469 ; HASWELL-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [8:1.00]
   11470 ; HASWELL-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:1.00]
   11471 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   11472 ;
   11473 ; HASWELL-LABEL: test_psraw:
   11474 ; HASWELL:       # %bb.0:
   11475 ; HASWELL-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11476 ; HASWELL-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11477 ; HASWELL-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
   11478 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   11479 ;
   11480 ; BROADWELL-SSE-LABEL: test_psraw:
   11481 ; BROADWELL-SSE:       # %bb.0:
   11482 ; BROADWELL-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
   11483 ; BROADWELL-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [7:1.00]
   11484 ; BROADWELL-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:1.00]
   11485 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   11486 ;
   11487 ; BROADWELL-LABEL: test_psraw:
   11488 ; BROADWELL:       # %bb.0:
   11489 ; BROADWELL-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11490 ; BROADWELL-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   11491 ; BROADWELL-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
   11492 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   11493 ;
   11494 ; SKYLAKE-SSE-LABEL: test_psraw:
   11495 ; SKYLAKE-SSE:       # %bb.0:
   11496 ; SKYLAKE-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
   11497 ; SKYLAKE-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [7:0.50]
   11498 ; SKYLAKE-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:0.50]
   11499 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   11500 ;
   11501 ; SKYLAKE-LABEL: test_psraw:
   11502 ; SKYLAKE:       # %bb.0:
   11503 ; SKYLAKE-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11504 ; SKYLAKE-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   11505 ; SKYLAKE-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
   11506 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   11507 ;
   11508 ; SKX-SSE-LABEL: test_psraw:
   11509 ; SKX-SSE:       # %bb.0:
   11510 ; SKX-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [2:1.00]
   11511 ; SKX-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [7:0.50]
   11512 ; SKX-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:0.50]
   11513 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   11514 ;
   11515 ; SKX-LABEL: test_psraw:
   11516 ; SKX:       # %bb.0:
   11517 ; SKX-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11518 ; SKX-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   11519 ; SKX-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
   11520 ; SKX-NEXT:    retq # sched: [7:1.00]
   11521 ;
   11522 ; BTVER2-SSE-LABEL: test_psraw:
   11523 ; BTVER2-SSE:       # %bb.0:
   11524 ; BTVER2-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [1:0.50]
   11525 ; BTVER2-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [6:1.00]
   11526 ; BTVER2-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:0.50]
   11527 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   11528 ;
   11529 ; BTVER2-LABEL: test_psraw:
   11530 ; BTVER2:       # %bb.0:
   11531 ; BTVER2-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   11532 ; BTVER2-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   11533 ; BTVER2-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
   11534 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   11535 ;
   11536 ; ZNVER1-SSE-LABEL: test_psraw:
   11537 ; ZNVER1-SSE:       # %bb.0:
   11538 ; ZNVER1-SSE-NEXT:    psraw %xmm1, %xmm0 # sched: [1:1.00]
   11539 ; ZNVER1-SSE-NEXT:    psraw (%rdi), %xmm0 # sched: [8:1.00]
   11540 ; ZNVER1-SSE-NEXT:    psraw $2, %xmm0 # sched: [1:0.25]
   11541 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   11542 ;
   11543 ; ZNVER1-LABEL: test_psraw:
   11544 ; ZNVER1:       # %bb.0:
   11545 ; ZNVER1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   11546 ; ZNVER1-NEXT:    vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11547 ; ZNVER1-NEXT:    vpsraw $2, %xmm0, %xmm0 # sched: [1:0.25]
   11548 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
         ; Covers the three count-operand forms of the packed 16-bit arithmetic
         ; right shift. The shifts are chained (%1 feeds %3, %3 feeds %4) so that
         ; every one of them contributes to the returned value.
         ;   %1 - count in a register (%a1)      - expected as "psraw %xmm1, %xmm0"
         ;   %3 - count loaded from memory (%a2) - load folded, "psraw (%rdi), %xmm0"
         ;   %4 - immediate count of 2           - expected as "psraw $2, %xmm0"
         ; Several prefixes expect the three-operand vpsraw spelling instead.
         ; The assertions above are autogenerated (see the NOTE on the first line
         ; of the file) - regenerate them with utils/update_llc_test_checks.py
         ; rather than editing the expected schedule numbers by hand.
   11549   %1 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
   11550   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   11551   %3 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> %2)
   11552   %4 = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
   11553   ret <8 x i16> %4
   11554 }
   11555 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
   11556 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
   11557 
   11558 define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
   11559 ; GENERIC-LABEL: test_psrld:
   11560 ; GENERIC:       # %bb.0:
   11561 ; GENERIC-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
   11562 ; GENERIC-NEXT:    psrld (%rdi), %xmm0 # sched: [8:1.00]
   11563 ; GENERIC-NEXT:    psrld $2, %xmm0 # sched: [1:1.00]
   11564 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   11565 ;
   11566 ; ATOM-LABEL: test_psrld:
   11567 ; ATOM:       # %bb.0:
   11568 ; ATOM-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
   11569 ; ATOM-NEXT:    psrld (%rdi), %xmm0 # sched: [3:1.50]
   11570 ; ATOM-NEXT:    psrld $2, %xmm0 # sched: [1:0.50]
   11571 ; ATOM-NEXT:    retq # sched: [79:39.50]
   11572 ;
   11573 ; SLM-LABEL: test_psrld:
   11574 ; SLM:       # %bb.0:
   11575 ; SLM-NEXT:    psrld %xmm1, %xmm0 # sched: [1:1.00]
   11576 ; SLM-NEXT:    psrld (%rdi), %xmm0 # sched: [4:1.00]
   11577 ; SLM-NEXT:    psrld $2, %xmm0 # sched: [1:1.00]
   11578 ; SLM-NEXT:    retq # sched: [4:1.00]
   11579 ;
   11580 ; SANDY-SSE-LABEL: test_psrld:
   11581 ; SANDY-SSE:       # %bb.0:
   11582 ; SANDY-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
   11583 ; SANDY-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [8:1.00]
   11584 ; SANDY-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:1.00]
   11585 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   11586 ;
   11587 ; SANDY-LABEL: test_psrld:
   11588 ; SANDY:       # %bb.0:
   11589 ; SANDY-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11590 ; SANDY-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11591 ; SANDY-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
   11592 ; SANDY-NEXT:    retq # sched: [1:1.00]
   11593 ;
   11594 ; HASWELL-SSE-LABEL: test_psrld:
   11595 ; HASWELL-SSE:       # %bb.0:
   11596 ; HASWELL-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
   11597 ; HASWELL-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [8:1.00]
   11598 ; HASWELL-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:1.00]
   11599 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   11600 ;
   11601 ; HASWELL-LABEL: test_psrld:
   11602 ; HASWELL:       # %bb.0:
   11603 ; HASWELL-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11604 ; HASWELL-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11605 ; HASWELL-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
   11606 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   11607 ;
   11608 ; BROADWELL-SSE-LABEL: test_psrld:
   11609 ; BROADWELL-SSE:       # %bb.0:
   11610 ; BROADWELL-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
   11611 ; BROADWELL-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [7:1.00]
   11612 ; BROADWELL-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:1.00]
   11613 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   11614 ;
   11615 ; BROADWELL-LABEL: test_psrld:
   11616 ; BROADWELL:       # %bb.0:
   11617 ; BROADWELL-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11618 ; BROADWELL-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   11619 ; BROADWELL-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
   11620 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   11621 ;
   11622 ; SKYLAKE-SSE-LABEL: test_psrld:
   11623 ; SKYLAKE-SSE:       # %bb.0:
   11624 ; SKYLAKE-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
   11625 ; SKYLAKE-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [7:0.50]
   11626 ; SKYLAKE-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:0.50]
   11627 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   11628 ;
   11629 ; SKYLAKE-LABEL: test_psrld:
   11630 ; SKYLAKE:       # %bb.0:
   11631 ; SKYLAKE-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11632 ; SKYLAKE-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   11633 ; SKYLAKE-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
   11634 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   11635 ;
   11636 ; SKX-SSE-LABEL: test_psrld:
   11637 ; SKX-SSE:       # %bb.0:
   11638 ; SKX-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [2:1.00]
   11639 ; SKX-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [7:0.50]
   11640 ; SKX-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:0.50]
   11641 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   11642 ;
   11643 ; SKX-LABEL: test_psrld:
   11644 ; SKX:       # %bb.0:
   11645 ; SKX-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11646 ; SKX-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   11647 ; SKX-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
   11648 ; SKX-NEXT:    retq # sched: [7:1.00]
   11649 ;
   11650 ; BTVER2-SSE-LABEL: test_psrld:
   11651 ; BTVER2-SSE:       # %bb.0:
   11652 ; BTVER2-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [1:0.50]
   11653 ; BTVER2-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [6:1.00]
   11654 ; BTVER2-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:0.50]
   11655 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   11656 ;
   11657 ; BTVER2-LABEL: test_psrld:
   11658 ; BTVER2:       # %bb.0:
   11659 ; BTVER2-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   11660 ; BTVER2-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   11661 ; BTVER2-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
   11662 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   11663 ;
   11664 ; ZNVER1-SSE-LABEL: test_psrld:
   11665 ; ZNVER1-SSE:       # %bb.0:
   11666 ; ZNVER1-SSE-NEXT:    psrld %xmm1, %xmm0 # sched: [1:1.00]
   11667 ; ZNVER1-SSE-NEXT:    psrld (%rdi), %xmm0 # sched: [8:1.00]
   11668 ; ZNVER1-SSE-NEXT:    psrld $2, %xmm0 # sched: [1:0.25]
   11669 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   11670 ;
   11671 ; ZNVER1-LABEL: test_psrld:
   11672 ; ZNVER1:       # %bb.0:
   11673 ; ZNVER1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   11674 ; ZNVER1-NEXT:    vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11675 ; ZNVER1-NEXT:    vpsrld $2, %xmm0, %xmm0 # sched: [1:0.25]
   11676 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
         ; Covers the three count-operand forms of the packed 32-bit logical
         ; right shift. The shifts are chained (%1 feeds %3, %3 feeds %4) so that
         ; every one of them contributes to the returned value.
         ;   %1 - count in a register (%a1)      - expected as "psrld %xmm1, %xmm0"
         ;   %3 - count loaded from memory (%a2) - load folded, "psrld (%rdi), %xmm0"
         ;   %4 - immediate count of 2           - expected as "psrld $2, %xmm0"
         ; Several prefixes expect the three-operand vpsrld spelling instead.
         ; The assertions above are autogenerated (see the NOTE on the first line
         ; of the file) - regenerate them with utils/update_llc_test_checks.py
         ; rather than editing the expected schedule numbers by hand.
   11677   %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
   11678   %2 = load <4 x i32>, <4 x i32> *%a2, align 16
   11679   %3 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %1, <4 x i32> %2)
   11680   %4 = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %3, i32 2)
   11681   ret <4 x i32> %4
   11682 }
   11683 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
   11684 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
   11685 
   11686 define <4 x i32> @test_psrldq(<4 x i32> %a0) {
   11687 ; GENERIC-LABEL: test_psrldq:
   11688 ; GENERIC:       # %bb.0:
   11689 ; GENERIC-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
   11690 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   11691 ;
   11692 ; ATOM-LABEL: test_psrldq:
   11693 ; ATOM:       # %bb.0:
   11694 ; ATOM-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
   11695 ; ATOM-NEXT:    nop # sched: [1:0.50]
   11696 ; ATOM-NEXT:    nop # sched: [1:0.50]
   11697 ; ATOM-NEXT:    nop # sched: [1:0.50]
   11698 ; ATOM-NEXT:    nop # sched: [1:0.50]
   11699 ; ATOM-NEXT:    nop # sched: [1:0.50]
   11700 ; ATOM-NEXT:    nop # sched: [1:0.50]
   11701 ; ATOM-NEXT:    retq # sched: [79:39.50]
   11702 ;
   11703 ; SLM-LABEL: test_psrldq:
   11704 ; SLM:       # %bb.0:
   11705 ; SLM-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
   11706 ; SLM-NEXT:    retq # sched: [4:1.00]
   11707 ;
   11708 ; SANDY-SSE-LABEL: test_psrldq:
   11709 ; SANDY-SSE:       # %bb.0:
   11710 ; SANDY-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
   11711 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   11712 ;
   11713 ; SANDY-LABEL: test_psrldq:
   11714 ; SANDY:       # %bb.0:
   11715 ; SANDY-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
   11716 ; SANDY-NEXT:    retq # sched: [1:1.00]
   11717 ;
   11718 ; HASWELL-SSE-LABEL: test_psrldq:
   11719 ; HASWELL-SSE:       # %bb.0:
   11720 ; HASWELL-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
   11721 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   11722 ;
   11723 ; HASWELL-LABEL: test_psrldq:
   11724 ; HASWELL:       # %bb.0:
   11725 ; HASWELL-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
   11726 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   11727 ;
   11728 ; BROADWELL-SSE-LABEL: test_psrldq:
   11729 ; BROADWELL-SSE:       # %bb.0:
   11730 ; BROADWELL-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
   11731 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   11732 ;
   11733 ; BROADWELL-LABEL: test_psrldq:
   11734 ; BROADWELL:       # %bb.0:
   11735 ; BROADWELL-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
   11736 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   11737 ;
   11738 ; SKYLAKE-SSE-LABEL: test_psrldq:
   11739 ; SKYLAKE-SSE:       # %bb.0:
   11740 ; SKYLAKE-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
   11741 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   11742 ;
   11743 ; SKYLAKE-LABEL: test_psrldq:
   11744 ; SKYLAKE:       # %bb.0:
   11745 ; SKYLAKE-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
   11746 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   11747 ;
   11748 ; SKX-SSE-LABEL: test_psrldq:
   11749 ; SKX-SSE:       # %bb.0:
   11750 ; SKX-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
   11751 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   11752 ;
   11753 ; SKX-LABEL: test_psrldq:
   11754 ; SKX:       # %bb.0:
   11755 ; SKX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
   11756 ; SKX-NEXT:    retq # sched: [7:1.00]
   11757 ;
   11758 ; BTVER2-SSE-LABEL: test_psrldq:
   11759 ; BTVER2-SSE:       # %bb.0:
   11760 ; BTVER2-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
   11761 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   11762 ;
   11763 ; BTVER2-LABEL: test_psrldq:
   11764 ; BTVER2:       # %bb.0:
   11765 ; BTVER2-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
   11766 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   11767 ;
   11768 ; ZNVER1-SSE-LABEL: test_psrldq:
   11769 ; ZNVER1-SSE:       # %bb.0:
   11770 ; ZNVER1-SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
   11771 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   11772 ;
   11773 ; ZNVER1-LABEL: test_psrldq:
   11774 ; ZNVER1:       # %bb.0:
   11775 ; ZNVER1-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
   11776 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
         ; Whole-register byte shift right, written as a shuffle rather than an
         ; intrinsic call. The mask keeps lanes 1, 2 and 3 of %a0 and takes the
         ; last lane from the zero vector (mask index 4 is lane 0 of the second
         ; operand). The expected assembly comment shows the same result bytewise,
         ; source bytes 4 through 15 followed by four zero bytes, and every prefix
         ; expects a single psrldq or vpsrldq for it.
         ; The assertions above are autogenerated (see the NOTE on the first line
         ; of the file) - regenerate them with utils/update_llc_test_checks.py
         ; rather than editing the expected schedule numbers by hand.
   11777   %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   11778   ret <4 x i32> %1
   11779 }
   11780 
   11781 define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
   11782 ; GENERIC-LABEL: test_psrlq:
   11783 ; GENERIC:       # %bb.0:
   11784 ; GENERIC-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
   11785 ; GENERIC-NEXT:    psrlq (%rdi), %xmm0 # sched: [8:1.00]
   11786 ; GENERIC-NEXT:    psrlq $2, %xmm0 # sched: [1:1.00]
   11787 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   11788 ;
   11789 ; ATOM-LABEL: test_psrlq:
   11790 ; ATOM:       # %bb.0:
   11791 ; ATOM-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
   11792 ; ATOM-NEXT:    psrlq (%rdi), %xmm0 # sched: [3:1.50]
   11793 ; ATOM-NEXT:    psrlq $2, %xmm0 # sched: [1:0.50]
   11794 ; ATOM-NEXT:    retq # sched: [79:39.50]
   11795 ;
   11796 ; SLM-LABEL: test_psrlq:
   11797 ; SLM:       # %bb.0:
   11798 ; SLM-NEXT:    psrlq %xmm1, %xmm0 # sched: [1:1.00]
   11799 ; SLM-NEXT:    psrlq (%rdi), %xmm0 # sched: [4:1.00]
   11800 ; SLM-NEXT:    psrlq $2, %xmm0 # sched: [1:1.00]
   11801 ; SLM-NEXT:    retq # sched: [4:1.00]
   11802 ;
   11803 ; SANDY-SSE-LABEL: test_psrlq:
   11804 ; SANDY-SSE:       # %bb.0:
   11805 ; SANDY-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
   11806 ; SANDY-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [8:1.00]
   11807 ; SANDY-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:1.00]
   11808 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   11809 ;
   11810 ; SANDY-LABEL: test_psrlq:
   11811 ; SANDY:       # %bb.0:
   11812 ; SANDY-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11813 ; SANDY-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11814 ; SANDY-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
   11815 ; SANDY-NEXT:    retq # sched: [1:1.00]
   11816 ;
   11817 ; HASWELL-SSE-LABEL: test_psrlq:
   11818 ; HASWELL-SSE:       # %bb.0:
   11819 ; HASWELL-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
   11820 ; HASWELL-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [8:1.00]
   11821 ; HASWELL-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:1.00]
   11822 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   11823 ;
   11824 ; HASWELL-LABEL: test_psrlq:
   11825 ; HASWELL:       # %bb.0:
   11826 ; HASWELL-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11827 ; HASWELL-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11828 ; HASWELL-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
   11829 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   11830 ;
   11831 ; BROADWELL-SSE-LABEL: test_psrlq:
   11832 ; BROADWELL-SSE:       # %bb.0:
   11833 ; BROADWELL-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
   11834 ; BROADWELL-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [7:1.00]
   11835 ; BROADWELL-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:1.00]
   11836 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   11837 ;
   11838 ; BROADWELL-LABEL: test_psrlq:
   11839 ; BROADWELL:       # %bb.0:
   11840 ; BROADWELL-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11841 ; BROADWELL-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   11842 ; BROADWELL-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
   11843 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   11844 ;
   11845 ; SKYLAKE-SSE-LABEL: test_psrlq:
   11846 ; SKYLAKE-SSE:       # %bb.0:
   11847 ; SKYLAKE-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
   11848 ; SKYLAKE-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [7:0.50]
   11849 ; SKYLAKE-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:0.50]
   11850 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   11851 ;
   11852 ; SKYLAKE-LABEL: test_psrlq:
   11853 ; SKYLAKE:       # %bb.0:
   11854 ; SKYLAKE-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11855 ; SKYLAKE-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   11856 ; SKYLAKE-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
   11857 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   11858 ;
   11859 ; SKX-SSE-LABEL: test_psrlq:
   11860 ; SKX-SSE:       # %bb.0:
   11861 ; SKX-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [2:1.00]
   11862 ; SKX-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [7:0.50]
   11863 ; SKX-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:0.50]
   11864 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   11865 ;
   11866 ; SKX-LABEL: test_psrlq:
   11867 ; SKX:       # %bb.0:
   11868 ; SKX-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11869 ; SKX-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   11870 ; SKX-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
   11871 ; SKX-NEXT:    retq # sched: [7:1.00]
   11872 ;
   11873 ; BTVER2-SSE-LABEL: test_psrlq:
   11874 ; BTVER2-SSE:       # %bb.0:
   11875 ; BTVER2-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [1:0.50]
   11876 ; BTVER2-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [6:1.00]
   11877 ; BTVER2-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:0.50]
   11878 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   11879 ;
   11880 ; BTVER2-LABEL: test_psrlq:
   11881 ; BTVER2:       # %bb.0:
   11882 ; BTVER2-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   11883 ; BTVER2-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   11884 ; BTVER2-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
   11885 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   11886 ;
   11887 ; ZNVER1-SSE-LABEL: test_psrlq:
   11888 ; ZNVER1-SSE:       # %bb.0:
   11889 ; ZNVER1-SSE-NEXT:    psrlq %xmm1, %xmm0 # sched: [1:1.00]
   11890 ; ZNVER1-SSE-NEXT:    psrlq (%rdi), %xmm0 # sched: [8:1.00]
   11891 ; ZNVER1-SSE-NEXT:    psrlq $2, %xmm0 # sched: [1:0.25]
   11892 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   11893 ;
   11894 ; ZNVER1-LABEL: test_psrlq:
   11895 ; ZNVER1:       # %bb.0:
   11896 ; ZNVER1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   11897 ; ZNVER1-NEXT:    vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11898 ; ZNVER1-NEXT:    vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.25]
   11899 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
         ; Covers the three count-operand forms of the packed 64-bit logical
         ; right shift. The shifts are chained (%1 feeds %3, %3 feeds %4) so that
         ; every one of them contributes to the returned value.
         ;   %1 - count in a register (%a1)      - expected as "psrlq %xmm1, %xmm0"
         ;   %3 - count loaded from memory (%a2) - load folded, "psrlq (%rdi), %xmm0"
         ;   %4 - immediate count of 2           - expected as "psrlq $2, %xmm0"
         ; Several prefixes expect the three-operand vpsrlq spelling instead.
         ; The assertions above are autogenerated (see the NOTE on the first line
         ; of the file) - regenerate them with utils/update_llc_test_checks.py
         ; rather than editing the expected schedule numbers by hand.
   11900   %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
   11901   %2 = load <2 x i64>, <2 x i64> *%a2, align 16
   11902   %3 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %1, <2 x i64> %2)
   11903   %4 = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %3, i32 2)
   11904   ret <2 x i64> %4
   11905 }
   11906 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
   11907 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
   11908 
   11909 define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
   11910 ; GENERIC-LABEL: test_psrlw:
   11911 ; GENERIC:       # %bb.0:
   11912 ; GENERIC-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
   11913 ; GENERIC-NEXT:    psrlw (%rdi), %xmm0 # sched: [8:1.00]
   11914 ; GENERIC-NEXT:    psrlw $2, %xmm0 # sched: [1:1.00]
   11915 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   11916 ;
   11917 ; ATOM-LABEL: test_psrlw:
   11918 ; ATOM:       # %bb.0:
   11919 ; ATOM-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
   11920 ; ATOM-NEXT:    psrlw (%rdi), %xmm0 # sched: [3:1.50]
   11921 ; ATOM-NEXT:    psrlw $2, %xmm0 # sched: [1:0.50]
   11922 ; ATOM-NEXT:    retq # sched: [79:39.50]
   11923 ;
   11924 ; SLM-LABEL: test_psrlw:
   11925 ; SLM:       # %bb.0:
   11926 ; SLM-NEXT:    psrlw %xmm1, %xmm0 # sched: [1:1.00]
   11927 ; SLM-NEXT:    psrlw (%rdi), %xmm0 # sched: [4:1.00]
   11928 ; SLM-NEXT:    psrlw $2, %xmm0 # sched: [1:1.00]
   11929 ; SLM-NEXT:    retq # sched: [4:1.00]
   11930 ;
   11931 ; SANDY-SSE-LABEL: test_psrlw:
   11932 ; SANDY-SSE:       # %bb.0:
   11933 ; SANDY-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
   11934 ; SANDY-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [8:1.00]
   11935 ; SANDY-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:1.00]
   11936 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   11937 ;
   11938 ; SANDY-LABEL: test_psrlw:
   11939 ; SANDY:       # %bb.0:
   11940 ; SANDY-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11941 ; SANDY-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11942 ; SANDY-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
   11943 ; SANDY-NEXT:    retq # sched: [1:1.00]
   11944 ;
   11945 ; HASWELL-SSE-LABEL: test_psrlw:
   11946 ; HASWELL-SSE:       # %bb.0:
   11947 ; HASWELL-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
   11948 ; HASWELL-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [8:1.00]
   11949 ; HASWELL-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:1.00]
   11950 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   11951 ;
   11952 ; HASWELL-LABEL: test_psrlw:
   11953 ; HASWELL:       # %bb.0:
   11954 ; HASWELL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11955 ; HASWELL-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   11956 ; HASWELL-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
   11957 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   11958 ;
   11959 ; BROADWELL-SSE-LABEL: test_psrlw:
   11960 ; BROADWELL-SSE:       # %bb.0:
   11961 ; BROADWELL-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
   11962 ; BROADWELL-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [7:1.00]
   11963 ; BROADWELL-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:1.00]
   11964 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   11965 ;
   11966 ; BROADWELL-LABEL: test_psrlw:
   11967 ; BROADWELL:       # %bb.0:
   11968 ; BROADWELL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11969 ; BROADWELL-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   11970 ; BROADWELL-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
   11971 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   11972 ;
   11973 ; SKYLAKE-SSE-LABEL: test_psrlw:
   11974 ; SKYLAKE-SSE:       # %bb.0:
   11975 ; SKYLAKE-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
   11976 ; SKYLAKE-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [7:0.50]
   11977 ; SKYLAKE-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:0.50]
   11978 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   11979 ;
   11980 ; SKYLAKE-LABEL: test_psrlw:
   11981 ; SKYLAKE:       # %bb.0:
   11982 ; SKYLAKE-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11983 ; SKYLAKE-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   11984 ; SKYLAKE-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
   11985 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   11986 ;
   11987 ; SKX-SSE-LABEL: test_psrlw:
   11988 ; SKX-SSE:       # %bb.0:
   11989 ; SKX-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [2:1.00]
   11990 ; SKX-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [7:0.50]
   11991 ; SKX-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:0.50]
   11992 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   11993 ;
   11994 ; SKX-LABEL: test_psrlw:
   11995 ; SKX:       # %bb.0:
   11996 ; SKX-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   11997 ; SKX-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   11998 ; SKX-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
   11999 ; SKX-NEXT:    retq # sched: [7:1.00]
   12000 ;
   12001 ; BTVER2-SSE-LABEL: test_psrlw:
   12002 ; BTVER2-SSE:       # %bb.0:
   12003 ; BTVER2-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [1:0.50]
   12004 ; BTVER2-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [6:1.00]
   12005 ; BTVER2-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:0.50]
   12006 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   12007 ;
   12008 ; BTVER2-LABEL: test_psrlw:
   12009 ; BTVER2:       # %bb.0:
   12010 ; BTVER2-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12011 ; BTVER2-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   12012 ; BTVER2-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
   12013 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   12014 ;
   12015 ; ZNVER1-SSE-LABEL: test_psrlw:
   12016 ; ZNVER1-SSE:       # %bb.0:
   12017 ; ZNVER1-SSE-NEXT:    psrlw %xmm1, %xmm0 # sched: [1:1.00]
   12018 ; ZNVER1-SSE-NEXT:    psrlw (%rdi), %xmm0 # sched: [8:1.00]
   12019 ; ZNVER1-SSE-NEXT:    psrlw $2, %xmm0 # sched: [1:0.25]
   12020 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   12021 ;
   12022 ; ZNVER1-LABEL: test_psrlw:
   12023 ; ZNVER1:       # %bb.0:
   12024 ; ZNVER1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   12025 ; ZNVER1-NEXT:    vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   12026 ; ZNVER1-NEXT:    vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.25]
   12027 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   12028   %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
   12029   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   12030   %3 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %1, <8 x i16> %2)
   12031   %4 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %3, i32 2)
   12032   ret <8 x i16> %4
   12033 }
   12034 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone ; second operand is a vector; test_psrlw above expects psrlw/vpsrlw taking it from %xmm1 or (%rdi)
   12035 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone ; second operand is an i32; test_psrlw above passes 2 and expects the "$2" immediate form
   12036 
   12037 define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
         ; Scheduling test for "sub <16 x i8>". Every CPU block below expects a
         ; register-operand psubb followed by a memory-operand psubb reading (%rdi);
         ; the sandy, haswell, broadwell, skylake, skx, btver2 and znver1 blocks
         ; whose prefix has no SSE suffix expect the three-operand vpsubb spelling.
         ; The atom block additionally expects four nops ahead of retq.
         ; The bracketed pair after "# sched" is presumably latency and reciprocal
         ; throughput as printed by -print-schedule -- TODO confirm against llc.
         ; These check lines are autogenerated by utils/update_llc_test_checks.py
         ; (see the first line of the file); regenerate them rather than hand-edit.
   12038 ; GENERIC-LABEL: test_psubb:
   12039 ; GENERIC:       # %bb.0:
   12040 ; GENERIC-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
   12041 ; GENERIC-NEXT:    psubb (%rdi), %xmm0 # sched: [7:0.50]
   12042 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   12043 ;
   12044 ; ATOM-LABEL: test_psubb:
   12045 ; ATOM:       # %bb.0:
   12046 ; ATOM-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
   12047 ; ATOM-NEXT:    psubb (%rdi), %xmm0 # sched: [1:1.00]
   12048 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12049 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12050 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12051 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12052 ; ATOM-NEXT:    retq # sched: [79:39.50]
   12053 ;
   12054 ; SLM-LABEL: test_psubb:
   12055 ; SLM:       # %bb.0:
   12056 ; SLM-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
   12057 ; SLM-NEXT:    psubb (%rdi), %xmm0 # sched: [4:1.00]
   12058 ; SLM-NEXT:    retq # sched: [4:1.00]
   12059 ;
   12060 ; SANDY-SSE-LABEL: test_psubb:
   12061 ; SANDY-SSE:       # %bb.0:
   12062 ; SANDY-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
   12063 ; SANDY-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [7:0.50]
   12064 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   12065 ;
   12066 ; SANDY-LABEL: test_psubb:
   12067 ; SANDY:       # %bb.0:
   12068 ; SANDY-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12069 ; SANDY-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12070 ; SANDY-NEXT:    retq # sched: [1:1.00]
   12071 ;
   12072 ; HASWELL-SSE-LABEL: test_psubb:
   12073 ; HASWELL-SSE:       # %bb.0:
   12074 ; HASWELL-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
   12075 ; HASWELL-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [7:0.50]
   12076 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12077 ;
   12078 ; HASWELL-LABEL: test_psubb:
   12079 ; HASWELL:       # %bb.0:
   12080 ; HASWELL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12081 ; HASWELL-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12082 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   12083 ;
   12084 ; BROADWELL-SSE-LABEL: test_psubb:
   12085 ; BROADWELL-SSE:       # %bb.0:
   12086 ; BROADWELL-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
   12087 ; BROADWELL-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [6:0.50]
   12088 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12089 ;
   12090 ; BROADWELL-LABEL: test_psubb:
   12091 ; BROADWELL:       # %bb.0:
   12092 ; BROADWELL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12093 ; BROADWELL-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   12094 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   12095 ;
   12096 ; SKYLAKE-SSE-LABEL: test_psubb:
   12097 ; SKYLAKE-SSE:       # %bb.0:
   12098 ; SKYLAKE-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.33]
   12099 ; SKYLAKE-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [7:0.50]
   12100 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   12101 ;
   12102 ; SKYLAKE-LABEL: test_psubb:
   12103 ; SKYLAKE:       # %bb.0:
   12104 ; SKYLAKE-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   12105 ; SKYLAKE-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12106 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   12107 ;
   12108 ; SKX-SSE-LABEL: test_psubb:
   12109 ; SKX-SSE:       # %bb.0:
   12110 ; SKX-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.33]
   12111 ; SKX-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [7:0.50]
   12112 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   12113 ;
   12114 ; SKX-LABEL: test_psubb:
   12115 ; SKX:       # %bb.0:
   12116 ; SKX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   12117 ; SKX-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12118 ; SKX-NEXT:    retq # sched: [7:1.00]
   12119 ;
   12120 ; BTVER2-SSE-LABEL: test_psubb:
   12121 ; BTVER2-SSE:       # %bb.0:
   12122 ; BTVER2-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.50]
   12123 ; BTVER2-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [6:1.00]
   12124 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   12125 ;
   12126 ; BTVER2-LABEL: test_psubb:
   12127 ; BTVER2:       # %bb.0:
   12128 ; BTVER2-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12129 ; BTVER2-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   12130 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   12131 ;
   12132 ; ZNVER1-SSE-LABEL: test_psubb:
   12133 ; ZNVER1-SSE:       # %bb.0:
   12134 ; ZNVER1-SSE-NEXT:    psubb %xmm1, %xmm0 # sched: [1:0.25]
   12135 ; ZNVER1-SSE-NEXT:    psubb (%rdi), %xmm0 # sched: [8:0.50]
   12136 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   12137 ;
   12138 ; ZNVER1-LABEL: test_psubb:
   12139 ; ZNVER1:       # %bb.0:
   12140 ; ZNVER1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   12141 ; ZNVER1-NEXT:    vpsubb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   12142 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
         ; IR under test: %a0 - %a1, then minus the vector loaded (align 16) from
         ; %a2. The expected output contains no standalone load, so the load is
         ; expected to be folded into the second psubb as its (%rdi) operand.
   12143   %1 = sub <16 x i8> %a0, %a1
   12144   %2 = load <16 x i8>, <16 x i8> *%a2, align 16
   12145   %3 = sub <16 x i8> %1, %2
   12146   ret <16 x i8> %3
   12147 }
   12148 
   12149 define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
         ; Scheduling test for "sub <4 x i32>". Every CPU block below expects a
         ; register-operand psubd followed by a memory-operand psubd reading (%rdi);
         ; the sandy, haswell, broadwell, skylake, skx, btver2 and znver1 blocks
         ; whose prefix has no SSE suffix expect the three-operand vpsubd spelling.
         ; The atom block additionally expects four nops ahead of retq.
         ; The bracketed pair after "# sched" is presumably latency and reciprocal
         ; throughput as printed by -print-schedule -- TODO confirm against llc.
         ; These check lines are autogenerated by utils/update_llc_test_checks.py
         ; (see the first line of the file); regenerate them rather than hand-edit.
   12150 ; GENERIC-LABEL: test_psubd:
   12151 ; GENERIC:       # %bb.0:
   12152 ; GENERIC-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
   12153 ; GENERIC-NEXT:    psubd (%rdi), %xmm0 # sched: [7:0.50]
   12154 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   12155 ;
   12156 ; ATOM-LABEL: test_psubd:
   12157 ; ATOM:       # %bb.0:
   12158 ; ATOM-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
   12159 ; ATOM-NEXT:    psubd (%rdi), %xmm0 # sched: [1:1.00]
   12160 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12161 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12162 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12163 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12164 ; ATOM-NEXT:    retq # sched: [79:39.50]
   12165 ;
   12166 ; SLM-LABEL: test_psubd:
   12167 ; SLM:       # %bb.0:
   12168 ; SLM-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
   12169 ; SLM-NEXT:    psubd (%rdi), %xmm0 # sched: [4:1.00]
   12170 ; SLM-NEXT:    retq # sched: [4:1.00]
   12171 ;
   12172 ; SANDY-SSE-LABEL: test_psubd:
   12173 ; SANDY-SSE:       # %bb.0:
   12174 ; SANDY-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
   12175 ; SANDY-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [7:0.50]
   12176 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   12177 ;
   12178 ; SANDY-LABEL: test_psubd:
   12179 ; SANDY:       # %bb.0:
   12180 ; SANDY-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12181 ; SANDY-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12182 ; SANDY-NEXT:    retq # sched: [1:1.00]
   12183 ;
   12184 ; HASWELL-SSE-LABEL: test_psubd:
   12185 ; HASWELL-SSE:       # %bb.0:
   12186 ; HASWELL-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
   12187 ; HASWELL-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [7:0.50]
   12188 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12189 ;
   12190 ; HASWELL-LABEL: test_psubd:
   12191 ; HASWELL:       # %bb.0:
   12192 ; HASWELL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12193 ; HASWELL-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12194 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   12195 ;
   12196 ; BROADWELL-SSE-LABEL: test_psubd:
   12197 ; BROADWELL-SSE:       # %bb.0:
   12198 ; BROADWELL-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
   12199 ; BROADWELL-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [6:0.50]
   12200 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12201 ;
   12202 ; BROADWELL-LABEL: test_psubd:
   12203 ; BROADWELL:       # %bb.0:
   12204 ; BROADWELL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12205 ; BROADWELL-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   12206 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   12207 ;
   12208 ; SKYLAKE-SSE-LABEL: test_psubd:
   12209 ; SKYLAKE-SSE:       # %bb.0:
   12210 ; SKYLAKE-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.33]
   12211 ; SKYLAKE-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [7:0.50]
   12212 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   12213 ;
   12214 ; SKYLAKE-LABEL: test_psubd:
   12215 ; SKYLAKE:       # %bb.0:
   12216 ; SKYLAKE-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   12217 ; SKYLAKE-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12218 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   12219 ;
   12220 ; SKX-SSE-LABEL: test_psubd:
   12221 ; SKX-SSE:       # %bb.0:
   12222 ; SKX-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.33]
   12223 ; SKX-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [7:0.50]
   12224 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   12225 ;
   12226 ; SKX-LABEL: test_psubd:
   12227 ; SKX:       # %bb.0:
   12228 ; SKX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   12229 ; SKX-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12230 ; SKX-NEXT:    retq # sched: [7:1.00]
   12231 ;
   12232 ; BTVER2-SSE-LABEL: test_psubd:
   12233 ; BTVER2-SSE:       # %bb.0:
   12234 ; BTVER2-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.50]
   12235 ; BTVER2-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [6:1.00]
   12236 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   12237 ;
   12238 ; BTVER2-LABEL: test_psubd:
   12239 ; BTVER2:       # %bb.0:
   12240 ; BTVER2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12241 ; BTVER2-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   12242 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   12243 ;
   12244 ; ZNVER1-SSE-LABEL: test_psubd:
   12245 ; ZNVER1-SSE:       # %bb.0:
   12246 ; ZNVER1-SSE-NEXT:    psubd %xmm1, %xmm0 # sched: [1:0.25]
   12247 ; ZNVER1-SSE-NEXT:    psubd (%rdi), %xmm0 # sched: [8:0.50]
   12248 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   12249 ;
   12250 ; ZNVER1-LABEL: test_psubd:
   12251 ; ZNVER1:       # %bb.0:
   12252 ; ZNVER1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   12253 ; ZNVER1-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   12254 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
         ; IR under test: %a0 - %a1, then minus the vector loaded (align 16) from
         ; %a2. The expected output contains no standalone load, so the load is
         ; expected to be folded into the second psubd as its (%rdi) operand.
   12255   %1 = sub <4 x i32> %a0, %a1
   12256   %2 = load <4 x i32>, <4 x i32> *%a2, align 16
   12257   %3 = sub <4 x i32> %1, %2
   12258   ret <4 x i32> %3
   12259 }
   12260 
   12261 define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
         ; Scheduling test for "sub <2 x i64>". Every CPU block below expects a
         ; register-operand psubq followed by a memory-operand psubq reading (%rdi);
         ; the sandy, haswell, broadwell, skylake, skx, btver2 and znver1 blocks
         ; whose prefix has no SSE suffix expect the three-operand vpsubq spelling.
         ; Unlike test_psubb and test_psubd in this file, the atom block here
         ; expects no nops ahead of retq.
         ; The bracketed pair after "# sched" is presumably latency and reciprocal
         ; throughput as printed by -print-schedule -- TODO confirm against llc.
         ; These check lines are autogenerated by utils/update_llc_test_checks.py
         ; (see the first line of the file); regenerate them rather than hand-edit.
   12262 ; GENERIC-LABEL: test_psubq:
   12263 ; GENERIC:       # %bb.0:
   12264 ; GENERIC-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
   12265 ; GENERIC-NEXT:    psubq (%rdi), %xmm0 # sched: [7:0.50]
   12266 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   12267 ;
   12268 ; ATOM-LABEL: test_psubq:
   12269 ; ATOM:       # %bb.0:
   12270 ; ATOM-NEXT:    psubq %xmm1, %xmm0 # sched: [2:1.00]
   12271 ; ATOM-NEXT:    psubq (%rdi), %xmm0 # sched: [3:1.50]
   12272 ; ATOM-NEXT:    retq # sched: [79:39.50]
   12273 ;
   12274 ; SLM-LABEL: test_psubq:
   12275 ; SLM:       # %bb.0:
   12276 ; SLM-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
   12277 ; SLM-NEXT:    psubq (%rdi), %xmm0 # sched: [4:1.00]
   12278 ; SLM-NEXT:    retq # sched: [4:1.00]
   12279 ;
   12280 ; SANDY-SSE-LABEL: test_psubq:
   12281 ; SANDY-SSE:       # %bb.0:
   12282 ; SANDY-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
   12283 ; SANDY-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [7:0.50]
   12284 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   12285 ;
   12286 ; SANDY-LABEL: test_psubq:
   12287 ; SANDY:       # %bb.0:
   12288 ; SANDY-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12289 ; SANDY-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12290 ; SANDY-NEXT:    retq # sched: [1:1.00]
   12291 ;
   12292 ; HASWELL-SSE-LABEL: test_psubq:
   12293 ; HASWELL-SSE:       # %bb.0:
   12294 ; HASWELL-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
   12295 ; HASWELL-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [7:0.50]
   12296 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12297 ;
   12298 ; HASWELL-LABEL: test_psubq:
   12299 ; HASWELL:       # %bb.0:
   12300 ; HASWELL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12301 ; HASWELL-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12302 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   12303 ;
   12304 ; BROADWELL-SSE-LABEL: test_psubq:
   12305 ; BROADWELL-SSE:       # %bb.0:
   12306 ; BROADWELL-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
   12307 ; BROADWELL-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [6:0.50]
   12308 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12309 ;
   12310 ; BROADWELL-LABEL: test_psubq:
   12311 ; BROADWELL:       # %bb.0:
   12312 ; BROADWELL-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12313 ; BROADWELL-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   12314 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   12315 ;
   12316 ; SKYLAKE-SSE-LABEL: test_psubq:
   12317 ; SKYLAKE-SSE:       # %bb.0:
   12318 ; SKYLAKE-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.33]
   12319 ; SKYLAKE-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [7:0.50]
   12320 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   12321 ;
   12322 ; SKYLAKE-LABEL: test_psubq:
   12323 ; SKYLAKE:       # %bb.0:
   12324 ; SKYLAKE-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   12325 ; SKYLAKE-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12326 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   12327 ;
   12328 ; SKX-SSE-LABEL: test_psubq:
   12329 ; SKX-SSE:       # %bb.0:
   12330 ; SKX-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.33]
   12331 ; SKX-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [7:0.50]
   12332 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   12333 ;
   12334 ; SKX-LABEL: test_psubq:
   12335 ; SKX:       # %bb.0:
   12336 ; SKX-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   12337 ; SKX-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12338 ; SKX-NEXT:    retq # sched: [7:1.00]
   12339 ;
   12340 ; BTVER2-SSE-LABEL: test_psubq:
   12341 ; BTVER2-SSE:       # %bb.0:
   12342 ; BTVER2-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.50]
   12343 ; BTVER2-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [6:1.00]
   12344 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   12345 ;
   12346 ; BTVER2-LABEL: test_psubq:
   12347 ; BTVER2:       # %bb.0:
   12348 ; BTVER2-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12349 ; BTVER2-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   12350 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   12351 ;
   12352 ; ZNVER1-SSE-LABEL: test_psubq:
   12353 ; ZNVER1-SSE:       # %bb.0:
   12354 ; ZNVER1-SSE-NEXT:    psubq %xmm1, %xmm0 # sched: [1:0.25]
   12355 ; ZNVER1-SSE-NEXT:    psubq (%rdi), %xmm0 # sched: [8:0.50]
   12356 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   12357 ;
   12358 ; ZNVER1-LABEL: test_psubq:
   12359 ; ZNVER1:       # %bb.0:
   12360 ; ZNVER1-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   12361 ; ZNVER1-NEXT:    vpsubq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   12362 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
         ; IR under test: %a0 - %a1, then minus the vector loaded (align 16) from
         ; %a2. The expected output contains no standalone load, so the load is
         ; expected to be folded into the second psubq as its (%rdi) operand.
   12363   %1 = sub <2 x i64> %a0, %a1
   12364   %2 = load <2 x i64>, <2 x i64> *%a2, align 16
   12365   %3 = sub <2 x i64> %1, %2
   12366   ret <2 x i64> %3
   12367 }
   12368 
   12369 define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
         ; Scheduling test for the @llvm.x86.sse2.psubs.b intrinsic. Every CPU block
         ; below expects a register-operand psubsb followed by a memory-operand
         ; psubsb reading (%rdi); the sandy, haswell, broadwell, skylake, skx,
         ; btver2 and znver1 blocks whose prefix has no SSE suffix expect the
         ; three-operand vpsubsb spelling.
         ; The atom block additionally expects four nops ahead of retq.
         ; The bracketed pair after "# sched" is presumably latency and reciprocal
         ; throughput as printed by -print-schedule -- TODO confirm against llc.
         ; These check lines are autogenerated by utils/update_llc_test_checks.py
         ; (see the first line of the file); regenerate them rather than hand-edit.
   12370 ; GENERIC-LABEL: test_psubsb:
   12371 ; GENERIC:       # %bb.0:
   12372 ; GENERIC-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
   12373 ; GENERIC-NEXT:    psubsb (%rdi), %xmm0 # sched: [7:0.50]
   12374 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   12375 ;
   12376 ; ATOM-LABEL: test_psubsb:
   12377 ; ATOM:       # %bb.0:
   12378 ; ATOM-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
   12379 ; ATOM-NEXT:    psubsb (%rdi), %xmm0 # sched: [1:1.00]
   12380 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12381 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12382 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12383 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12384 ; ATOM-NEXT:    retq # sched: [79:39.50]
   12385 ;
   12386 ; SLM-LABEL: test_psubsb:
   12387 ; SLM:       # %bb.0:
   12388 ; SLM-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
   12389 ; SLM-NEXT:    psubsb (%rdi), %xmm0 # sched: [4:1.00]
   12390 ; SLM-NEXT:    retq # sched: [4:1.00]
   12391 ;
   12392 ; SANDY-SSE-LABEL: test_psubsb:
   12393 ; SANDY-SSE:       # %bb.0:
   12394 ; SANDY-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
   12395 ; SANDY-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [7:0.50]
   12396 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   12397 ;
   12398 ; SANDY-LABEL: test_psubsb:
   12399 ; SANDY:       # %bb.0:
   12400 ; SANDY-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12401 ; SANDY-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12402 ; SANDY-NEXT:    retq # sched: [1:1.00]
   12403 ;
   12404 ; HASWELL-SSE-LABEL: test_psubsb:
   12405 ; HASWELL-SSE:       # %bb.0:
   12406 ; HASWELL-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
   12407 ; HASWELL-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [7:0.50]
   12408 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12409 ;
   12410 ; HASWELL-LABEL: test_psubsb:
   12411 ; HASWELL:       # %bb.0:
   12412 ; HASWELL-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12413 ; HASWELL-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12414 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   12415 ;
   12416 ; BROADWELL-SSE-LABEL: test_psubsb:
   12417 ; BROADWELL-SSE:       # %bb.0:
   12418 ; BROADWELL-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
   12419 ; BROADWELL-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [6:0.50]
   12420 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12421 ;
   12422 ; BROADWELL-LABEL: test_psubsb:
   12423 ; BROADWELL:       # %bb.0:
   12424 ; BROADWELL-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12425 ; BROADWELL-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   12426 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   12427 ;
   12428 ; SKYLAKE-SSE-LABEL: test_psubsb:
   12429 ; SKYLAKE-SSE:       # %bb.0:
   12430 ; SKYLAKE-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
   12431 ; SKYLAKE-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [7:0.50]
   12432 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   12433 ;
   12434 ; SKYLAKE-LABEL: test_psubsb:
   12435 ; SKYLAKE:       # %bb.0:
   12436 ; SKYLAKE-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12437 ; SKYLAKE-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12438 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   12439 ;
   12440 ; SKX-SSE-LABEL: test_psubsb:
   12441 ; SKX-SSE:       # %bb.0:
   12442 ; SKX-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
   12443 ; SKX-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [7:0.50]
   12444 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   12445 ;
   12446 ; SKX-LABEL: test_psubsb:
   12447 ; SKX:       # %bb.0:
   12448 ; SKX-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12449 ; SKX-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12450 ; SKX-NEXT:    retq # sched: [7:1.00]
   12451 ;
   12452 ; BTVER2-SSE-LABEL: test_psubsb:
   12453 ; BTVER2-SSE:       # %bb.0:
   12454 ; BTVER2-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.50]
   12455 ; BTVER2-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [6:1.00]
   12456 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   12457 ;
   12458 ; BTVER2-LABEL: test_psubsb:
   12459 ; BTVER2:       # %bb.0:
   12460 ; BTVER2-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12461 ; BTVER2-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   12462 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   12463 ;
   12464 ; ZNVER1-SSE-LABEL: test_psubsb:
   12465 ; ZNVER1-SSE:       # %bb.0:
   12466 ; ZNVER1-SSE-NEXT:    psubsb %xmm1, %xmm0 # sched: [1:0.25]
   12467 ; ZNVER1-SSE-NEXT:    psubsb (%rdi), %xmm0 # sched: [8:0.50]
   12468 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   12469 ;
   12470 ; ZNVER1-LABEL: test_psubsb:
   12471 ; ZNVER1:       # %bb.0:
   12472 ; ZNVER1-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   12473 ; ZNVER1-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   12474 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
         ; IR under test: the intrinsic applied to (%a0, %a1), then applied again to
         ; that result and the vector loaded (align 16) from %a2. The expected output
         ; contains no standalone load, so the load is expected to be folded into
         ; the second psubsb as its (%rdi) operand.
   12475   %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1)
   12476   %2 = load <16 x i8>, <16 x i8> *%a2, align 16
   12477   %3 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %1, <16 x i8> %2)
   12478   ret <16 x i8> %3
   12479 }
   12480 declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone ; intrinsic exercised by test_psubsb above
   12481 
   12482 define <8 x i16> @test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
         ; Scheduling test for the @llvm.x86.sse2.psubs.w intrinsic. Every CPU block
         ; below expects a register-operand psubsw followed by a memory-operand
         ; psubsw reading (%rdi); the sandy, haswell, broadwell, skylake, skx,
         ; btver2 and znver1 blocks whose prefix has no SSE suffix expect the
         ; three-operand vpsubsw spelling.
         ; The atom block additionally expects four nops ahead of retq.
         ; The bracketed pair after "# sched" is presumably latency and reciprocal
         ; throughput as printed by -print-schedule -- TODO confirm against llc.
         ; These check lines are autogenerated by utils/update_llc_test_checks.py
         ; (see the first line of the file); regenerate them rather than hand-edit.
   12483 ; GENERIC-LABEL: test_psubsw:
   12484 ; GENERIC:       # %bb.0:
   12485 ; GENERIC-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
   12486 ; GENERIC-NEXT:    psubsw (%rdi), %xmm0 # sched: [7:0.50]
   12487 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   12488 ;
   12489 ; ATOM-LABEL: test_psubsw:
   12490 ; ATOM:       # %bb.0:
   12491 ; ATOM-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
   12492 ; ATOM-NEXT:    psubsw (%rdi), %xmm0 # sched: [1:1.00]
   12493 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12494 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12495 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12496 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12497 ; ATOM-NEXT:    retq # sched: [79:39.50]
   12498 ;
   12499 ; SLM-LABEL: test_psubsw:
   12500 ; SLM:       # %bb.0:
   12501 ; SLM-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
   12502 ; SLM-NEXT:    psubsw (%rdi), %xmm0 # sched: [4:1.00]
   12503 ; SLM-NEXT:    retq # sched: [4:1.00]
   12504 ;
   12505 ; SANDY-SSE-LABEL: test_psubsw:
   12506 ; SANDY-SSE:       # %bb.0:
   12507 ; SANDY-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
   12508 ; SANDY-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [7:0.50]
   12509 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   12510 ;
   12511 ; SANDY-LABEL: test_psubsw:
   12512 ; SANDY:       # %bb.0:
   12513 ; SANDY-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12514 ; SANDY-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12515 ; SANDY-NEXT:    retq # sched: [1:1.00]
   12516 ;
   12517 ; HASWELL-SSE-LABEL: test_psubsw:
   12518 ; HASWELL-SSE:       # %bb.0:
   12519 ; HASWELL-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
   12520 ; HASWELL-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [7:0.50]
   12521 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12522 ;
   12523 ; HASWELL-LABEL: test_psubsw:
   12524 ; HASWELL:       # %bb.0:
   12525 ; HASWELL-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12526 ; HASWELL-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12527 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   12528 ;
   12529 ; BROADWELL-SSE-LABEL: test_psubsw:
   12530 ; BROADWELL-SSE:       # %bb.0:
   12531 ; BROADWELL-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
   12532 ; BROADWELL-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [6:0.50]
   12533 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12534 ;
   12535 ; BROADWELL-LABEL: test_psubsw:
   12536 ; BROADWELL:       # %bb.0:
   12537 ; BROADWELL-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12538 ; BROADWELL-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   12539 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   12540 ;
   12541 ; SKYLAKE-SSE-LABEL: test_psubsw:
   12542 ; SKYLAKE-SSE:       # %bb.0:
   12543 ; SKYLAKE-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
   12544 ; SKYLAKE-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [7:0.50]
   12545 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   12546 ;
   12547 ; SKYLAKE-LABEL: test_psubsw:
   12548 ; SKYLAKE:       # %bb.0:
   12549 ; SKYLAKE-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12550 ; SKYLAKE-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12551 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   12552 ;
   12553 ; SKX-SSE-LABEL: test_psubsw:
   12554 ; SKX-SSE:       # %bb.0:
   12555 ; SKX-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
   12556 ; SKX-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [7:0.50]
   12557 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   12558 ;
   12559 ; SKX-LABEL: test_psubsw:
   12560 ; SKX:       # %bb.0:
   12561 ; SKX-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12562 ; SKX-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12563 ; SKX-NEXT:    retq # sched: [7:1.00]
   12564 ;
   12565 ; BTVER2-SSE-LABEL: test_psubsw:
   12566 ; BTVER2-SSE:       # %bb.0:
   12567 ; BTVER2-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.50]
   12568 ; BTVER2-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [6:1.00]
   12569 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   12570 ;
   12571 ; BTVER2-LABEL: test_psubsw:
   12572 ; BTVER2:       # %bb.0:
   12573 ; BTVER2-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12574 ; BTVER2-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   12575 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   12576 ;
   12577 ; ZNVER1-SSE-LABEL: test_psubsw:
   12578 ; ZNVER1-SSE:       # %bb.0:
   12579 ; ZNVER1-SSE-NEXT:    psubsw %xmm1, %xmm0 # sched: [1:0.25]
   12580 ; ZNVER1-SSE-NEXT:    psubsw (%rdi), %xmm0 # sched: [8:0.50]
   12581 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   12582 ;
   12583 ; ZNVER1-LABEL: test_psubsw:
   12584 ; ZNVER1:       # %bb.0:
   12585 ; ZNVER1-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   12586 ; ZNVER1-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   12587 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
         ; IR under test: the intrinsic applied to (%a0, %a1), then applied again to
         ; that result and the vector loaded (align 16) from %a2. The expected output
         ; contains no standalone load, so the load is expected to be folded into
         ; the second psubsw as its (%rdi) operand.
   12588   %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1)
   12589   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   12590   %3 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %1, <8 x i16> %2)
   12591   ret <8 x i16> %3
   12592 }
   12593 declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone ; intrinsic exercised by test_psubsw above
   12594 
   12595 define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
   12596 ; GENERIC-LABEL: test_psubusb:
   12597 ; GENERIC:       # %bb.0:
   12598 ; GENERIC-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
   12599 ; GENERIC-NEXT:    psubusb (%rdi), %xmm0 # sched: [7:0.50]
   12600 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   12601 ;
   12602 ; ATOM-LABEL: test_psubusb:
   12603 ; ATOM:       # %bb.0:
   12604 ; ATOM-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
   12605 ; ATOM-NEXT:    psubusb (%rdi), %xmm0 # sched: [1:1.00]
   12606 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12607 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12608 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12609 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12610 ; ATOM-NEXT:    retq # sched: [79:39.50]
   12611 ;
   12612 ; SLM-LABEL: test_psubusb:
   12613 ; SLM:       # %bb.0:
   12614 ; SLM-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
   12615 ; SLM-NEXT:    psubusb (%rdi), %xmm0 # sched: [4:1.00]
   12616 ; SLM-NEXT:    retq # sched: [4:1.00]
   12617 ;
   12618 ; SANDY-SSE-LABEL: test_psubusb:
   12619 ; SANDY-SSE:       # %bb.0:
   12620 ; SANDY-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
   12621 ; SANDY-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [7:0.50]
   12622 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   12623 ;
   12624 ; SANDY-LABEL: test_psubusb:
   12625 ; SANDY:       # %bb.0:
   12626 ; SANDY-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12627 ; SANDY-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12628 ; SANDY-NEXT:    retq # sched: [1:1.00]
   12629 ;
   12630 ; HASWELL-SSE-LABEL: test_psubusb:
   12631 ; HASWELL-SSE:       # %bb.0:
   12632 ; HASWELL-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
   12633 ; HASWELL-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [7:0.50]
   12634 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12635 ;
   12636 ; HASWELL-LABEL: test_psubusb:
   12637 ; HASWELL:       # %bb.0:
   12638 ; HASWELL-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12639 ; HASWELL-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12640 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   12641 ;
   12642 ; BROADWELL-SSE-LABEL: test_psubusb:
   12643 ; BROADWELL-SSE:       # %bb.0:
   12644 ; BROADWELL-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
   12645 ; BROADWELL-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [6:0.50]
   12646 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12647 ;
   12648 ; BROADWELL-LABEL: test_psubusb:
   12649 ; BROADWELL:       # %bb.0:
   12650 ; BROADWELL-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12651 ; BROADWELL-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   12652 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   12653 ;
   12654 ; SKYLAKE-SSE-LABEL: test_psubusb:
   12655 ; SKYLAKE-SSE:       # %bb.0:
   12656 ; SKYLAKE-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
   12657 ; SKYLAKE-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [7:0.50]
   12658 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   12659 ;
   12660 ; SKYLAKE-LABEL: test_psubusb:
   12661 ; SKYLAKE:       # %bb.0:
   12662 ; SKYLAKE-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12663 ; SKYLAKE-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12664 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   12665 ;
   12666 ; SKX-SSE-LABEL: test_psubusb:
   12667 ; SKX-SSE:       # %bb.0:
   12668 ; SKX-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
   12669 ; SKX-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [7:0.50]
   12670 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   12671 ;
   12672 ; SKX-LABEL: test_psubusb:
   12673 ; SKX:       # %bb.0:
   12674 ; SKX-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12675 ; SKX-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12676 ; SKX-NEXT:    retq # sched: [7:1.00]
   12677 ;
   12678 ; BTVER2-SSE-LABEL: test_psubusb:
   12679 ; BTVER2-SSE:       # %bb.0:
   12680 ; BTVER2-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.50]
   12681 ; BTVER2-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [6:1.00]
   12682 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   12683 ;
   12684 ; BTVER2-LABEL: test_psubusb:
   12685 ; BTVER2:       # %bb.0:
   12686 ; BTVER2-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12687 ; BTVER2-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   12688 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   12689 ;
   12690 ; ZNVER1-SSE-LABEL: test_psubusb:
   12691 ; ZNVER1-SSE:       # %bb.0:
   12692 ; ZNVER1-SSE-NEXT:    psubusb %xmm1, %xmm0 # sched: [1:0.25]
   12693 ; ZNVER1-SSE-NEXT:    psubusb (%rdi), %xmm0 # sched: [8:0.50]
   12694 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   12695 ;
   12696 ; ZNVER1-LABEL: test_psubusb:
   12697 ; ZNVER1:       # %bb.0:
   12698 ; ZNVER1-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   12699 ; ZNVER1-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   12700 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   12701   %1 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1)
   12702   %2 = load <16 x i8>, <16 x i8> *%a2, align 16
   12703   %3 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %1, <16 x i8> %2)
   12704   ret <16 x i8> %3
   12705 }
   12706 declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone ; intrinsic called twice by test_psubusb; takes two <16 x i8> vectors and returns one
   12707 
   12708 define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
         ; Exercises the psubus.w intrinsic in two forms: %1 takes both operands
         ; from the vector arguments, and %3 takes its second operand from a
         ; 16-byte-aligned load through %a2. The expected assembly below is one
         ; register-register and one register-memory psubusw (vpsubusw under the
         ; SANDY, HASWELL, BROADWELL, SKYLAKE, SKX, BTVER2 and ZNVER1 prefixes),
         ; each followed by its "sched" annotation. The ATOM expectations also
         ; contain four nop instructions ahead of retq.
   12709 ; GENERIC-LABEL: test_psubusw:
   12710 ; GENERIC:       # %bb.0:
   12711 ; GENERIC-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
   12712 ; GENERIC-NEXT:    psubusw (%rdi), %xmm0 # sched: [7:0.50]
   12713 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   12714 ;
   12715 ; ATOM-LABEL: test_psubusw:
   12716 ; ATOM:       # %bb.0:
   12717 ; ATOM-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
   12718 ; ATOM-NEXT:    psubusw (%rdi), %xmm0 # sched: [1:1.00]
   12719 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12720 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12721 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12722 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12723 ; ATOM-NEXT:    retq # sched: [79:39.50]
   12724 ;
   12725 ; SLM-LABEL: test_psubusw:
   12726 ; SLM:       # %bb.0:
   12727 ; SLM-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
   12728 ; SLM-NEXT:    psubusw (%rdi), %xmm0 # sched: [4:1.00]
   12729 ; SLM-NEXT:    retq # sched: [4:1.00]
   12730 ;
   12731 ; SANDY-SSE-LABEL: test_psubusw:
   12732 ; SANDY-SSE:       # %bb.0:
   12733 ; SANDY-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
   12734 ; SANDY-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [7:0.50]
   12735 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   12736 ;
   12737 ; SANDY-LABEL: test_psubusw:
   12738 ; SANDY:       # %bb.0:
   12739 ; SANDY-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12740 ; SANDY-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12741 ; SANDY-NEXT:    retq # sched: [1:1.00]
   12742 ;
   12743 ; HASWELL-SSE-LABEL: test_psubusw:
   12744 ; HASWELL-SSE:       # %bb.0:
   12745 ; HASWELL-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
   12746 ; HASWELL-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [7:0.50]
   12747 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12748 ;
   12749 ; HASWELL-LABEL: test_psubusw:
   12750 ; HASWELL:       # %bb.0:
   12751 ; HASWELL-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12752 ; HASWELL-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12753 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   12754 ;
   12755 ; BROADWELL-SSE-LABEL: test_psubusw:
   12756 ; BROADWELL-SSE:       # %bb.0:
   12757 ; BROADWELL-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
   12758 ; BROADWELL-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [6:0.50]
   12759 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12760 ;
   12761 ; BROADWELL-LABEL: test_psubusw:
   12762 ; BROADWELL:       # %bb.0:
   12763 ; BROADWELL-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12764 ; BROADWELL-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   12765 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   12766 ;
   12767 ; SKYLAKE-SSE-LABEL: test_psubusw:
   12768 ; SKYLAKE-SSE:       # %bb.0:
   12769 ; SKYLAKE-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
   12770 ; SKYLAKE-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [7:0.50]
   12771 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   12772 ;
   12773 ; SKYLAKE-LABEL: test_psubusw:
   12774 ; SKYLAKE:       # %bb.0:
   12775 ; SKYLAKE-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12776 ; SKYLAKE-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12777 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   12778 ;
   12779 ; SKX-SSE-LABEL: test_psubusw:
   12780 ; SKX-SSE:       # %bb.0:
   12781 ; SKX-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
   12782 ; SKX-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [7:0.50]
   12783 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   12784 ;
   12785 ; SKX-LABEL: test_psubusw:
   12786 ; SKX:       # %bb.0:
   12787 ; SKX-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12788 ; SKX-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12789 ; SKX-NEXT:    retq # sched: [7:1.00]
   12790 ;
   12791 ; BTVER2-SSE-LABEL: test_psubusw:
   12792 ; BTVER2-SSE:       # %bb.0:
   12793 ; BTVER2-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.50]
   12794 ; BTVER2-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [6:1.00]
   12795 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   12796 ;
   12797 ; BTVER2-LABEL: test_psubusw:
   12798 ; BTVER2:       # %bb.0:
   12799 ; BTVER2-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12800 ; BTVER2-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   12801 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   12802 ;
   12803 ; ZNVER1-SSE-LABEL: test_psubusw:
   12804 ; ZNVER1-SSE:       # %bb.0:
   12805 ; ZNVER1-SSE-NEXT:    psubusw %xmm1, %xmm0 # sched: [1:0.25]
   12806 ; ZNVER1-SSE-NEXT:    psubusw (%rdi), %xmm0 # sched: [8:0.50]
   12807 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   12808 ;
   12809 ; ZNVER1-LABEL: test_psubusw:
   12810 ; ZNVER1:       # %bb.0:
   12811 ; ZNVER1-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   12812 ; ZNVER1-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   12813 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   12814   %1 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1)
   12815   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   12816   %3 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %1, <8 x i16> %2)
   12817   ret <8 x i16> %3
   12818 }
   12819 declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone ; intrinsic called twice by test_psubusw; takes two <8 x i16> vectors and returns one
   12820 
   12821 define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
         ; Uses the plain IR "sub" instruction on <8 x i16> rather than an
         ; intrinsic: %1 subtracts %a1 from %a0, and %3 subtracts a vector loaded
         ; (16-byte aligned) through %a2 from %1. The expected assembly below is
         ; one register-register and one register-memory psubw (vpsubw under the
         ; SANDY, HASWELL, BROADWELL, SKYLAKE, SKX, BTVER2 and ZNVER1 prefixes),
         ; each followed by its "sched" annotation. The ATOM expectations also
         ; contain four nop instructions ahead of retq.
   12822 ; GENERIC-LABEL: test_psubw:
   12823 ; GENERIC:       # %bb.0:
   12824 ; GENERIC-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
   12825 ; GENERIC-NEXT:    psubw (%rdi), %xmm0 # sched: [7:0.50]
   12826 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   12827 ;
   12828 ; ATOM-LABEL: test_psubw:
   12829 ; ATOM:       # %bb.0:
   12830 ; ATOM-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
   12831 ; ATOM-NEXT:    psubw (%rdi), %xmm0 # sched: [1:1.00]
   12832 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12833 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12834 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12835 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12836 ; ATOM-NEXT:    retq # sched: [79:39.50]
   12837 ;
   12838 ; SLM-LABEL: test_psubw:
   12839 ; SLM:       # %bb.0:
   12840 ; SLM-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
   12841 ; SLM-NEXT:    psubw (%rdi), %xmm0 # sched: [4:1.00]
   12842 ; SLM-NEXT:    retq # sched: [4:1.00]
   12843 ;
   12844 ; SANDY-SSE-LABEL: test_psubw:
   12845 ; SANDY-SSE:       # %bb.0:
   12846 ; SANDY-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
   12847 ; SANDY-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [7:0.50]
   12848 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   12849 ;
   12850 ; SANDY-LABEL: test_psubw:
   12851 ; SANDY:       # %bb.0:
   12852 ; SANDY-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12853 ; SANDY-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12854 ; SANDY-NEXT:    retq # sched: [1:1.00]
   12855 ;
   12856 ; HASWELL-SSE-LABEL: test_psubw:
   12857 ; HASWELL-SSE:       # %bb.0:
   12858 ; HASWELL-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
   12859 ; HASWELL-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [7:0.50]
   12860 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12861 ;
   12862 ; HASWELL-LABEL: test_psubw:
   12863 ; HASWELL:       # %bb.0:
   12864 ; HASWELL-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12865 ; HASWELL-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12866 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   12867 ;
   12868 ; BROADWELL-SSE-LABEL: test_psubw:
   12869 ; BROADWELL-SSE:       # %bb.0:
   12870 ; BROADWELL-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
   12871 ; BROADWELL-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [6:0.50]
   12872 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12873 ;
   12874 ; BROADWELL-LABEL: test_psubw:
   12875 ; BROADWELL:       # %bb.0:
   12876 ; BROADWELL-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12877 ; BROADWELL-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   12878 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   12879 ;
   12880 ; SKYLAKE-SSE-LABEL: test_psubw:
   12881 ; SKYLAKE-SSE:       # %bb.0:
   12882 ; SKYLAKE-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.33]
   12883 ; SKYLAKE-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [7:0.50]
   12884 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   12885 ;
   12886 ; SKYLAKE-LABEL: test_psubw:
   12887 ; SKYLAKE:       # %bb.0:
   12888 ; SKYLAKE-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   12889 ; SKYLAKE-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12890 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   12891 ;
   12892 ; SKX-SSE-LABEL: test_psubw:
   12893 ; SKX-SSE:       # %bb.0:
   12894 ; SKX-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.33]
   12895 ; SKX-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [7:0.50]
   12896 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   12897 ;
   12898 ; SKX-LABEL: test_psubw:
   12899 ; SKX:       # %bb.0:
   12900 ; SKX-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   12901 ; SKX-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   12902 ; SKX-NEXT:    retq # sched: [7:1.00]
   12903 ;
   12904 ; BTVER2-SSE-LABEL: test_psubw:
   12905 ; BTVER2-SSE:       # %bb.0:
   12906 ; BTVER2-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.50]
   12907 ; BTVER2-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [6:1.00]
   12908 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   12909 ;
   12910 ; BTVER2-LABEL: test_psubw:
   12911 ; BTVER2:       # %bb.0:
   12912 ; BTVER2-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   12913 ; BTVER2-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   12914 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   12915 ;
   12916 ; ZNVER1-SSE-LABEL: test_psubw:
   12917 ; ZNVER1-SSE:       # %bb.0:
   12918 ; ZNVER1-SSE-NEXT:    psubw %xmm1, %xmm0 # sched: [1:0.25]
   12919 ; ZNVER1-SSE-NEXT:    psubw (%rdi), %xmm0 # sched: [8:0.50]
   12920 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   12921 ;
   12922 ; ZNVER1-LABEL: test_psubw:
   12923 ; ZNVER1:       # %bb.0:
   12924 ; ZNVER1-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   12925 ; ZNVER1-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   12926 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   12927   %1 = sub <8 x i16> %a0, %a1
   12928   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   12929   %3 = sub <8 x i16> %1, %2
   12930   ret <8 x i16> %3
   12931 }
   12932 
   12933 define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
         ; Both shufflevectors use the same mask (8, 24, 9, 25, ... 15, 31), which
         ; the decoded shuffle comments in the expectations below show as elements
         ; 8-15 of the first operand interleaved with elements 8-15 of the second.
         ; %1 shuffles the two vector arguments; %3 shuffles %1 with a vector
         ; loaded (16-byte aligned) through %a2. The expected assembly is one
         ; register-register and one memory-operand punpckhbw (vpunpckhbw under
         ; the SANDY, HASWELL, BROADWELL, SKYLAKE, SKX, BTVER2 and ZNVER1
         ; prefixes), each followed by its "sched" annotation. The ATOM
         ; expectations also contain four nop instructions ahead of retq.
   12934 ; GENERIC-LABEL: test_punpckhbw:
   12935 ; GENERIC:       # %bb.0:
   12936 ; GENERIC-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
   12937 ; GENERIC-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
   12938 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   12939 ;
   12940 ; ATOM-LABEL: test_punpckhbw:
   12941 ; ATOM:       # %bb.0:
   12942 ; ATOM-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
   12943 ; ATOM-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00]
   12944 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12945 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12946 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12947 ; ATOM-NEXT:    nop # sched: [1:0.50]
   12948 ; ATOM-NEXT:    retq # sched: [79:39.50]
   12949 ;
   12950 ; SLM-LABEL: test_punpckhbw:
   12951 ; SLM:       # %bb.0:
   12952 ; SLM-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
   12953 ; SLM-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [4:1.00]
   12954 ; SLM-NEXT:    retq # sched: [4:1.00]
   12955 ;
   12956 ; SANDY-SSE-LABEL: test_punpckhbw:
   12957 ; SANDY-SSE:       # %bb.0:
   12958 ; SANDY-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
   12959 ; SANDY-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
   12960 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   12961 ;
   12962 ; SANDY-LABEL: test_punpckhbw:
   12963 ; SANDY:       # %bb.0:
   12964 ; SANDY-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
   12965 ; SANDY-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
   12966 ; SANDY-NEXT:    retq # sched: [1:1.00]
   12967 ;
   12968 ; HASWELL-SSE-LABEL: test_punpckhbw:
   12969 ; HASWELL-SSE:       # %bb.0:
   12970 ; HASWELL-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
   12971 ; HASWELL-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
   12972 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12973 ;
   12974 ; HASWELL-LABEL: test_punpckhbw:
   12975 ; HASWELL:       # %bb.0:
   12976 ; HASWELL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
   12977 ; HASWELL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
   12978 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   12979 ;
   12980 ; BROADWELL-SSE-LABEL: test_punpckhbw:
   12981 ; BROADWELL-SSE:       # %bb.0:
   12982 ; BROADWELL-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
   12983 ; BROADWELL-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
   12984 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   12985 ;
   12986 ; BROADWELL-LABEL: test_punpckhbw:
   12987 ; BROADWELL:       # %bb.0:
   12988 ; BROADWELL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
   12989 ; BROADWELL-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
   12990 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   12991 ;
   12992 ; SKYLAKE-SSE-LABEL: test_punpckhbw:
   12993 ; SKYLAKE-SSE:       # %bb.0:
   12994 ; SKYLAKE-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
   12995 ; SKYLAKE-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
   12996 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   12997 ;
   12998 ; SKYLAKE-LABEL: test_punpckhbw:
   12999 ; SKYLAKE:       # %bb.0:
   13000 ; SKYLAKE-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
   13001 ; SKYLAKE-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
   13002 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   13003 ;
   13004 ; SKX-SSE-LABEL: test_punpckhbw:
   13005 ; SKX-SSE:       # %bb.0:
   13006 ; SKX-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
   13007 ; SKX-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
   13008 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   13009 ;
   13010 ; SKX-LABEL: test_punpckhbw:
   13011 ; SKX:       # %bb.0:
   13012 ; SKX-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
   13013 ; SKX-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
   13014 ; SKX-NEXT:    retq # sched: [7:1.00]
   13015 ;
   13016 ; BTVER2-SSE-LABEL: test_punpckhbw:
   13017 ; BTVER2-SSE:       # %bb.0:
   13018 ; BTVER2-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
   13019 ; BTVER2-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
   13020 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   13021 ;
   13022 ; BTVER2-LABEL: test_punpckhbw:
   13023 ; BTVER2:       # %bb.0:
   13024 ; BTVER2-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
   13025 ; BTVER2-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
   13026 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   13027 ;
   13028 ; ZNVER1-SSE-LABEL: test_punpckhbw:
   13029 ; ZNVER1-SSE:       # %bb.0:
   13030 ; ZNVER1-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25]
   13031 ; ZNVER1-SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50]
   13032 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   13033 ;
   13034 ; ZNVER1-LABEL: test_punpckhbw:
   13035 ; ZNVER1:       # %bb.0:
   13036 ; ZNVER1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25]
   13037 ; ZNVER1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50]
   13038 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   13039   %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
   13040   %2 = load <16 x i8>, <16 x i8> *%a2, align 16
   13041   %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
   13042   ret <16 x i8> %3
   13043 }
   13044 
   13045 define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
         ; Two independent shuffles with mask (2, 6, 3, 7) whose results are added.
         ; %1 shuffles %a0 with %a1; %3 shuffles %a1 (not %1) with a vector loaded
         ; (16-byte aligned) through %a2; %4 is their sum. The expected assembly
         ; is therefore a register-register punpckhdq into xmm0, a memory-operand
         ; punpckhdq into xmm1, and a paddd combining them (vpunpckhdq / vpaddd
         ; under the SANDY, HASWELL, BROADWELL, SKYLAKE, SKX, BTVER2 and ZNVER1
         ; prefixes), each followed by its "sched" annotation. The ATOM
         ; expectations also contain two nop instructions ahead of retq.
   13046 ; GENERIC-LABEL: test_punpckhdq:
   13047 ; GENERIC:       # %bb.0:
   13048 ; GENERIC-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
   13049 ; GENERIC-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
   13050 ; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   13051 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   13052 ;
   13053 ; ATOM-LABEL: test_punpckhdq:
   13054 ; ATOM:       # %bb.0:
   13055 ; ATOM-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13056 ; ATOM-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00]
   13057 ; ATOM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   13058 ; ATOM-NEXT:    nop # sched: [1:0.50]
   13059 ; ATOM-NEXT:    nop # sched: [1:0.50]
   13060 ; ATOM-NEXT:    retq # sched: [79:39.50]
   13061 ;
   13062 ; SLM-LABEL: test_punpckhdq:
   13063 ; SLM:       # %bb.0:
   13064 ; SLM-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13065 ; SLM-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [4:1.00]
   13066 ; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   13067 ; SLM-NEXT:    retq # sched: [4:1.00]
   13068 ;
   13069 ; SANDY-SSE-LABEL: test_punpckhdq:
   13070 ; SANDY-SSE:       # %bb.0:
   13071 ; SANDY-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
   13072 ; SANDY-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
   13073 ; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   13074 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   13075 ;
   13076 ; SANDY-LABEL: test_punpckhdq:
   13077 ; SANDY:       # %bb.0:
   13078 ; SANDY-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
   13079 ; SANDY-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
   13080 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13081 ; SANDY-NEXT:    retq # sched: [1:1.00]
   13082 ;
   13083 ; HASWELL-SSE-LABEL: test_punpckhdq:
   13084 ; HASWELL-SSE:       # %bb.0:
   13085 ; HASWELL-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13086 ; HASWELL-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
   13087 ; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   13088 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   13089 ;
   13090 ; HASWELL-LABEL: test_punpckhdq:
   13091 ; HASWELL:       # %bb.0:
   13092 ; HASWELL-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13093 ; HASWELL-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
   13094 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13095 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   13096 ;
   13097 ; BROADWELL-SSE-LABEL: test_punpckhdq:
   13098 ; BROADWELL-SSE:       # %bb.0:
   13099 ; BROADWELL-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13100 ; BROADWELL-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
   13101 ; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   13102 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   13103 ;
   13104 ; BROADWELL-LABEL: test_punpckhdq:
   13105 ; BROADWELL:       # %bb.0:
   13106 ; BROADWELL-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13107 ; BROADWELL-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
   13108 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13109 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   13110 ;
   13111 ; SKYLAKE-SSE-LABEL: test_punpckhdq:
   13112 ; SKYLAKE-SSE:       # %bb.0:
   13113 ; SKYLAKE-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13114 ; SKYLAKE-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
   13115 ; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   13116 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   13117 ;
   13118 ; SKYLAKE-LABEL: test_punpckhdq:
   13119 ; SKYLAKE:       # %bb.0:
   13120 ; SKYLAKE-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13121 ; SKYLAKE-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
   13122 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   13123 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   13124 ;
   13125 ; SKX-SSE-LABEL: test_punpckhdq:
   13126 ; SKX-SSE:       # %bb.0:
   13127 ; SKX-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13128 ; SKX-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
   13129 ; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   13130 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   13131 ;
   13132 ; SKX-LABEL: test_punpckhdq:
   13133 ; SKX:       # %bb.0:
   13134 ; SKX-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13135 ; SKX-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
   13136 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   13137 ; SKX-NEXT:    retq # sched: [7:1.00]
   13138 ;
   13139 ; BTVER2-SSE-LABEL: test_punpckhdq:
   13140 ; BTVER2-SSE:       # %bb.0:
   13141 ; BTVER2-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
   13142 ; BTVER2-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
   13143 ; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   13144 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   13145 ;
   13146 ; BTVER2-LABEL: test_punpckhdq:
   13147 ; BTVER2:       # %bb.0:
   13148 ; BTVER2-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
   13149 ; BTVER2-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
   13150 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13151 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   13152 ;
   13153 ; ZNVER1-SSE-LABEL: test_punpckhdq:
   13154 ; ZNVER1-SSE:       # %bb.0:
   13155 ; ZNVER1-SSE-NEXT:    punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
   13156 ; ZNVER1-SSE-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
   13157 ; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
   13158 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   13159 ;
   13160 ; ZNVER1-LABEL: test_punpckhdq:
   13161 ; ZNVER1:       # %bb.0:
   13162 ; ZNVER1-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
   13163 ; ZNVER1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
   13164 ; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   13165 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   13166   %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
   13167   %2 = load <4 x i32>, <4 x i32> *%a2, align 16
   13168   %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
   13169   %4 = add <4 x i32> %1, %3
   13170   ret <4 x i32> %4
   13171 }
   13172 
   13173 define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
         ; Scheduling test for punpckhqdq / vpunpckhqdq.
         ; The first shufflevector pairs element 1 of %a0 with element 1 of %a1
         ; (register operand form); the second pairs element 1 of %a1 with element 1
         ; of the vector loaded from %a2 (memory operand form). The final add consumes
         ; both shuffle results. The FileCheck assertions below are autogenerated
         ; (see the note at the top of the file); regenerate them instead of editing
         ; them by hand.
   13174 ; GENERIC-LABEL: test_punpckhqdq:
   13175 ; GENERIC:       # %bb.0:
   13176 ; GENERIC-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
   13177 ; GENERIC-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
   13178 ; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13179 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   13180 ;
   13181 ; ATOM-LABEL: test_punpckhqdq:
   13182 ; ATOM:       # %bb.0:
   13183 ; ATOM-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   13184 ; ATOM-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00]
   13185 ; ATOM-NEXT:    paddq %xmm1, %xmm0 # sched: [2:1.00]
   13186 ; ATOM-NEXT:    retq # sched: [79:39.50]
   13187 ;
   13188 ; SLM-LABEL: test_punpckhqdq:
   13189 ; SLM:       # %bb.0:
   13190 ; SLM-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   13191 ; SLM-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [4:1.00]
   13192 ; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13193 ; SLM-NEXT:    retq # sched: [4:1.00]
   13194 ;
   13195 ; SANDY-SSE-LABEL: test_punpckhqdq:
   13196 ; SANDY-SSE:       # %bb.0:
   13197 ; SANDY-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
   13198 ; SANDY-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
   13199 ; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13200 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   13201 ;
   13202 ; SANDY-LABEL: test_punpckhqdq:
   13203 ; SANDY:       # %bb.0:
   13204 ; SANDY-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
   13205 ; SANDY-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
   13206 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13207 ; SANDY-NEXT:    retq # sched: [1:1.00]
   13208 ;
   13209 ; HASWELL-SSE-LABEL: test_punpckhqdq:
   13210 ; HASWELL-SSE:       # %bb.0:
   13211 ; HASWELL-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   13212 ; HASWELL-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
   13213 ; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13214 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   13215 ;
   13216 ; HASWELL-LABEL: test_punpckhqdq:
   13217 ; HASWELL:       # %bb.0:
   13218 ; HASWELL-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   13219 ; HASWELL-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
   13220 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13221 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   13222 ;
   13223 ; BROADWELL-SSE-LABEL: test_punpckhqdq:
   13224 ; BROADWELL-SSE:       # %bb.0:
   13225 ; BROADWELL-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   13226 ; BROADWELL-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
   13227 ; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13228 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   13229 ;
   13230 ; BROADWELL-LABEL: test_punpckhqdq:
   13231 ; BROADWELL:       # %bb.0:
   13232 ; BROADWELL-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   13233 ; BROADWELL-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
   13234 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13235 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   13236 ;
   13237 ; SKYLAKE-SSE-LABEL: test_punpckhqdq:
   13238 ; SKYLAKE-SSE:       # %bb.0:
   13239 ; SKYLAKE-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   13240 ; SKYLAKE-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
   13241 ; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   13242 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   13243 ;
   13244 ; SKYLAKE-LABEL: test_punpckhqdq:
   13245 ; SKYLAKE:       # %bb.0:
   13246 ; SKYLAKE-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   13247 ; SKYLAKE-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
   13248 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   13249 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   13250 ;
   13251 ; SKX-SSE-LABEL: test_punpckhqdq:
   13252 ; SKX-SSE:       # %bb.0:
   13253 ; SKX-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   13254 ; SKX-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
   13255 ; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   13256 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   13257 ;
   13258 ; SKX-LABEL: test_punpckhqdq:
   13259 ; SKX:       # %bb.0:
   13260 ; SKX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   13261 ; SKX-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
   13262 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   13263 ; SKX-NEXT:    retq # sched: [7:1.00]
   13264 ;
   13265 ; BTVER2-SSE-LABEL: test_punpckhqdq:
   13266 ; BTVER2-SSE:       # %bb.0:
   13267 ; BTVER2-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
   13268 ; BTVER2-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
   13269 ; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13270 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   13271 ;
   13272 ; BTVER2-LABEL: test_punpckhqdq:
   13273 ; BTVER2:       # %bb.0:
   13274 ; BTVER2-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
   13275 ; BTVER2-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
   13276 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13277 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   13278 ;
   13279 ; ZNVER1-SSE-LABEL: test_punpckhqdq:
   13280 ; ZNVER1-SSE:       # %bb.0:
   13281 ; ZNVER1-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25]
   13282 ; ZNVER1-SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
   13283 ; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
   13284 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   13285 ;
   13286 ; ZNVER1-LABEL: test_punpckhqdq:
   13287 ; ZNVER1:       # %bb.0:
   13288 ; ZNVER1-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25]
   13289 ; ZNVER1-NEXT:    vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
   13290 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   13291 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   13292   %1 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
   13293   %2 = load <2 x i64>, <2 x i64> *%a2, align 16
   13294   %3 = shufflevector <2 x i64> %a1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
   13295   %4 = add <2 x i64> %1, %3
   13296   ret <2 x i64> %4
   13297 }
   13298 
   13299 define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
         ; Scheduling test for punpckhwd / vpunpckhwd.
         ; The first shufflevector interleaves elements 4..7 of %a0 with elements 4..7
         ; of %a1 (register operand form). The second applies the same mask to that
         ; result and the vector loaded from %a2 (memory operand form), so the two
         ; unpacks are chained and the second result is returned directly. The
         ; FileCheck assertions below are autogenerated (see the note at the top of
         ; the file); regenerate them instead of editing them by hand.
   13300 ; GENERIC-LABEL: test_punpckhwd:
   13301 ; GENERIC:       # %bb.0:
   13302 ; GENERIC-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
   13303 ; GENERIC-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
   13304 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   13305 ;
   13306 ; ATOM-LABEL: test_punpckhwd:
   13307 ; ATOM:       # %bb.0:
   13308 ; ATOM-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13309 ; ATOM-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00]
   13310 ; ATOM-NEXT:    nop # sched: [1:0.50]
   13311 ; ATOM-NEXT:    nop # sched: [1:0.50]
   13312 ; ATOM-NEXT:    nop # sched: [1:0.50]
   13313 ; ATOM-NEXT:    nop # sched: [1:0.50]
   13314 ; ATOM-NEXT:    retq # sched: [79:39.50]
   13315 ;
   13316 ; SLM-LABEL: test_punpckhwd:
   13317 ; SLM:       # %bb.0:
   13318 ; SLM-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13319 ; SLM-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00]
   13320 ; SLM-NEXT:    retq # sched: [4:1.00]
   13321 ;
   13322 ; SANDY-SSE-LABEL: test_punpckhwd:
   13323 ; SANDY-SSE:       # %bb.0:
   13324 ; SANDY-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
   13325 ; SANDY-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
   13326 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   13327 ;
   13328 ; SANDY-LABEL: test_punpckhwd:
   13329 ; SANDY:       # %bb.0:
   13330 ; SANDY-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
   13331 ; SANDY-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
   13332 ; SANDY-NEXT:    retq # sched: [1:1.00]
   13333 ;
   13334 ; HASWELL-SSE-LABEL: test_punpckhwd:
   13335 ; HASWELL-SSE:       # %bb.0:
   13336 ; HASWELL-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13337 ; HASWELL-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
   13338 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   13339 ;
   13340 ; HASWELL-LABEL: test_punpckhwd:
   13341 ; HASWELL:       # %bb.0:
   13342 ; HASWELL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13343 ; HASWELL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
   13344 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   13345 ;
   13346 ; BROADWELL-SSE-LABEL: test_punpckhwd:
   13347 ; BROADWELL-SSE:       # %bb.0:
   13348 ; BROADWELL-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13349 ; BROADWELL-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
   13350 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   13351 ;
   13352 ; BROADWELL-LABEL: test_punpckhwd:
   13353 ; BROADWELL:       # %bb.0:
   13354 ; BROADWELL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13355 ; BROADWELL-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
   13356 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   13357 ;
   13358 ; SKYLAKE-SSE-LABEL: test_punpckhwd:
   13359 ; SKYLAKE-SSE:       # %bb.0:
   13360 ; SKYLAKE-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13361 ; SKYLAKE-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
   13362 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   13363 ;
   13364 ; SKYLAKE-LABEL: test_punpckhwd:
   13365 ; SKYLAKE:       # %bb.0:
   13366 ; SKYLAKE-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13367 ; SKYLAKE-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
   13368 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   13369 ;
   13370 ; SKX-SSE-LABEL: test_punpckhwd:
   13371 ; SKX-SSE:       # %bb.0:
   13372 ; SKX-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13373 ; SKX-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
   13374 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   13375 ;
   13376 ; SKX-LABEL: test_punpckhwd:
   13377 ; SKX:       # %bb.0:
   13378 ; SKX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13379 ; SKX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
   13380 ; SKX-NEXT:    retq # sched: [7:1.00]
   13381 ;
   13382 ; BTVER2-SSE-LABEL: test_punpckhwd:
   13383 ; BTVER2-SSE:       # %bb.0:
   13384 ; BTVER2-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
   13385 ; BTVER2-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
   13386 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   13387 ;
   13388 ; BTVER2-LABEL: test_punpckhwd:
   13389 ; BTVER2:       # %bb.0:
   13390 ; BTVER2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
   13391 ; BTVER2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
   13392 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   13393 ;
   13394 ; ZNVER1-SSE-LABEL: test_punpckhwd:
   13395 ; ZNVER1-SSE:       # %bb.0:
   13396 ; ZNVER1-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
   13397 ; ZNVER1-SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
   13398 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   13399 ;
   13400 ; ZNVER1-LABEL: test_punpckhwd:
   13401 ; ZNVER1:       # %bb.0:
   13402 ; ZNVER1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
   13403 ; ZNVER1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
   13404 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   13405   %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
   13406   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   13407   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
   13408   ret <8 x i16> %3
   13409 }
   13410 
   13411 define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
         ; Scheduling test for punpcklbw / vpunpcklbw.
         ; The first shufflevector interleaves elements 0..7 of %a0 with elements 0..7
         ; of %a1 (register operand form). The second applies the same mask to that
         ; result and the vector loaded from %a2 (memory operand form), so the two
         ; unpacks are chained and the second result is returned directly. The
         ; FileCheck assertions below are autogenerated (see the note at the top of
         ; the file); regenerate them instead of editing them by hand.
   13412 ; GENERIC-LABEL: test_punpcklbw:
   13413 ; GENERIC:       # %bb.0:
   13414 ; GENERIC-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
   13415 ; GENERIC-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
   13416 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   13417 ;
   13418 ; ATOM-LABEL: test_punpcklbw:
   13419 ; ATOM:       # %bb.0:
   13420 ; ATOM-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13421 ; ATOM-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00]
   13422 ; ATOM-NEXT:    nop # sched: [1:0.50]
   13423 ; ATOM-NEXT:    nop # sched: [1:0.50]
   13424 ; ATOM-NEXT:    nop # sched: [1:0.50]
   13425 ; ATOM-NEXT:    nop # sched: [1:0.50]
   13426 ; ATOM-NEXT:    retq # sched: [79:39.50]
   13427 ;
   13428 ; SLM-LABEL: test_punpcklbw:
   13429 ; SLM:       # %bb.0:
   13430 ; SLM-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13431 ; SLM-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00]
   13432 ; SLM-NEXT:    retq # sched: [4:1.00]
   13433 ;
   13434 ; SANDY-SSE-LABEL: test_punpcklbw:
   13435 ; SANDY-SSE:       # %bb.0:
   13436 ; SANDY-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
   13437 ; SANDY-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
   13438 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   13439 ;
   13440 ; SANDY-LABEL: test_punpcklbw:
   13441 ; SANDY:       # %bb.0:
   13442 ; SANDY-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
   13443 ; SANDY-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
   13444 ; SANDY-NEXT:    retq # sched: [1:1.00]
   13445 ;
   13446 ; HASWELL-SSE-LABEL: test_punpcklbw:
   13447 ; HASWELL-SSE:       # %bb.0:
   13448 ; HASWELL-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13449 ; HASWELL-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
   13450 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   13451 ;
   13452 ; HASWELL-LABEL: test_punpcklbw:
   13453 ; HASWELL:       # %bb.0:
   13454 ; HASWELL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13455 ; HASWELL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
   13456 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   13457 ;
   13458 ; BROADWELL-SSE-LABEL: test_punpcklbw:
   13459 ; BROADWELL-SSE:       # %bb.0:
   13460 ; BROADWELL-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13461 ; BROADWELL-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
   13462 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   13463 ;
   13464 ; BROADWELL-LABEL: test_punpcklbw:
   13465 ; BROADWELL:       # %bb.0:
   13466 ; BROADWELL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13467 ; BROADWELL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
   13468 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   13469 ;
   13470 ; SKYLAKE-SSE-LABEL: test_punpcklbw:
   13471 ; SKYLAKE-SSE:       # %bb.0:
   13472 ; SKYLAKE-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13473 ; SKYLAKE-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
   13474 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   13475 ;
   13476 ; SKYLAKE-LABEL: test_punpcklbw:
   13477 ; SKYLAKE:       # %bb.0:
   13478 ; SKYLAKE-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13479 ; SKYLAKE-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
   13480 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   13481 ;
   13482 ; SKX-SSE-LABEL: test_punpcklbw:
   13483 ; SKX-SSE:       # %bb.0:
   13484 ; SKX-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13485 ; SKX-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
   13486 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   13487 ;
   13488 ; SKX-LABEL: test_punpcklbw:
   13489 ; SKX:       # %bb.0:
   13490 ; SKX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
   13491 ; SKX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
   13492 ; SKX-NEXT:    retq # sched: [7:1.00]
   13493 ;
   13494 ; BTVER2-SSE-LABEL: test_punpcklbw:
   13495 ; BTVER2-SSE:       # %bb.0:
   13496 ; BTVER2-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
   13497 ; BTVER2-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
   13498 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   13499 ;
   13500 ; BTVER2-LABEL: test_punpcklbw:
   13501 ; BTVER2:       # %bb.0:
   13502 ; BTVER2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
   13503 ; BTVER2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
   13504 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   13505 ;
   13506 ; ZNVER1-SSE-LABEL: test_punpcklbw:
   13507 ; ZNVER1-SSE:       # %bb.0:
   13508 ; ZNVER1-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
   13509 ; ZNVER1-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
   13510 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   13511 ;
   13512 ; ZNVER1-LABEL: test_punpcklbw:
   13513 ; ZNVER1:       # %bb.0:
   13514 ; ZNVER1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
   13515 ; ZNVER1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
   13516 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   13517   %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
   13518   %2 = load <16 x i8>, <16 x i8> *%a2, align 16
   13519   %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
   13520   ret <16 x i8> %3
   13521 }
   13522 
   13523 define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
         ; Scheduling test for punpckldq / vpunpckldq.
         ; The first shufflevector interleaves elements 0..1 of %a0 with elements 0..1
         ; of %a1 (register operand form); the second interleaves elements 0..1 of %a1
         ; with elements 0..1 of the vector loaded from %a2 (memory operand form). The
         ; final add consumes both shuffle results. The FileCheck assertions below are
         ; autogenerated (see the note at the top of the file); regenerate them
         ; instead of editing them by hand.
   13524 ; GENERIC-LABEL: test_punpckldq:
   13525 ; GENERIC:       # %bb.0:
   13526 ; GENERIC-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
   13527 ; GENERIC-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
   13528 ; GENERIC-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   13529 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   13530 ;
   13531 ; ATOM-LABEL: test_punpckldq:
   13532 ; ATOM:       # %bb.0:
   13533 ; ATOM-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   13534 ; ATOM-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00]
   13535 ; ATOM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   13536 ; ATOM-NEXT:    nop # sched: [1:0.50]
   13537 ; ATOM-NEXT:    nop # sched: [1:0.50]
   13538 ; ATOM-NEXT:    retq # sched: [79:39.50]
   13539 ;
   13540 ; SLM-LABEL: test_punpckldq:
   13541 ; SLM:       # %bb.0:
   13542 ; SLM-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   13543 ; SLM-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [4:1.00]
   13544 ; SLM-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   13545 ; SLM-NEXT:    retq # sched: [4:1.00]
   13546 ;
   13547 ; SANDY-SSE-LABEL: test_punpckldq:
   13548 ; SANDY-SSE:       # %bb.0:
   13549 ; SANDY-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
   13550 ; SANDY-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
   13551 ; SANDY-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   13552 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   13553 ;
   13554 ; SANDY-LABEL: test_punpckldq:
   13555 ; SANDY:       # %bb.0:
   13556 ; SANDY-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
   13557 ; SANDY-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
   13558 ; SANDY-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13559 ; SANDY-NEXT:    retq # sched: [1:1.00]
   13560 ;
   13561 ; HASWELL-SSE-LABEL: test_punpckldq:
   13562 ; HASWELL-SSE:       # %bb.0:
   13563 ; HASWELL-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   13564 ; HASWELL-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
   13565 ; HASWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   13566 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   13567 ;
   13568 ; HASWELL-LABEL: test_punpckldq:
   13569 ; HASWELL:       # %bb.0:
   13570 ; HASWELL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   13571 ; HASWELL-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
   13572 ; HASWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13573 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   13574 ;
   13575 ; BROADWELL-SSE-LABEL: test_punpckldq:
   13576 ; BROADWELL-SSE:       # %bb.0:
   13577 ; BROADWELL-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   13578 ; BROADWELL-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
   13579 ; BROADWELL-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   13580 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   13581 ;
   13582 ; BROADWELL-LABEL: test_punpckldq:
   13583 ; BROADWELL:       # %bb.0:
   13584 ; BROADWELL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   13585 ; BROADWELL-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
   13586 ; BROADWELL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13587 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   13588 ;
   13589 ; SKYLAKE-SSE-LABEL: test_punpckldq:
   13590 ; SKYLAKE-SSE:       # %bb.0:
   13591 ; SKYLAKE-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   13592 ; SKYLAKE-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
   13593 ; SKYLAKE-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   13594 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   13595 ;
   13596 ; SKYLAKE-LABEL: test_punpckldq:
   13597 ; SKYLAKE:       # %bb.0:
   13598 ; SKYLAKE-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   13599 ; SKYLAKE-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
   13600 ; SKYLAKE-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   13601 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   13602 ;
   13603 ; SKX-SSE-LABEL: test_punpckldq:
   13604 ; SKX-SSE:       # %bb.0:
   13605 ; SKX-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   13606 ; SKX-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
   13607 ; SKX-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.33]
   13608 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   13609 ;
   13610 ; SKX-LABEL: test_punpckldq:
   13611 ; SKX:       # %bb.0:
   13612 ; SKX-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   13613 ; SKX-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
   13614 ; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   13615 ; SKX-NEXT:    retq # sched: [7:1.00]
   13616 ;
   13617 ; BTVER2-SSE-LABEL: test_punpckldq:
   13618 ; BTVER2-SSE:       # %bb.0:
   13619 ; BTVER2-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
   13620 ; BTVER2-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
   13621 ; BTVER2-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.50]
   13622 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   13623 ;
   13624 ; BTVER2-LABEL: test_punpckldq:
   13625 ; BTVER2:       # %bb.0:
   13626 ; BTVER2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
   13627 ; BTVER2-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
   13628 ; BTVER2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13629 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   13630 ;
   13631 ; ZNVER1-SSE-LABEL: test_punpckldq:
   13632 ; ZNVER1-SSE:       # %bb.0:
   13633 ; ZNVER1-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25]
   13634 ; ZNVER1-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
   13635 ; ZNVER1-SSE-NEXT:    paddd %xmm1, %xmm0 # sched: [1:0.25]
   13636 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   13637 ;
   13638 ; ZNVER1-LABEL: test_punpckldq:
   13639 ; ZNVER1:       # %bb.0:
   13640 ; ZNVER1-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25]
   13641 ; ZNVER1-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
   13642 ; ZNVER1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   13643 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   13644   %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
   13645   %2 = load <4 x i32>, <4 x i32> *%a2, align 16
   13646   %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
   13647   %4 = add <4 x i32> %1, %3
   13648   ret <4 x i32> %4
   13649 }
   13650 
   13651 define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
         ; Scheduling test for (v)punpcklqdq in register and memory-operand form.
         ; The IR builds two shufflevectors with mask <0, 2> (low i64 lane of each
         ; operand): one on %a0/%a1 and one on %a1 and the vector loaded from %a2,
         ; then adds the two results (presumably so that neither shuffle is dead).
         ; The per-prefix CHECK lines below are autogenerated (see the NOTE at the
         ; top of the file); regenerate them with utils/update_llc_test_checks.py
         ; rather than editing them by hand.
   13652 ; GENERIC-LABEL: test_punpcklqdq:
   13653 ; GENERIC:       # %bb.0:
   13654 ; GENERIC-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
   13655 ; GENERIC-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
   13656 ; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13657 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   13658 ;
   13659 ; ATOM-LABEL: test_punpcklqdq:
   13660 ; ATOM:       # %bb.0:
   13661 ; ATOM-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   13662 ; ATOM-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
   13663 ; ATOM-NEXT:    paddq %xmm1, %xmm0 # sched: [2:1.00]
   13664 ; ATOM-NEXT:    retq # sched: [79:39.50]
   13665 ;
   13666 ; SLM-LABEL: test_punpcklqdq:
   13667 ; SLM:       # %bb.0:
   13668 ; SLM-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   13669 ; SLM-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
   13670 ; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13671 ; SLM-NEXT:    retq # sched: [4:1.00]
   13672 ;
   13673 ; SANDY-SSE-LABEL: test_punpcklqdq:
   13674 ; SANDY-SSE:       # %bb.0:
   13675 ; SANDY-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
   13676 ; SANDY-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
   13677 ; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13678 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   13679 ;
   13680 ; SANDY-LABEL: test_punpcklqdq:
   13681 ; SANDY:       # %bb.0:
   13682 ; SANDY-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
   13683 ; SANDY-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
   13684 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13685 ; SANDY-NEXT:    retq # sched: [1:1.00]
   13686 ;
   13687 ; HASWELL-SSE-LABEL: test_punpcklqdq:
   13688 ; HASWELL-SSE:       # %bb.0:
   13689 ; HASWELL-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   13690 ; HASWELL-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   13691 ; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13692 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   13693 ;
   13694 ; HASWELL-LABEL: test_punpcklqdq:
   13695 ; HASWELL:       # %bb.0:
   13696 ; HASWELL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   13697 ; HASWELL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   13698 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13699 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   13700 ;
   13701 ; BROADWELL-SSE-LABEL: test_punpcklqdq:
   13702 ; BROADWELL-SSE:       # %bb.0:
   13703 ; BROADWELL-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   13704 ; BROADWELL-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   13705 ; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13706 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   13707 ;
   13708 ; BROADWELL-LABEL: test_punpcklqdq:
   13709 ; BROADWELL:       # %bb.0:
   13710 ; BROADWELL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   13711 ; BROADWELL-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   13712 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13713 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   13714 ;
   13715 ; SKYLAKE-SSE-LABEL: test_punpcklqdq:
   13716 ; SKYLAKE-SSE:       # %bb.0:
   13717 ; SKYLAKE-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   13718 ; SKYLAKE-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   13719 ; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   13720 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   13721 ;
   13722 ; SKYLAKE-LABEL: test_punpcklqdq:
   13723 ; SKYLAKE:       # %bb.0:
   13724 ; SKYLAKE-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   13725 ; SKYLAKE-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   13726 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   13727 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   13728 ;
   13729 ; SKX-SSE-LABEL: test_punpcklqdq:
   13730 ; SKX-SSE:       # %bb.0:
   13731 ; SKX-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   13732 ; SKX-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   13733 ; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   13734 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   13735 ;
   13736 ; SKX-LABEL: test_punpcklqdq:
   13737 ; SKX:       # %bb.0:
   13738 ; SKX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   13739 ; SKX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   13740 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   13741 ; SKX-NEXT:    retq # sched: [7:1.00]
   13742 ;
   13743 ; BTVER2-SSE-LABEL: test_punpcklqdq:
   13744 ; BTVER2-SSE:       # %bb.0:
   13745 ; BTVER2-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
   13746 ; BTVER2-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   13747 ; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13748 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   13749 ;
   13750 ; BTVER2-LABEL: test_punpcklqdq:
   13751 ; BTVER2:       # %bb.0:
   13752 ; BTVER2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
   13753 ; BTVER2-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   13754 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13755 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   13756 ;
   13757 ; ZNVER1-SSE-LABEL: test_punpcklqdq:
   13758 ; ZNVER1-SSE:       # %bb.0:
   13759 ; ZNVER1-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25]
   13760 ; ZNVER1-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
   13761 ; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
   13762 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   13763 ;
   13764 ; ZNVER1-LABEL: test_punpcklqdq:
   13765 ; ZNVER1:       # %bb.0:
   13766 ; ZNVER1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25]
   13767 ; ZNVER1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
   13768 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   13769 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   13770   %1 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
   13771   %2 = load <2 x i64>, <2 x i64> *%a2, align 16
   13772   %3 = shufflevector <2 x i64> %a1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
   13773   %4 = add <2 x i64> %1, %3
   13774   ret <2 x i64> %4
   13775 }
   13776 
   13777 define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
         ; Scheduling test for (v)punpcklwd in register and memory-operand form.
         ; The first shufflevector interleaves the low four i16 lanes of %a0 and %a1
         ; (mask 0,8,1,9,2,10,3,11); the second applies the same mask to that result
         ; and the vector loaded from %a2, and its value is returned directly.
         ; Note that the ATOM checks expect four nops before the retq.
         ; The per-prefix CHECK lines below are autogenerated (see the NOTE at the
         ; top of the file); regenerate them with utils/update_llc_test_checks.py
         ; rather than editing them by hand.
   13778 ; GENERIC-LABEL: test_punpcklwd:
   13779 ; GENERIC:       # %bb.0:
   13780 ; GENERIC-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
   13781 ; GENERIC-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
   13782 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   13783 ;
   13784 ; ATOM-LABEL: test_punpcklwd:
   13785 ; ATOM:       # %bb.0:
   13786 ; ATOM-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13787 ; ATOM-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
   13788 ; ATOM-NEXT:    nop # sched: [1:0.50]
   13789 ; ATOM-NEXT:    nop # sched: [1:0.50]
   13790 ; ATOM-NEXT:    nop # sched: [1:0.50]
   13791 ; ATOM-NEXT:    nop # sched: [1:0.50]
   13792 ; ATOM-NEXT:    retq # sched: [79:39.50]
   13793 ;
   13794 ; SLM-LABEL: test_punpcklwd:
   13795 ; SLM:       # %bb.0:
   13796 ; SLM-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13797 ; SLM-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00]
   13798 ; SLM-NEXT:    retq # sched: [4:1.00]
   13799 ;
   13800 ; SANDY-SSE-LABEL: test_punpcklwd:
   13801 ; SANDY-SSE:       # %bb.0:
   13802 ; SANDY-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
   13803 ; SANDY-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
   13804 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   13805 ;
   13806 ; SANDY-LABEL: test_punpcklwd:
   13807 ; SANDY:       # %bb.0:
   13808 ; SANDY-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
   13809 ; SANDY-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
   13810 ; SANDY-NEXT:    retq # sched: [1:1.00]
   13811 ;
   13812 ; HASWELL-SSE-LABEL: test_punpcklwd:
   13813 ; HASWELL-SSE:       # %bb.0:
   13814 ; HASWELL-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13815 ; HASWELL-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
   13816 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   13817 ;
   13818 ; HASWELL-LABEL: test_punpcklwd:
   13819 ; HASWELL:       # %bb.0:
   13820 ; HASWELL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13821 ; HASWELL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
   13822 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   13823 ;
   13824 ; BROADWELL-SSE-LABEL: test_punpcklwd:
   13825 ; BROADWELL-SSE:       # %bb.0:
   13826 ; BROADWELL-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13827 ; BROADWELL-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
   13828 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   13829 ;
   13830 ; BROADWELL-LABEL: test_punpcklwd:
   13831 ; BROADWELL:       # %bb.0:
   13832 ; BROADWELL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13833 ; BROADWELL-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
   13834 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   13835 ;
   13836 ; SKYLAKE-SSE-LABEL: test_punpcklwd:
   13837 ; SKYLAKE-SSE:       # %bb.0:
   13838 ; SKYLAKE-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13839 ; SKYLAKE-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
   13840 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   13841 ;
   13842 ; SKYLAKE-LABEL: test_punpcklwd:
   13843 ; SKYLAKE:       # %bb.0:
   13844 ; SKYLAKE-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13845 ; SKYLAKE-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
   13846 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   13847 ;
   13848 ; SKX-SSE-LABEL: test_punpcklwd:
   13849 ; SKX-SSE:       # %bb.0:
   13850 ; SKX-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13851 ; SKX-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
   13852 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   13853 ;
   13854 ; SKX-LABEL: test_punpcklwd:
   13855 ; SKX:       # %bb.0:
   13856 ; SKX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   13857 ; SKX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
   13858 ; SKX-NEXT:    retq # sched: [7:1.00]
   13859 ;
   13860 ; BTVER2-SSE-LABEL: test_punpcklwd:
   13861 ; BTVER2-SSE:       # %bb.0:
   13862 ; BTVER2-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
   13863 ; BTVER2-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
   13864 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   13865 ;
   13866 ; BTVER2-LABEL: test_punpcklwd:
   13867 ; BTVER2:       # %bb.0:
   13868 ; BTVER2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
   13869 ; BTVER2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
   13870 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   13871 ;
   13872 ; ZNVER1-SSE-LABEL: test_punpcklwd:
   13873 ; ZNVER1-SSE:       # %bb.0:
   13874 ; ZNVER1-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
   13875 ; ZNVER1-SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50]
   13876 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   13877 ;
   13878 ; ZNVER1-LABEL: test_punpcklwd:
   13879 ; ZNVER1:       # %bb.0:
   13880 ; ZNVER1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
   13881 ; ZNVER1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50]
   13882 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   13883   %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   13884   %2 = load <8 x i16>, <8 x i16> *%a2, align 16
   13885   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   13886   ret <8 x i16> %3
   13887 }
   13888 
   13889 define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
         ; Scheduling test for (v)pxor in register and memory-operand form.
         ; The IR xors %a0 with %a1, xors that with the vector loaded from %a2, and
         ; finally adds %a1 to the result, which the checks match as a trailing
         ; (v)paddq after the two (v)pxor instructions.
         ; The per-prefix CHECK lines below are autogenerated (see the NOTE at the
         ; top of the file); regenerate them with utils/update_llc_test_checks.py
         ; rather than editing them by hand.
   13890 ; GENERIC-LABEL: test_pxor:
   13891 ; GENERIC:       # %bb.0:
   13892 ; GENERIC-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
   13893 ; GENERIC-NEXT:    pxor (%rdi), %xmm0 # sched: [7:0.50]
   13894 ; GENERIC-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13895 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   13896 ;
   13897 ; ATOM-LABEL: test_pxor:
   13898 ; ATOM:       # %bb.0:
   13899 ; ATOM-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.50]
   13900 ; ATOM-NEXT:    pxor (%rdi), %xmm0 # sched: [1:1.00]
   13901 ; ATOM-NEXT:    paddq %xmm1, %xmm0 # sched: [2:1.00]
   13902 ; ATOM-NEXT:    retq # sched: [79:39.50]
   13903 ;
   13904 ; SLM-LABEL: test_pxor:
   13905 ; SLM:       # %bb.0:
   13906 ; SLM-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.50]
   13907 ; SLM-NEXT:    pxor (%rdi), %xmm0 # sched: [4:1.00]
   13908 ; SLM-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13909 ; SLM-NEXT:    retq # sched: [4:1.00]
   13910 ;
   13911 ; SANDY-SSE-LABEL: test_pxor:
   13912 ; SANDY-SSE:       # %bb.0:
   13913 ; SANDY-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
   13914 ; SANDY-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [7:0.50]
   13915 ; SANDY-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13916 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   13917 ;
   13918 ; SANDY-LABEL: test_pxor:
   13919 ; SANDY:       # %bb.0:
   13920 ; SANDY-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   13921 ; SANDY-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   13922 ; SANDY-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13923 ; SANDY-NEXT:    retq # sched: [1:1.00]
   13924 ;
   13925 ; HASWELL-SSE-LABEL: test_pxor:
   13926 ; HASWELL-SSE:       # %bb.0:
   13927 ; HASWELL-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
   13928 ; HASWELL-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [7:0.50]
   13929 ; HASWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13930 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   13931 ;
   13932 ; HASWELL-LABEL: test_pxor:
   13933 ; HASWELL:       # %bb.0:
   13934 ; HASWELL-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   13935 ; HASWELL-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   13936 ; HASWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13937 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   13938 ;
   13939 ; BROADWELL-SSE-LABEL: test_pxor:
   13940 ; BROADWELL-SSE:       # %bb.0:
   13941 ; BROADWELL-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
   13942 ; BROADWELL-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [6:0.50]
   13943 ; BROADWELL-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13944 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   13945 ;
   13946 ; BROADWELL-LABEL: test_pxor:
   13947 ; BROADWELL:       # %bb.0:
   13948 ; BROADWELL-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   13949 ; BROADWELL-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
   13950 ; BROADWELL-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13951 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   13952 ;
   13953 ; SKYLAKE-SSE-LABEL: test_pxor:
   13954 ; SKYLAKE-SSE:       # %bb.0:
   13955 ; SKYLAKE-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
   13956 ; SKYLAKE-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [7:0.50]
   13957 ; SKYLAKE-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   13958 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   13959 ;
   13960 ; SKYLAKE-LABEL: test_pxor:
   13961 ; SKYLAKE:       # %bb.0:
   13962 ; SKYLAKE-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   13963 ; SKYLAKE-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   13964 ; SKYLAKE-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   13965 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   13966 ;
   13967 ; SKX-SSE-LABEL: test_pxor:
   13968 ; SKX-SSE:       # %bb.0:
   13969 ; SKX-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.33]
   13970 ; SKX-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [7:0.50]
   13971 ; SKX-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.33]
   13972 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   13973 ;
   13974 ; SKX-LABEL: test_pxor:
   13975 ; SKX:       # %bb.0:
   13976 ; SKX-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   13977 ; SKX-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   13978 ; SKX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   13979 ; SKX-NEXT:    retq # sched: [7:1.00]
   13980 ;
   13981 ; BTVER2-SSE-LABEL: test_pxor:
   13982 ; BTVER2-SSE:       # %bb.0:
   13983 ; BTVER2-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.50]
   13984 ; BTVER2-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [6:1.00]
   13985 ; BTVER2-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.50]
   13986 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   13987 ;
   13988 ; BTVER2-LABEL: test_pxor:
   13989 ; BTVER2:       # %bb.0:
   13990 ; BTVER2-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13991 ; BTVER2-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   13992 ; BTVER2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   13993 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   13994 ;
   13995 ; ZNVER1-SSE-LABEL: test_pxor:
   13996 ; ZNVER1-SSE:       # %bb.0:
   13997 ; ZNVER1-SSE-NEXT:    pxor %xmm1, %xmm0 # sched: [1:0.25]
   13998 ; ZNVER1-SSE-NEXT:    pxor (%rdi), %xmm0 # sched: [8:0.50]
   13999 ; ZNVER1-SSE-NEXT:    paddq %xmm1, %xmm0 # sched: [1:0.25]
   14000 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   14001 ;
   14002 ; ZNVER1-LABEL: test_pxor:
   14003 ; ZNVER1:       # %bb.0:
   14004 ; ZNVER1-NEXT:    vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   14005 ; ZNVER1-NEXT:    vpxor (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   14006 ; ZNVER1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   14007 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   14008   %1 = xor <2 x i64> %a0, %a1
   14009   %2 = load <2 x i64>, <2 x i64> *%a2, align 16
   14010   %3 = xor <2 x i64> %1, %2
   14011   %4 = add <2 x i64> %3, %a1
   14012   ret <2 x i64> %4
   14013 }
   14014 
   14015 define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
         ; Scheduling test for (v)shufpd in register and memory-operand form.
         ; Both shufflevectors use mask <1, 2> (high lane of the first operand, low
         ; lane of the second): one on %a0/%a1 and one on %a1 and the vector loaded
         ; from %a2. The two results are combined with fadd, matched as (v)addpd.
         ; The per-prefix CHECK lines below are autogenerated (see the NOTE at the
         ; top of the file); regenerate them with utils/update_llc_test_checks.py
         ; rather than editing them by hand.
   14016 ; GENERIC-LABEL: test_shufpd:
   14017 ; GENERIC:       # %bb.0:
   14018 ; GENERIC-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
   14019 ; GENERIC-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
   14020 ; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14021 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   14022 ;
   14023 ; ATOM-LABEL: test_shufpd:
   14024 ; ATOM:       # %bb.0:
   14025 ; ATOM-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
   14026 ; ATOM-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [1:1.00]
   14027 ; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
   14028 ; ATOM-NEXT:    retq # sched: [79:39.50]
   14029 ;
   14030 ; SLM-LABEL: test_shufpd:
   14031 ; SLM:       # %bb.0:
   14032 ; SLM-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
   14033 ; SLM-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [4:1.00]
   14034 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14035 ; SLM-NEXT:    retq # sched: [4:1.00]
   14036 ;
   14037 ; SANDY-SSE-LABEL: test_shufpd:
   14038 ; SANDY-SSE:       # %bb.0:
   14039 ; SANDY-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
   14040 ; SANDY-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
   14041 ; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14042 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   14043 ;
   14044 ; SANDY-LABEL: test_shufpd:
   14045 ; SANDY:       # %bb.0:
   14046 ; SANDY-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
   14047 ; SANDY-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
   14048 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14049 ; SANDY-NEXT:    retq # sched: [1:1.00]
   14050 ;
   14051 ; HASWELL-SSE-LABEL: test_shufpd:
   14052 ; HASWELL-SSE:       # %bb.0:
   14053 ; HASWELL-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
   14054 ; HASWELL-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
   14055 ; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14056 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   14057 ;
   14058 ; HASWELL-LABEL: test_shufpd:
   14059 ; HASWELL:       # %bb.0:
   14060 ; HASWELL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
   14061 ; HASWELL-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
   14062 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14063 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   14064 ;
   14065 ; BROADWELL-SSE-LABEL: test_shufpd:
   14066 ; BROADWELL-SSE:       # %bb.0:
   14067 ; BROADWELL-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
   14068 ; BROADWELL-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
   14069 ; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14070 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   14071 ;
   14072 ; BROADWELL-LABEL: test_shufpd:
   14073 ; BROADWELL:       # %bb.0:
   14074 ; BROADWELL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
   14075 ; BROADWELL-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
   14076 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14077 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   14078 ;
   14079 ; SKYLAKE-SSE-LABEL: test_shufpd:
   14080 ; SKYLAKE-SSE:       # %bb.0:
   14081 ; SKYLAKE-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
   14082 ; SKYLAKE-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
   14083 ; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   14084 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   14085 ;
   14086 ; SKYLAKE-LABEL: test_shufpd:
   14087 ; SKYLAKE:       # %bb.0:
   14088 ; SKYLAKE-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
   14089 ; SKYLAKE-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
   14090 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   14091 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   14092 ;
   14093 ; SKX-SSE-LABEL: test_shufpd:
   14094 ; SKX-SSE:       # %bb.0:
   14095 ; SKX-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
   14096 ; SKX-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
   14097 ; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   14098 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   14099 ;
   14100 ; SKX-LABEL: test_shufpd:
   14101 ; SKX:       # %bb.0:
   14102 ; SKX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
   14103 ; SKX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
   14104 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   14105 ; SKX-NEXT:    retq # sched: [7:1.00]
   14106 ;
   14107 ; BTVER2-SSE-LABEL: test_shufpd:
   14108 ; BTVER2-SSE:       # %bb.0:
   14109 ; BTVER2-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
   14110 ; BTVER2-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
   14111 ; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14112 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   14113 ;
   14114 ; BTVER2-LABEL: test_shufpd:
   14115 ; BTVER2:       # %bb.0:
   14116 ; BTVER2-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
   14117 ; BTVER2-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
   14118 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14119 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   14120 ;
   14121 ; ZNVER1-SSE-LABEL: test_shufpd:
   14122 ; ZNVER1-SSE:       # %bb.0:
   14123 ; ZNVER1-SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
   14124 ; ZNVER1-SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50]
   14125 ; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14126 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   14127 ;
   14128 ; ZNVER1-LABEL: test_shufpd:
   14129 ; ZNVER1:       # %bb.0:
   14130 ; ZNVER1-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
   14131 ; ZNVER1-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50]
   14132 ; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14133 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   14134   %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
   14135   %2 = load <2 x double>, <2 x double> *%a2, align 16
   14136   %3 = shufflevector <2 x double> %a1, <2 x double> %2, <2 x i32> <i32 1, i32 2>
   14137   %4 = fadd <2 x double> %1, %3
   14138   ret <2 x double> %4
   14139 }
   14140 
   14141 define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) {
         ; Scheduling test for (v)sqrtpd in register and memory-operand form.
         ; The IR calls @llvm.x86.sse2.sqrt.pd once on %a0 and once on the vector
         ; loaded from %a1, then combines the two results with fadd.
         ; The expected instruction order is not the same for every prefix: the
         ; SLM, BTVER2 and ZNVER1 checks expect the memory-operand sqrt first, and
         ; the SLM checks additionally expect a trailing movapd into %xmm0.
         ; The per-prefix CHECK lines below are autogenerated (see the NOTE at the
         ; top of the file); regenerate them with utils/update_llc_test_checks.py
         ; rather than editing them by hand.
   14142 ; GENERIC-LABEL: test_sqrtpd:
   14143 ; GENERIC:       # %bb.0:
   14144 ; GENERIC-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [21:21.00]
   14145 ; GENERIC-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [27:21.00]
   14146 ; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14147 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   14148 ;
   14149 ; ATOM-LABEL: test_sqrtpd:
   14150 ; ATOM:       # %bb.0:
   14151 ; ATOM-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [125:62.50]
   14152 ; ATOM-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [125:62.50]
   14153 ; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
   14154 ; ATOM-NEXT:    retq # sched: [79:39.50]
   14155 ;
   14156 ; SLM-LABEL: test_sqrtpd:
   14157 ; SLM:       # %bb.0:
   14158 ; SLM-NEXT:    sqrtpd (%rdi), %xmm1 # sched: [74:70.00]
   14159 ; SLM-NEXT:    sqrtpd %xmm0, %xmm0 # sched: [71:70.00]
   14160 ; SLM-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   14161 ; SLM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
   14162 ; SLM-NEXT:    retq # sched: [4:1.00]
   14163 ;
   14164 ; SANDY-SSE-LABEL: test_sqrtpd:
   14165 ; SANDY-SSE:       # %bb.0:
   14166 ; SANDY-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [21:21.00]
   14167 ; SANDY-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [27:21.00]
   14168 ; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14169 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   14170 ;
   14171 ; SANDY-LABEL: test_sqrtpd:
   14172 ; SANDY:       # %bb.0:
   14173 ; SANDY-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [21:21.00]
   14174 ; SANDY-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [27:21.00]
   14175 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14176 ; SANDY-NEXT:    retq # sched: [1:1.00]
   14177 ;
   14178 ; HASWELL-SSE-LABEL: test_sqrtpd:
   14179 ; HASWELL-SSE:       # %bb.0:
   14180 ; HASWELL-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [16:14.00]
   14181 ; HASWELL-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [22:14.00]
   14182 ; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14183 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   14184 ;
   14185 ; HASWELL-LABEL: test_sqrtpd:
   14186 ; HASWELL:       # %bb.0:
   14187 ; HASWELL-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [16:14.00]
   14188 ; HASWELL-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [22:14.00]
   14189 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14190 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   14191 ;
   14192 ; BROADWELL-SSE-LABEL: test_sqrtpd:
   14193 ; BROADWELL-SSE:       # %bb.0:
   14194 ; BROADWELL-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [16:14.00]
   14195 ; BROADWELL-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [21:14.00]
   14196 ; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14197 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   14198 ;
   14199 ; BROADWELL-LABEL: test_sqrtpd:
   14200 ; BROADWELL:       # %bb.0:
   14201 ; BROADWELL-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [16:14.00]
   14202 ; BROADWELL-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [21:14.00]
   14203 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14204 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   14205 ;
   14206 ; SKYLAKE-SSE-LABEL: test_sqrtpd:
   14207 ; SKYLAKE-SSE:       # %bb.0:
   14208 ; SKYLAKE-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [18:6.00]
   14209 ; SKYLAKE-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [24:6.00]
   14210 ; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   14211 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   14212 ;
   14213 ; SKYLAKE-LABEL: test_sqrtpd:
   14214 ; SKYLAKE:       # %bb.0:
   14215 ; SKYLAKE-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [18:6.00]
   14216 ; SKYLAKE-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [24:6.00]
   14217 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   14218 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   14219 ;
   14220 ; SKX-SSE-LABEL: test_sqrtpd:
   14221 ; SKX-SSE:       # %bb.0:
   14222 ; SKX-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [18:6.00]
   14223 ; SKX-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [24:6.00]
   14224 ; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   14225 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   14226 ;
   14227 ; SKX-LABEL: test_sqrtpd:
   14228 ; SKX:       # %bb.0:
   14229 ; SKX-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [18:6.00]
   14230 ; SKX-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [24:6.00]
   14231 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   14232 ; SKX-NEXT:    retq # sched: [7:1.00]
   14233 ;
   14234 ; BTVER2-SSE-LABEL: test_sqrtpd:
   14235 ; BTVER2-SSE:       # %bb.0:
   14236 ; BTVER2-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [27:27.00]
   14237 ; BTVER2-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [32:27.00]
   14238 ; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14239 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   14240 ;
   14241 ; BTVER2-LABEL: test_sqrtpd:
   14242 ; BTVER2:       # %bb.0:
   14243 ; BTVER2-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [32:27.00]
   14244 ; BTVER2-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [27:27.00]
   14245 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14246 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   14247 ;
   14248 ; ZNVER1-SSE-LABEL: test_sqrtpd:
   14249 ; ZNVER1-SSE:       # %bb.0:
   14250 ; ZNVER1-SSE-NEXT:    sqrtpd %xmm0, %xmm1 # sched: [20:20.00]
   14251 ; ZNVER1-SSE-NEXT:    sqrtpd (%rdi), %xmm0 # sched: [27:20.00]
   14252 ; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14253 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   14254 ;
   14255 ; ZNVER1-LABEL: test_sqrtpd:
   14256 ; ZNVER1:       # %bb.0:
   14257 ; ZNVER1-NEXT:    vsqrtpd (%rdi), %xmm1 # sched: [27:20.00]
   14258 ; ZNVER1-NEXT:    vsqrtpd %xmm0, %xmm0 # sched: [20:20.00]
   14259 ; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14260 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   14261   %1 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
   14262   %2 = load <2 x double>, <2 x double> *%a1, align 16
   14263   %3 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %2)
   14264   %4 = fadd <2 x double> %1, %3
   14265   ret <2 x double> %4
   14266 }
         ; Intrinsic called by test_sqrtpd above.
   14267 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
   14268 
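   14269 ; TODO - sqrtsd_m: test_sqrtsd below only exercises the register form of sqrtsd; add coverage for the memory-operand form.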
   14270 
   14271 define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; Scheduling test: sqrt.sd intrinsic applied to a register argument and to a loaded vector.
   14272 ; GENERIC-LABEL: test_sqrtsd:
   14273 ; GENERIC:       # %bb.0:
   14274 ; GENERIC-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [21:21.00]
   14275 ; GENERIC-NEXT:    movapd (%rdi), %xmm1 # sched: [6:0.50]
   14276 ; GENERIC-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [21:21.00]
   14277 ; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14278 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   14279 ;
   14280 ; ATOM-LABEL: test_sqrtsd:
   14281 ; ATOM:       # %bb.0:
   14282 ; ATOM-NEXT:    movapd (%rdi), %xmm1 # sched: [1:1.00]
   14283 ; ATOM-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [62:31.00]
   14284 ; ATOM-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [62:31.00]
   14285 ; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
   14286 ; ATOM-NEXT:    retq # sched: [79:39.50]
   14287 ;
   14288 ; SLM-LABEL: test_sqrtsd:
   14289 ; SLM:       # %bb.0:
   14290 ; SLM-NEXT:    movapd (%rdi), %xmm1 # sched: [3:1.00]
   14291 ; SLM-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [35:35.00]
   14292 ; SLM-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [35:35.00]
   14293 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14294 ; SLM-NEXT:    retq # sched: [4:1.00]
   14295 ;
   14296 ; SANDY-SSE-LABEL: test_sqrtsd:
   14297 ; SANDY-SSE:       # %bb.0:
   14298 ; SANDY-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [21:21.00]
   14299 ; SANDY-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [6:0.50]
   14300 ; SANDY-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [21:21.00]
   14301 ; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14302 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   14303 ;
   14304 ; SANDY-LABEL: test_sqrtsd:
   14305 ; SANDY:       # %bb.0:
   14306 ; SANDY-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:21.00]
   14307 ; SANDY-NEXT:    vmovapd (%rdi), %xmm1 # sched: [6:0.50]
   14308 ; SANDY-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:21.00]
   14309 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14310 ; SANDY-NEXT:    retq # sched: [1:1.00]
   14311 ;
   14312 ; HASWELL-SSE-LABEL: test_sqrtsd:
   14313 ; HASWELL-SSE:       # %bb.0:
   14314 ; HASWELL-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [16:14.00]
   14315 ; HASWELL-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [6:0.50]
   14316 ; HASWELL-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [16:14.00]
   14317 ; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14318 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   14319 ;
   14320 ; HASWELL-LABEL: test_sqrtsd:
   14321 ; HASWELL:       # %bb.0:
   14322 ; HASWELL-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [16:14.00]
   14323 ; HASWELL-NEXT:    vmovapd (%rdi), %xmm1 # sched: [6:0.50]
   14324 ; HASWELL-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [16:14.00]
   14325 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14326 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   14327 ;
   14328 ; BROADWELL-SSE-LABEL: test_sqrtsd:
   14329 ; BROADWELL-SSE:       # %bb.0:
   14330 ; BROADWELL-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [16:8.00]
   14331 ; BROADWELL-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [5:0.50]
   14332 ; BROADWELL-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [16:8.00]
   14333 ; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14334 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   14335 ;
   14336 ; BROADWELL-LABEL: test_sqrtsd:
   14337 ; BROADWELL:       # %bb.0:
   14338 ; BROADWELL-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [16:8.00]
   14339 ; BROADWELL-NEXT:    vmovapd (%rdi), %xmm1 # sched: [5:0.50]
   14340 ; BROADWELL-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [16:8.00]
   14341 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14342 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   14343 ;
   14344 ; SKYLAKE-SSE-LABEL: test_sqrtsd:
   14345 ; SKYLAKE-SSE:       # %bb.0:
   14346 ; SKYLAKE-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [18:6.00]
   14347 ; SKYLAKE-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [6:0.50]
   14348 ; SKYLAKE-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [18:6.00]
   14349 ; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   14350 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   14351 ;
   14352 ; SKYLAKE-LABEL: test_sqrtsd:
   14353 ; SKYLAKE:       # %bb.0:
   14354 ; SKYLAKE-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00]
   14355 ; SKYLAKE-NEXT:    vmovapd (%rdi), %xmm1 # sched: [6:0.50]
   14356 ; SKYLAKE-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:6.00]
   14357 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   14358 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   14359 ;
   14360 ; SKX-SSE-LABEL: test_sqrtsd:
   14361 ; SKX-SSE:       # %bb.0:
   14362 ; SKX-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [18:6.00]
   14363 ; SKX-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [6:0.50]
   14364 ; SKX-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [18:6.00]
   14365 ; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   14366 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   14367 ;
   14368 ; SKX-LABEL: test_sqrtsd:
   14369 ; SKX:       # %bb.0:
   14370 ; SKX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00]
   14371 ; SKX-NEXT:    vmovapd (%rdi), %xmm1 # sched: [6:0.50]
   14372 ; SKX-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:6.00]
   14373 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   14374 ; SKX-NEXT:    retq # sched: [7:1.00]
   14375 ;
   14376 ; BTVER2-SSE-LABEL: test_sqrtsd:
   14377 ; BTVER2-SSE:       # %bb.0:
   14378 ; BTVER2-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [5:1.00]
   14379 ; BTVER2-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [27:27.00]
   14380 ; BTVER2-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [27:27.00]
   14381 ; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14382 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   14383 ;
   14384 ; BTVER2-LABEL: test_sqrtsd:
   14385 ; BTVER2:       # %bb.0:
   14386 ; BTVER2-NEXT:    vmovapd (%rdi), %xmm1 # sched: [5:1.00]
   14387 ; BTVER2-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [27:27.00]
   14388 ; BTVER2-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [27:27.00]
   14389 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14390 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   14391 ;
   14392 ; ZNVER1-SSE-LABEL: test_sqrtsd:
   14393 ; ZNVER1-SSE:       # %bb.0:
   14394 ; ZNVER1-SSE-NEXT:    movapd (%rdi), %xmm1 # sched: [8:0.50]
   14395 ; ZNVER1-SSE-NEXT:    sqrtsd %xmm0, %xmm0 # sched: [20:20.00]
   14396 ; ZNVER1-SSE-NEXT:    sqrtsd %xmm1, %xmm1 # sched: [20:20.00]
   14397 ; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14398 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   14399 ;
   14400 ; ZNVER1-LABEL: test_sqrtsd:
   14401 ; ZNVER1:       # %bb.0:
   14402 ; ZNVER1-NEXT:    vmovapd (%rdi), %xmm1 # sched: [8:0.50]
   14403 ; ZNVER1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [20:20.00]
   14404 ; ZNVER1-NEXT:    vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [20:20.00]
   14405 ; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14406 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   14407   %1 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; sqrt.sd intrinsic on the incoming vector argument
   14408   %2 = load <2 x double>, <2 x double> *%a1, align 16 ; 16-byte-aligned vector load through %a1
   14409   %3 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2) ; same intrinsic on the loaded vector; the expected asm above keeps the load as a separate (v)movapd
   14410   %4 = fadd <2 x double> %1, %3 ; sum both results so each call feeds the return value
   14411   ret <2 x double> %4
   14412 }
   14413 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
   14414 
   14415 define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; Scheduling test: <2 x double> fsub with a register operand and with a loaded operand.
   14416 ; GENERIC-LABEL: test_subpd:
   14417 ; GENERIC:       # %bb.0:
   14418 ; GENERIC-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
   14419 ; GENERIC-NEXT:    subpd (%rdi), %xmm0 # sched: [9:1.00]
   14420 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   14421 ;
   14422 ; ATOM-LABEL: test_subpd:
   14423 ; ATOM:       # %bb.0:
   14424 ; ATOM-NEXT:    subpd %xmm1, %xmm0 # sched: [6:3.00]
   14425 ; ATOM-NEXT:    subpd (%rdi), %xmm0 # sched: [7:3.50]
   14426 ; ATOM-NEXT:    retq # sched: [79:39.50]
   14427 ;
   14428 ; SLM-LABEL: test_subpd:
   14429 ; SLM:       # %bb.0:
   14430 ; SLM-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
   14431 ; SLM-NEXT:    subpd (%rdi), %xmm0 # sched: [6:1.00]
   14432 ; SLM-NEXT:    retq # sched: [4:1.00]
   14433 ;
   14434 ; SANDY-SSE-LABEL: test_subpd:
   14435 ; SANDY-SSE:       # %bb.0:
   14436 ; SANDY-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
   14437 ; SANDY-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [9:1.00]
   14438 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   14439 ;
   14440 ; SANDY-LABEL: test_subpd:
   14441 ; SANDY:       # %bb.0:
   14442 ; SANDY-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14443 ; SANDY-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   14444 ; SANDY-NEXT:    retq # sched: [1:1.00]
   14445 ;
   14446 ; HASWELL-SSE-LABEL: test_subpd:
   14447 ; HASWELL-SSE:       # %bb.0:
   14448 ; HASWELL-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
   14449 ; HASWELL-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [9:1.00]
   14450 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   14451 ;
   14452 ; HASWELL-LABEL: test_subpd:
   14453 ; HASWELL:       # %bb.0:
   14454 ; HASWELL-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14455 ; HASWELL-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   14456 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   14457 ;
   14458 ; BROADWELL-SSE-LABEL: test_subpd:
   14459 ; BROADWELL-SSE:       # %bb.0:
   14460 ; BROADWELL-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
   14461 ; BROADWELL-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [8:1.00]
   14462 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   14463 ;
   14464 ; BROADWELL-LABEL: test_subpd:
   14465 ; BROADWELL:       # %bb.0:
   14466 ; BROADWELL-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14467 ; BROADWELL-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   14468 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   14469 ;
   14470 ; SKYLAKE-SSE-LABEL: test_subpd:
   14471 ; SKYLAKE-SSE:       # %bb.0:
   14472 ; SKYLAKE-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [4:0.50]
   14473 ; SKYLAKE-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [10:0.50]
   14474 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   14475 ;
   14476 ; SKYLAKE-LABEL: test_subpd:
   14477 ; SKYLAKE:       # %bb.0:
   14478 ; SKYLAKE-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   14479 ; SKYLAKE-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   14480 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   14481 ;
   14482 ; SKX-SSE-LABEL: test_subpd:
   14483 ; SKX-SSE:       # %bb.0:
   14484 ; SKX-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [4:0.50]
   14485 ; SKX-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [10:0.50]
   14486 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   14487 ;
   14488 ; SKX-LABEL: test_subpd:
   14489 ; SKX:       # %bb.0:
   14490 ; SKX-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   14491 ; SKX-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   14492 ; SKX-NEXT:    retq # sched: [7:1.00]
   14493 ;
   14494 ; BTVER2-SSE-LABEL: test_subpd:
   14495 ; BTVER2-SSE:       # %bb.0:
   14496 ; BTVER2-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
   14497 ; BTVER2-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [8:1.00]
   14498 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   14499 ;
   14500 ; BTVER2-LABEL: test_subpd:
   14501 ; BTVER2:       # %bb.0:
   14502 ; BTVER2-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14503 ; BTVER2-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   14504 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   14505 ;
   14506 ; ZNVER1-SSE-LABEL: test_subpd:
   14507 ; ZNVER1-SSE:       # %bb.0:
   14508 ; ZNVER1-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
   14509 ; ZNVER1-SSE-NEXT:    subpd (%rdi), %xmm0 # sched: [10:1.00]
   14510 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   14511 ;
   14512 ; ZNVER1-LABEL: test_subpd:
   14513 ; ZNVER1:       # %bb.0:
   14514 ; ZNVER1-NEXT:    vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14515 ; ZNVER1-NEXT:    vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   14516 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   14517   %1 = fsub <2 x double> %a0, %a1 ; register-register subtract of the two vector arguments
   14518   %2 = load <2 x double>, <2 x double> *%a2, align 16 ; 16-byte-aligned vector load through %a2
   14519   %3 = fsub <2 x double> %1, %2 ; subtract the loaded vector; the expected asm above uses the (%rdi) memory form of (v)subpd
   14520   ret <2 x double> %3
   14521 }
   14522 
   14523 define double @test_subsd(double %a0, double %a1, double *%a2) { ; Scheduling test: scalar double fsub with a register operand and with a loaded operand.
   14524 ; GENERIC-LABEL: test_subsd:
   14525 ; GENERIC:       # %bb.0:
   14526 ; GENERIC-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
   14527 ; GENERIC-NEXT:    subsd (%rdi), %xmm0 # sched: [9:1.00]
   14528 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   14529 ;
   14530 ; ATOM-LABEL: test_subsd:
   14531 ; ATOM:       # %bb.0:
   14532 ; ATOM-NEXT:    subsd %xmm1, %xmm0 # sched: [5:5.00]
   14533 ; ATOM-NEXT:    subsd (%rdi), %xmm0 # sched: [5:5.00]
   14534 ; ATOM-NEXT:    retq # sched: [79:39.50]
   14535 ;
   14536 ; SLM-LABEL: test_subsd:
   14537 ; SLM:       # %bb.0:
   14538 ; SLM-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
   14539 ; SLM-NEXT:    subsd (%rdi), %xmm0 # sched: [6:1.00]
   14540 ; SLM-NEXT:    retq # sched: [4:1.00]
   14541 ;
   14542 ; SANDY-SSE-LABEL: test_subsd:
   14543 ; SANDY-SSE:       # %bb.0:
   14544 ; SANDY-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
   14545 ; SANDY-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [9:1.00]
   14546 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   14547 ;
   14548 ; SANDY-LABEL: test_subsd:
   14549 ; SANDY:       # %bb.0:
   14550 ; SANDY-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14551 ; SANDY-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   14552 ; SANDY-NEXT:    retq # sched: [1:1.00]
   14553 ;
   14554 ; HASWELL-SSE-LABEL: test_subsd:
   14555 ; HASWELL-SSE:       # %bb.0:
   14556 ; HASWELL-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
   14557 ; HASWELL-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [8:1.00]
   14558 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   14559 ;
   14560 ; HASWELL-LABEL: test_subsd:
   14561 ; HASWELL:       # %bb.0:
   14562 ; HASWELL-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14563 ; HASWELL-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   14564 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   14565 ;
   14566 ; BROADWELL-SSE-LABEL: test_subsd:
   14567 ; BROADWELL-SSE:       # %bb.0:
   14568 ; BROADWELL-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
   14569 ; BROADWELL-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [8:1.00]
   14570 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   14571 ;
   14572 ; BROADWELL-LABEL: test_subsd:
   14573 ; BROADWELL:       # %bb.0:
   14574 ; BROADWELL-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14575 ; BROADWELL-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   14576 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   14577 ;
   14578 ; SKYLAKE-SSE-LABEL: test_subsd:
   14579 ; SKYLAKE-SSE:       # %bb.0:
   14580 ; SKYLAKE-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [4:0.50]
   14581 ; SKYLAKE-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [9:0.50]
   14582 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   14583 ;
   14584 ; SKYLAKE-LABEL: test_subsd:
   14585 ; SKYLAKE:       # %bb.0:
   14586 ; SKYLAKE-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   14587 ; SKYLAKE-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   14588 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   14589 ;
   14590 ; SKX-SSE-LABEL: test_subsd:
   14591 ; SKX-SSE:       # %bb.0:
   14592 ; SKX-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [4:0.50]
   14593 ; SKX-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [9:0.50]
   14594 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   14595 ;
   14596 ; SKX-LABEL: test_subsd:
   14597 ; SKX:       # %bb.0:
   14598 ; SKX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   14599 ; SKX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   14600 ; SKX-NEXT:    retq # sched: [7:1.00]
   14601 ;
   14602 ; BTVER2-SSE-LABEL: test_subsd:
   14603 ; BTVER2-SSE:       # %bb.0:
   14604 ; BTVER2-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
   14605 ; BTVER2-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [8:1.00]
   14606 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   14607 ;
   14608 ; BTVER2-LABEL: test_subsd:
   14609 ; BTVER2:       # %bb.0:
   14610 ; BTVER2-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14611 ; BTVER2-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   14612 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   14613 ;
   14614 ; ZNVER1-SSE-LABEL: test_subsd:
   14615 ; ZNVER1-SSE:       # %bb.0:
   14616 ; ZNVER1-SSE-NEXT:    subsd %xmm1, %xmm0 # sched: [3:1.00]
   14617 ; ZNVER1-SSE-NEXT:    subsd (%rdi), %xmm0 # sched: [10:1.00]
   14618 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   14619 ;
   14620 ; ZNVER1-LABEL: test_subsd:
   14621 ; ZNVER1:       # %bb.0:
   14622 ; ZNVER1-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14623 ; ZNVER1-NEXT:    vsubsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   14624 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   14625   %1 = fsub double %a0, %a1 ; register-register subtract of the two scalar arguments
   14626   %2 = load double, double *%a2, align 8 ; 8-byte-aligned scalar load through %a2
   14627   %3 = fsub double %1, %2 ; subtract the loaded value; the expected asm above uses the (%rdi) memory form of (v)subsd
   14628   ret double %3
   14629 }
   14630 
   14631 define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; Scheduling test: ucomieq.sd intrinsic with a register operand and with a loaded operand.
   14632 ; GENERIC-LABEL: test_ucomisd:
   14633 ; GENERIC:       # %bb.0:
   14634 ; GENERIC-NEXT:    ucomisd %xmm1, %xmm0 # sched: [2:1.00]
   14635 ; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
   14636 ; GENERIC-NEXT:    sete %cl # sched: [1:0.50]
   14637 ; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
   14638 ; GENERIC-NEXT:    ucomisd (%rdi), %xmm0 # sched: [8:1.00]
   14639 ; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
   14640 ; GENERIC-NEXT:    sete %dl # sched: [1:0.50]
   14641 ; GENERIC-NEXT:    andb %al, %dl # sched: [1:0.33]
   14642 ; GENERIC-NEXT:    orb %cl, %dl # sched: [1:0.33]
   14643 ; GENERIC-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
   14644 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   14645 ;
   14646 ; ATOM-LABEL: test_ucomisd:
   14647 ; ATOM:       # %bb.0:
   14648 ; ATOM-NEXT:    ucomisd %xmm1, %xmm0 # sched: [9:4.50]
   14649 ; ATOM-NEXT:    setnp %al # sched: [1:0.50]
   14650 ; ATOM-NEXT:    sete %cl # sched: [1:0.50]
   14651 ; ATOM-NEXT:    andb %al, %cl # sched: [1:0.50]
   14652 ; ATOM-NEXT:    ucomisd (%rdi), %xmm0 # sched: [10:5.00]
   14653 ; ATOM-NEXT:    setnp %al # sched: [1:0.50]
   14654 ; ATOM-NEXT:    sete %dl # sched: [1:0.50]
   14655 ; ATOM-NEXT:    andb %al, %dl # sched: [1:0.50]
   14656 ; ATOM-NEXT:    orb %cl, %dl # sched: [1:0.50]
   14657 ; ATOM-NEXT:    movzbl %dl, %eax # sched: [1:1.00]
   14658 ; ATOM-NEXT:    retq # sched: [79:39.50]
   14659 ;
   14660 ; SLM-LABEL: test_ucomisd:
   14661 ; SLM:       # %bb.0:
   14662 ; SLM-NEXT:    ucomisd %xmm1, %xmm0 # sched: [3:1.00]
   14663 ; SLM-NEXT:    setnp %al # sched: [1:0.50]
   14664 ; SLM-NEXT:    sete %cl # sched: [1:0.50]
   14665 ; SLM-NEXT:    andb %al, %cl # sched: [1:0.50]
   14666 ; SLM-NEXT:    ucomisd (%rdi), %xmm0 # sched: [6:1.00]
   14667 ; SLM-NEXT:    setnp %al # sched: [1:0.50]
   14668 ; SLM-NEXT:    sete %dl # sched: [1:0.50]
   14669 ; SLM-NEXT:    andb %al, %dl # sched: [1:0.50]
   14670 ; SLM-NEXT:    orb %cl, %dl # sched: [1:0.50]
   14671 ; SLM-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
   14672 ; SLM-NEXT:    retq # sched: [4:1.00]
   14673 ;
   14674 ; SANDY-SSE-LABEL: test_ucomisd:
   14675 ; SANDY-SSE:       # %bb.0:
   14676 ; SANDY-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [2:1.00]
   14677 ; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
   14678 ; SANDY-SSE-NEXT:    sete %cl # sched: [1:0.50]
   14679 ; SANDY-SSE-NEXT:    andb %al, %cl # sched: [1:0.33]
   14680 ; SANDY-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [8:1.00]
   14681 ; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
   14682 ; SANDY-SSE-NEXT:    sete %dl # sched: [1:0.50]
   14683 ; SANDY-SSE-NEXT:    andb %al, %dl # sched: [1:0.33]
   14684 ; SANDY-SSE-NEXT:    orb %cl, %dl # sched: [1:0.33]
   14685 ; SANDY-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
   14686 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   14687 ;
   14688 ; SANDY-LABEL: test_ucomisd:
   14689 ; SANDY:       # %bb.0:
   14690 ; SANDY-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
   14691 ; SANDY-NEXT:    setnp %al # sched: [1:0.50]
   14692 ; SANDY-NEXT:    sete %cl # sched: [1:0.50]
   14693 ; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
   14694 ; SANDY-NEXT:    vucomisd (%rdi), %xmm0 # sched: [8:1.00]
   14695 ; SANDY-NEXT:    setnp %al # sched: [1:0.50]
   14696 ; SANDY-NEXT:    sete %dl # sched: [1:0.50]
   14697 ; SANDY-NEXT:    andb %al, %dl # sched: [1:0.33]
   14698 ; SANDY-NEXT:    orb %cl, %dl # sched: [1:0.33]
   14699 ; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
   14700 ; SANDY-NEXT:    retq # sched: [1:1.00]
   14701 ;
   14702 ; HASWELL-SSE-LABEL: test_ucomisd:
   14703 ; HASWELL-SSE:       # %bb.0:
   14704 ; HASWELL-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [3:1.00]
   14705 ; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
   14706 ; HASWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
   14707 ; HASWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
   14708 ; HASWELL-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [8:1.00]
   14709 ; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
   14710 ; HASWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
   14711 ; HASWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
   14712 ; HASWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
   14713 ; HASWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   14714 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   14715 ;
   14716 ; HASWELL-LABEL: test_ucomisd:
   14717 ; HASWELL:       # %bb.0:
   14718 ; HASWELL-NEXT:    vucomisd %xmm1, %xmm0 # sched: [3:1.00]
   14719 ; HASWELL-NEXT:    setnp %al # sched: [1:0.50]
   14720 ; HASWELL-NEXT:    sete %cl # sched: [1:0.50]
   14721 ; HASWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
   14722 ; HASWELL-NEXT:    vucomisd (%rdi), %xmm0 # sched: [8:1.00]
   14723 ; HASWELL-NEXT:    setnp %al # sched: [1:0.50]
   14724 ; HASWELL-NEXT:    sete %dl # sched: [1:0.50]
   14725 ; HASWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
   14726 ; HASWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
   14727 ; HASWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   14728 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   14729 ;
   14730 ; BROADWELL-SSE-LABEL: test_ucomisd:
   14731 ; BROADWELL-SSE:       # %bb.0:
   14732 ; BROADWELL-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [3:1.00]
   14733 ; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
   14734 ; BROADWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
   14735 ; BROADWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
   14736 ; BROADWELL-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [8:1.00]
   14737 ; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
   14738 ; BROADWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
   14739 ; BROADWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
   14740 ; BROADWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
   14741 ; BROADWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   14742 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   14743 ;
   14744 ; BROADWELL-LABEL: test_ucomisd:
   14745 ; BROADWELL:       # %bb.0:
   14746 ; BROADWELL-NEXT:    vucomisd %xmm1, %xmm0 # sched: [3:1.00]
   14747 ; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
   14748 ; BROADWELL-NEXT:    sete %cl # sched: [1:0.50]
   14749 ; BROADWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
   14750 ; BROADWELL-NEXT:    vucomisd (%rdi), %xmm0 # sched: [8:1.00]
   14751 ; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
   14752 ; BROADWELL-NEXT:    sete %dl # sched: [1:0.50]
   14753 ; BROADWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
   14754 ; BROADWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
   14755 ; BROADWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   14756 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   14757 ;
   14758 ; SKYLAKE-SSE-LABEL: test_ucomisd:
   14759 ; SKYLAKE-SSE:       # %bb.0:
   14760 ; SKYLAKE-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [2:1.00]
   14761 ; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
   14762 ; SKYLAKE-SSE-NEXT:    sete %cl # sched: [1:0.50]
   14763 ; SKYLAKE-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
   14764 ; SKYLAKE-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [7:1.00]
   14765 ; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
   14766 ; SKYLAKE-SSE-NEXT:    sete %dl # sched: [1:0.50]
   14767 ; SKYLAKE-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
   14768 ; SKYLAKE-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
   14769 ; SKYLAKE-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   14770 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   14771 ;
   14772 ; SKYLAKE-LABEL: test_ucomisd:
   14773 ; SKYLAKE:       # %bb.0:
   14774 ; SKYLAKE-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
   14775 ; SKYLAKE-NEXT:    setnp %al # sched: [1:0.50]
   14776 ; SKYLAKE-NEXT:    sete %cl # sched: [1:0.50]
   14777 ; SKYLAKE-NEXT:    andb %al, %cl # sched: [1:0.25]
   14778 ; SKYLAKE-NEXT:    vucomisd (%rdi), %xmm0 # sched: [7:1.00]
   14779 ; SKYLAKE-NEXT:    setnp %al # sched: [1:0.50]
   14780 ; SKYLAKE-NEXT:    sete %dl # sched: [1:0.50]
   14781 ; SKYLAKE-NEXT:    andb %al, %dl # sched: [1:0.25]
   14782 ; SKYLAKE-NEXT:    orb %cl, %dl # sched: [1:0.25]
   14783 ; SKYLAKE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   14784 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   14785 ;
   14786 ; SKX-SSE-LABEL: test_ucomisd:
   14787 ; SKX-SSE:       # %bb.0:
   14788 ; SKX-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [2:1.00]
   14789 ; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
   14790 ; SKX-SSE-NEXT:    sete %cl # sched: [1:0.50]
   14791 ; SKX-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
   14792 ; SKX-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [7:1.00]
   14793 ; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
   14794 ; SKX-SSE-NEXT:    sete %dl # sched: [1:0.50]
   14795 ; SKX-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
   14796 ; SKX-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
   14797 ; SKX-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   14798 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   14799 ;
   14800 ; SKX-LABEL: test_ucomisd:
   14801 ; SKX:       # %bb.0:
   14802 ; SKX-NEXT:    vucomisd %xmm1, %xmm0 # sched: [2:1.00]
   14803 ; SKX-NEXT:    setnp %al # sched: [1:0.50]
   14804 ; SKX-NEXT:    sete %cl # sched: [1:0.50]
   14805 ; SKX-NEXT:    andb %al, %cl # sched: [1:0.25]
   14806 ; SKX-NEXT:    vucomisd (%rdi), %xmm0 # sched: [7:1.00]
   14807 ; SKX-NEXT:    setnp %al # sched: [1:0.50]
   14808 ; SKX-NEXT:    sete %dl # sched: [1:0.50]
   14809 ; SKX-NEXT:    andb %al, %dl # sched: [1:0.25]
   14810 ; SKX-NEXT:    orb %cl, %dl # sched: [1:0.25]
   14811 ; SKX-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   14812 ; SKX-NEXT:    retq # sched: [7:1.00]
   14813 ;
   14814 ; BTVER2-SSE-LABEL: test_ucomisd:
   14815 ; BTVER2-SSE:       # %bb.0:
   14816 ; BTVER2-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [3:1.00]
   14817 ; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
   14818 ; BTVER2-SSE-NEXT:    sete %cl # sched: [1:0.50]
   14819 ; BTVER2-SSE-NEXT:    andb %al, %cl # sched: [1:0.50]
   14820 ; BTVER2-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [8:1.00]
   14821 ; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
   14822 ; BTVER2-SSE-NEXT:    sete %dl # sched: [1:0.50]
   14823 ; BTVER2-SSE-NEXT:    andb %al, %dl # sched: [1:0.50]
   14824 ; BTVER2-SSE-NEXT:    orb %cl, %dl # sched: [1:0.50]
   14825 ; BTVER2-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
   14826 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   14827 ;
   14828 ; BTVER2-LABEL: test_ucomisd:
   14829 ; BTVER2:       # %bb.0:
   14830 ; BTVER2-NEXT:    vucomisd %xmm1, %xmm0 # sched: [3:1.00]
   14831 ; BTVER2-NEXT:    setnp %al # sched: [1:0.50]
   14832 ; BTVER2-NEXT:    sete %cl # sched: [1:0.50]
   14833 ; BTVER2-NEXT:    andb %al, %cl # sched: [1:0.50]
   14834 ; BTVER2-NEXT:    vucomisd (%rdi), %xmm0 # sched: [8:1.00]
   14835 ; BTVER2-NEXT:    setnp %al # sched: [1:0.50]
   14836 ; BTVER2-NEXT:    sete %dl # sched: [1:0.50]
   14837 ; BTVER2-NEXT:    andb %al, %dl # sched: [1:0.50]
   14838 ; BTVER2-NEXT:    orb %cl, %dl # sched: [1:0.50]
   14839 ; BTVER2-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
   14840 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   14841 ;
   14842 ; ZNVER1-SSE-LABEL: test_ucomisd:
   14843 ; ZNVER1-SSE:       # %bb.0:
   14844 ; ZNVER1-SSE-NEXT:    ucomisd %xmm1, %xmm0 # sched: [3:1.00]
   14845 ; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
   14846 ; ZNVER1-SSE-NEXT:    sete %cl # sched: [1:0.25]
   14847 ; ZNVER1-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
   14848 ; ZNVER1-SSE-NEXT:    ucomisd (%rdi), %xmm0 # sched: [10:1.00]
   14849 ; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
   14850 ; ZNVER1-SSE-NEXT:    sete %dl # sched: [1:0.25]
   14851 ; ZNVER1-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
   14852 ; ZNVER1-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
   14853 ; ZNVER1-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   14854 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   14855 ;
   14856 ; ZNVER1-LABEL: test_ucomisd:
   14857 ; ZNVER1:       # %bb.0:
   14858 ; ZNVER1-NEXT:    vucomisd %xmm1, %xmm0 # sched: [3:1.00]
   14859 ; ZNVER1-NEXT:    setnp %al # sched: [1:0.25]
   14860 ; ZNVER1-NEXT:    sete %cl # sched: [1:0.25]
   14861 ; ZNVER1-NEXT:    andb %al, %cl # sched: [1:0.25]
   14862 ; ZNVER1-NEXT:    vucomisd (%rdi), %xmm0 # sched: [10:1.00]
   14863 ; ZNVER1-NEXT:    setnp %al # sched: [1:0.25]
   14864 ; ZNVER1-NEXT:    sete %dl # sched: [1:0.25]
   14865 ; ZNVER1-NEXT:    andb %al, %dl # sched: [1:0.25]
   14866 ; ZNVER1-NEXT:    orb %cl, %dl # sched: [1:0.25]
   14867 ; ZNVER1-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   14868 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   14869   %1 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; compare the two register arguments
   14870   %2 = load <2 x double>, <2 x double> *%a2, align 8 ; vector load through %a2 with 8-byte alignment; NOTE(review): the other vector loads in this chunk use align 16 - confirm this is intentional
   14871   %3 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %2) ; compare %a0 with the loaded vector; the expected asm above uses the (%rdi) memory form of (v)ucomisd
   14872   %4 = or i32 %1, %3 ; OR the two i32 results so each call feeds the return value
   14873   ret i32 %4
   14874 }
   14875 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
   14876 
   14877 define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
         ; Scheduling test for UNPCKHPD / VUNPCKHPD. The expected assembly below
         ; pins a register-register and a register-memory form of the instruction
         ; for every prefix used by the RUN lines at the top of the file, together
         ; with the "sched" annotation that llc emits under -print-schedule
         ; (presumably [latency:reciprocal throughput] - confirm against the llc
         ; documentation).
         ; The assertions are autogenerated by utils/update_llc_test_checks.py (see
         ; the NOTE on the first line of the file); regenerate them instead of
         ; editing them by hand.
   14878 ; GENERIC-LABEL: test_unpckhpd:
   14879 ; GENERIC:       # %bb.0:
   14880 ; GENERIC-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   14881 ; GENERIC-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
   14882 ; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14883 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   14884 ;
   14885 ; ATOM-LABEL: test_unpckhpd:
   14886 ; ATOM:       # %bb.0:
   14887 ; ATOM-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   14888 ; ATOM-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00]
   14889 ; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
   14890 ; ATOM-NEXT:    retq # sched: [79:39.50]
   14891 ;
   14892 ; SLM-LABEL: test_unpckhpd:
   14893 ; SLM:       # %bb.0:
   14894 ; SLM-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   14895 ; SLM-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [4:1.00]
   14896 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14897 ; SLM-NEXT:    retq # sched: [4:1.00]
   14898 ;
   14899 ; SANDY-SSE-LABEL: test_unpckhpd:
   14900 ; SANDY-SSE:       # %bb.0:
   14901 ; SANDY-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   14902 ; SANDY-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
   14903 ; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14904 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   14905 ;
   14906 ; SANDY-LABEL: test_unpckhpd:
   14907 ; SANDY:       # %bb.0:
   14908 ; SANDY-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   14909 ; SANDY-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
   14910 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14911 ; SANDY-NEXT:    retq # sched: [1:1.00]
   14912 ;
   14913 ; HASWELL-SSE-LABEL: test_unpckhpd:
   14914 ; HASWELL-SSE:       # %bb.0:
   14915 ; HASWELL-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   14916 ; HASWELL-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
   14917 ; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14918 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   14919 ;
   14920 ; HASWELL-LABEL: test_unpckhpd:
   14921 ; HASWELL:       # %bb.0:
   14922 ; HASWELL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   14923 ; HASWELL-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
   14924 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14925 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   14926 ;
   14927 ; BROADWELL-SSE-LABEL: test_unpckhpd:
   14928 ; BROADWELL-SSE:       # %bb.0:
   14929 ; BROADWELL-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   14930 ; BROADWELL-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
   14931 ; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14932 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   14933 ;
   14934 ; BROADWELL-LABEL: test_unpckhpd:
   14935 ; BROADWELL:       # %bb.0:
   14936 ; BROADWELL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   14937 ; BROADWELL-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
   14938 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14939 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   14940 ;
   14941 ; SKYLAKE-SSE-LABEL: test_unpckhpd:
   14942 ; SKYLAKE-SSE:       # %bb.0:
   14943 ; SKYLAKE-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   14944 ; SKYLAKE-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
   14945 ; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   14946 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   14947 ;
   14948 ; SKYLAKE-LABEL: test_unpckhpd:
   14949 ; SKYLAKE:       # %bb.0:
   14950 ; SKYLAKE-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   14951 ; SKYLAKE-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
   14952 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   14953 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   14954 ;
   14955 ; SKX-SSE-LABEL: test_unpckhpd:
   14956 ; SKX-SSE:       # %bb.0:
   14957 ; SKX-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   14958 ; SKX-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
   14959 ; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   14960 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   14961 ;
   14962 ; SKX-LABEL: test_unpckhpd:
   14963 ; SKX:       # %bb.0:
   14964 ; SKX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
   14965 ; SKX-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
   14966 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   14967 ; SKX-NEXT:    retq # sched: [7:1.00]
   14968 ;
   14969 ; BTVER2-SSE-LABEL: test_unpckhpd:
   14970 ; BTVER2-SSE:       # %bb.0:
   14971 ; BTVER2-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
   14972 ; BTVER2-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
   14973 ; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14974 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   14975 ;
   14976 ; BTVER2-LABEL: test_unpckhpd:
   14977 ; BTVER2:       # %bb.0:
   14978 ; BTVER2-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
   14979 ; BTVER2-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
   14980 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14981 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   14982 ;
   14983 ; ZNVER1-SSE-LABEL: test_unpckhpd:
   14984 ; ZNVER1-SSE:       # %bb.0:
   14985 ; ZNVER1-SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
   14986 ; ZNVER1-SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
   14987 ; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   14988 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   14989 ;
   14990 ; ZNVER1-LABEL: test_unpckhpd:
   14991 ; ZNVER1:       # %bb.0:
   14992 ; ZNVER1-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
   14993 ; ZNVER1-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
   14994 ; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   14995 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
           ; Mask <1, 3> takes element 1 of %a0 and element 1 of %a1 (the high
           ; halves); this is the "xmm0[1],xmm1[1]" register form expected above.
   14996   %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
   14997   %2 = load <2 x double>, <2 x double> *%a2, align 16
           ; Same mask, but the second operand is the value loaded from %a2; this
           ; is the "xmm1[1],mem[1]" memory-operand form expected above.
   14998   %3 = shufflevector <2 x double> %a1, <2 x double> %2, <2 x i32> <i32 1, i32 3>
           ; The fadd consumes both shuffle results and its sum is returned
           ; (presumably so that neither shuffle can be dropped as unused).
   14999   %4 = fadd <2 x double> %1, %3
   15000   ret <2 x double> %4
   15001 }
   15002 
   15003 define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
         ; Scheduling test for UNPCKLPD / VUNPCKLPD. The expected assembly below
         ; pins a register-register and a register-memory form of the instruction
         ; for every prefix used by the RUN lines at the top of the file, together
         ; with the "sched" annotation that llc emits under -print-schedule
         ; (presumably [latency:reciprocal throughput] - confirm against the llc
         ; documentation).
         ; The assertions are autogenerated by utils/update_llc_test_checks.py (see
         ; the NOTE on the first line of the file); regenerate them instead of
         ; editing them by hand.
   15004 ; GENERIC-LABEL: test_unpcklpd:
   15005 ; GENERIC:       # %bb.0:
   15006 ; GENERIC-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   15007 ; GENERIC-NEXT:    movapd %xmm0, %xmm1 # sched: [1:1.00]
   15008 ; GENERIC-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   15009 ; GENERIC-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   15010 ; GENERIC-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
   15011 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   15012 ;
   15013 ; ATOM-LABEL: test_unpcklpd:
   15014 ; ATOM:       # %bb.0:
   15015 ; ATOM-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   15016 ; ATOM-NEXT:    movapd %xmm0, %xmm1 # sched: [1:0.50]
   15017 ; ATOM-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
   15018 ; ATOM-NEXT:    addpd %xmm0, %xmm1 # sched: [6:3.00]
   15019 ; ATOM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
   15020 ; ATOM-NEXT:    retq # sched: [79:39.50]
   15021 ;
   15022 ; SLM-LABEL: test_unpcklpd:
   15023 ; SLM:       # %bb.0:
   15024 ; SLM-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   15025 ; SLM-NEXT:    movapd %xmm0, %xmm1 # sched: [1:0.50]
   15026 ; SLM-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
   15027 ; SLM-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   15028 ; SLM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
   15029 ; SLM-NEXT:    retq # sched: [4:1.00]
   15030 ;
   15031 ; SANDY-SSE-LABEL: test_unpcklpd:
   15032 ; SANDY-SSE:       # %bb.0:
   15033 ; SANDY-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   15034 ; SANDY-SSE-NEXT:    movapd %xmm0, %xmm1 # sched: [1:1.00]
   15035 ; SANDY-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   15036 ; SANDY-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   15037 ; SANDY-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
   15038 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   15039 ;
   15040 ; SANDY-LABEL: test_unpcklpd:
   15041 ; SANDY:       # %bb.0:
   15042 ; SANDY-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   15043 ; SANDY-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00]
   15044 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   15045 ; SANDY-NEXT:    retq # sched: [1:1.00]
   15046 ;
   15047 ; HASWELL-SSE-LABEL: test_unpcklpd:
   15048 ; HASWELL-SSE:       # %bb.0:
   15049 ; HASWELL-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   15050 ; HASWELL-SSE-NEXT:    movapd %xmm0, %xmm1 # sched: [1:1.00]
   15051 ; HASWELL-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   15052 ; HASWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   15053 ; HASWELL-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
   15054 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   15055 ;
   15056 ; HASWELL-LABEL: test_unpcklpd:
   15057 ; HASWELL:       # %bb.0:
   15058 ; HASWELL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   15059 ; HASWELL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00]
   15060 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   15061 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   15062 ;
   15063 ; BROADWELL-SSE-LABEL: test_unpcklpd:
   15064 ; BROADWELL-SSE:       # %bb.0:
   15065 ; BROADWELL-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   15066 ; BROADWELL-SSE-NEXT:    movapd %xmm0, %xmm1 # sched: [1:1.00]
   15067 ; BROADWELL-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   15068 ; BROADWELL-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   15069 ; BROADWELL-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:1.00]
   15070 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   15071 ;
   15072 ; BROADWELL-LABEL: test_unpcklpd:
   15073 ; BROADWELL:       # %bb.0:
   15074 ; BROADWELL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   15075 ; BROADWELL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [6:1.00]
   15076 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   15077 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   15078 ;
   15079 ; SKYLAKE-SSE-LABEL: test_unpcklpd:
   15080 ; SKYLAKE-SSE:       # %bb.0:
   15081 ; SKYLAKE-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   15082 ; SKYLAKE-SSE-NEXT:    movapd %xmm0, %xmm1 # sched: [1:0.33]
   15083 ; SKYLAKE-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   15084 ; SKYLAKE-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [4:0.50]
   15085 ; SKYLAKE-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.33]
   15086 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   15087 ;
   15088 ; SKYLAKE-LABEL: test_unpcklpd:
   15089 ; SKYLAKE:       # %bb.0:
   15090 ; SKYLAKE-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   15091 ; SKYLAKE-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00]
   15092 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   15093 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   15094 ;
   15095 ; SKX-SSE-LABEL: test_unpcklpd:
   15096 ; SKX-SSE:       # %bb.0:
   15097 ; SKX-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   15098 ; SKX-SSE-NEXT:    movapd %xmm0, %xmm1 # sched: [1:0.33]
   15099 ; SKX-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   15100 ; SKX-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [4:0.50]
   15101 ; SKX-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.33]
   15102 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   15103 ;
   15104 ; SKX-LABEL: test_unpcklpd:
   15105 ; SKX:       # %bb.0:
   15106 ; SKX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   15107 ; SKX-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00]
   15108 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   15109 ; SKX-NEXT:    retq # sched: [7:1.00]
   15110 ;
   15111 ; BTVER2-SSE-LABEL: test_unpcklpd:
   15112 ; BTVER2-SSE:       # %bb.0:
   15113 ; BTVER2-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
   15114 ; BTVER2-SSE-NEXT:    movapd %xmm0, %xmm1 # sched: [1:0.50]
   15115 ; BTVER2-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   15116 ; BTVER2-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   15117 ; BTVER2-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
   15118 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   15119 ;
   15120 ; BTVER2-LABEL: test_unpcklpd:
   15121 ; BTVER2:       # %bb.0:
   15122 ; BTVER2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
   15123 ; BTVER2-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [6:1.00]
   15124 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   15125 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   15126 ;
   15127 ; ZNVER1-SSE-LABEL: test_unpcklpd:
   15128 ; ZNVER1-SSE:       # %bb.0:
   15129 ; ZNVER1-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
   15130 ; ZNVER1-SSE-NEXT:    movapd %xmm0, %xmm1 # sched: [1:0.25]
   15131 ; ZNVER1-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
   15132 ; ZNVER1-SSE-NEXT:    addpd %xmm0, %xmm1 # sched: [3:1.00]
   15133 ; ZNVER1-SSE-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.25]
   15134 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   15135 ;
   15136 ; ZNVER1-LABEL: test_unpcklpd:
   15137 ; ZNVER1:       # %bb.0:
   15138 ; ZNVER1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
   15139 ; ZNVER1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [8:0.50]
   15140 ; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   15141 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
           ; Mask <0, 2> takes element 0 of %a0 and element 0 of %a1 (the low
           ; halves); this is the "xmm0[0],xmm1[0]" register form expected above.
   15142   %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
   15143   %2 = load <2 x double>, <2 x double> *%a2, align 16
           ; Same mask with the loaded vector as second operand (memory-operand
           ; form). The first operand is %1, the result of the previous shuffle,
           ; not %a1. Because %1 is also an fadd operand, the SSE expectations
           ; above copy xmm0 with movapd before the destructive unpcklpd, while the
           ; AVX expectations read xmm0 directly ("xmm1 = xmm0[0],mem[0]").
           ; NOTE(review): test_unpckhpd uses %a1 at this position; the asymmetry
           ; may be unintentional. Changing it requires regenerating the assertions.
   15144   %3 = shufflevector <2 x double> %1, <2 x double> %2, <2 x i32> <i32 0, i32 2>
           ; The fadd consumes both shuffle results and its sum is returned.
   15145   %4 = fadd <2 x double> %1, %3
   15146   ret <2 x double> %4
   15147 }
   15148 
   15149 define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
         ; Scheduling test for XORPD / VXORPD. The expected assembly below pins a
         ; register-register and a register-memory form of the instruction for
         ; every prefix used by the RUN lines at the top of the file, together with
         ; the "sched" annotation that llc emits under -print-schedule (presumably
         ; [latency:reciprocal throughput] - confirm against the llc
         ; documentation).
         ; The assertions are autogenerated by utils/update_llc_test_checks.py (see
         ; the NOTE on the first line of the file); regenerate them instead of
         ; editing them by hand.
   15150 ; GENERIC-LABEL: test_xorpd:
   15151 ; GENERIC:       # %bb.0:
   15152 ; GENERIC-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:1.00]
   15153 ; GENERIC-NEXT:    xorpd (%rdi), %xmm0 # sched: [7:1.00]
   15154 ; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   15155 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   15156 ;
   15157 ; ATOM-LABEL: test_xorpd:
   15158 ; ATOM:       # %bb.0:
   15159 ; ATOM-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:0.50]
   15160 ; ATOM-NEXT:    xorpd (%rdi), %xmm0 # sched: [1:1.00]
   15161 ; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
   15162 ; ATOM-NEXT:    retq # sched: [79:39.50]
   15163 ;
   15164 ; SLM-LABEL: test_xorpd:
   15165 ; SLM:       # %bb.0:
   15166 ; SLM-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:0.50]
   15167 ; SLM-NEXT:    xorpd (%rdi), %xmm0 # sched: [4:1.00]
   15168 ; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   15169 ; SLM-NEXT:    retq # sched: [4:1.00]
   15170 ;
   15171 ; SANDY-SSE-LABEL: test_xorpd:
   15172 ; SANDY-SSE:       # %bb.0:
   15173 ; SANDY-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:1.00]
   15174 ; SANDY-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [7:1.00]
   15175 ; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   15176 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   15177 ;
   15178 ; SANDY-LABEL: test_xorpd:
   15179 ; SANDY:       # %bb.0:
   15180 ; SANDY-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   15181 ; SANDY-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   15182 ; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   15183 ; SANDY-NEXT:    retq # sched: [1:1.00]
   15184 ;
   15185 ; HASWELL-SSE-LABEL: test_xorpd:
   15186 ; HASWELL-SSE:       # %bb.0:
   15187 ; HASWELL-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:1.00]
   15188 ; HASWELL-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [7:1.00]
   15189 ; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   15190 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   15191 ;
   15192 ; HASWELL-LABEL: test_xorpd:
   15193 ; HASWELL:       # %bb.0:
   15194 ; HASWELL-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   15195 ; HASWELL-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   15196 ; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   15197 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   15198 ;
   15199 ; BROADWELL-SSE-LABEL: test_xorpd:
   15200 ; BROADWELL-SSE:       # %bb.0:
   15201 ; BROADWELL-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:1.00]
   15202 ; BROADWELL-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [6:1.00]
   15203 ; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   15204 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   15205 ;
   15206 ; BROADWELL-LABEL: test_xorpd:
   15207 ; BROADWELL:       # %bb.0:
   15208 ; BROADWELL-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   15209 ; BROADWELL-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   15210 ; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   15211 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   15212 ;
   15213 ; SKYLAKE-SSE-LABEL: test_xorpd:
   15214 ; SKYLAKE-SSE:       # %bb.0:
   15215 ; SKYLAKE-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:0.33]
   15216 ; SKYLAKE-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [7:0.50]
   15217 ; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   15218 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   15219 ;
   15220 ; SKYLAKE-LABEL: test_xorpd:
   15221 ; SKYLAKE:       # %bb.0:
   15222 ; SKYLAKE-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   15223 ; SKYLAKE-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   15224 ; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
   15225 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   15226 ;
   15227 ; SKX-SSE-LABEL: test_xorpd:
   15228 ; SKX-SSE:       # %bb.0:
   15229 ; SKX-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:0.33]
   15230 ; SKX-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [7:0.50]
   15231 ; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
   15232 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   15233 ;
   15234 ; SKX-LABEL: test_xorpd:
   15235 ; SKX:       # %bb.0:
   15236 ; SKX-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   15237 ; SKX-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   15238 ; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
   15239 ; SKX-NEXT:    retq # sched: [7:1.00]
   15240 ;
   15241 ; BTVER2-SSE-LABEL: test_xorpd:
   15242 ; BTVER2-SSE:       # %bb.0:
   15243 ; BTVER2-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:0.50]
   15244 ; BTVER2-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [6:1.00]
   15245 ; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   15246 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   15247 ;
   15248 ; BTVER2-LABEL: test_xorpd:
   15249 ; BTVER2:       # %bb.0:
   15250 ; BTVER2-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   15251 ; BTVER2-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   15252 ; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   15253 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   15254 ;
   15255 ; ZNVER1-SSE-LABEL: test_xorpd:
   15256 ; ZNVER1-SSE:       # %bb.0:
   15257 ; ZNVER1-SSE-NEXT:    xorpd %xmm1, %xmm0 # sched: [1:0.25]
   15258 ; ZNVER1-SSE-NEXT:    xorpd (%rdi), %xmm0 # sched: [8:0.50]
   15259 ; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
   15260 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   15261 ;
   15262 ; ZNVER1-LABEL: test_xorpd:
   15263 ; ZNVER1:       # %bb.0:
   15264 ; ZNVER1-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   15265 ; ZNVER1-NEXT:    vxorpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   15266 ; ZNVER1-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   15267 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
           ; The xor is written on <4 x i32> views of the double vectors, so both
           ; inputs are bitcast first. This first xor has two register operands
           ; (the "xorpd %xmm1, %xmm0" form expected above).
   15268   %1 = bitcast <2 x double> %a0 to <4 x i32>
   15269   %2 = bitcast <2 x double> %a1 to <4 x i32>
   15270   %3 = xor <4 x i32> %1, %2
           ; The second xor takes the value loaded from %a2 (the "xorpd (%rdi)"
           ; memory-operand form expected above).
   15271   %4 = load <2 x double>, <2 x double> *%a2, align 16
   15272   %5 = bitcast <2 x double> %4 to <4 x i32>
   15273   %6 = xor <4 x i32> %3, %5
           ; The result is bitcast back to <2 x double> and added to %a1.
           ; NOTE(review): presumably this floating-point use is what makes the
           ; expected output use (v)xorpd and not an integer-domain xor - confirm.
   15274   %7 = bitcast <4 x i32> %6 to <2 x double>
   15275   %8 = fadd <2 x double> %a1, %7
   15276   ret <2 x double> %8
   15277 }
   15278 
   15279 !0 = !{i32 1} ; metadata node holding the single value i32 1; not referenced in this part of the file (presumably used by an earlier test - confirm)
   15280