; (code-browser page banner, not part of the test file: Home | History | Annotate | Download | only in X86)
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,GENERIC
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ATOM
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SLM
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SANDY
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SANDY
      9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
     10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,HASWELL
     11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
     12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,BROADWELL
     13 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
     14 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SKYLAKE
     15 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
     16 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,SKX
     17 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
     18 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,BTVER2
     19 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
     20 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2  | FileCheck %s --check-prefixes=CHECK,ZNVER1
     21 
     ; test_addsubpd - scheduling-annotation test for addsubpd / vaddsubpd, reached
     ; through @llvm.x86.sse3.addsub.pd. The intrinsic is called once on the two
     ; incoming vectors and once on that result plus a vector loaded from %a2, so
     ; the expected asm holds a register-register form and a (%rdi) memory form.
     ; GENERIC, ATOM, SLM and the prefixes ending in -SSE expect the two-operand
     ; mnemonic; the remaining prefixes expect the v-prefixed three-operand form.
     ; The assertions below are autogenerated (see the note at the top of the
     ; file); regenerate them with utils/update_llc_test_checks.py rather than
     ; editing them by hand.
     22 define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
     23 ; GENERIC-LABEL: test_addsubpd:
     24 ; GENERIC:       # %bb.0:
     25 ; GENERIC-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
     26 ; GENERIC-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
     27 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     28 ;
     29 ; ATOM-LABEL: test_addsubpd:
     30 ; ATOM:       # %bb.0:
     31 ; ATOM-NEXT:    addsubpd %xmm1, %xmm0 # sched: [6:3.00]
     32 ; ATOM-NEXT:    addsubpd (%rdi), %xmm0 # sched: [7:3.50]
     33 ; ATOM-NEXT:    retq # sched: [79:39.50]
     34 ;
     35 ; SLM-LABEL: test_addsubpd:
     36 ; SLM:       # %bb.0:
     37 ; SLM-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
     38 ; SLM-NEXT:    addsubpd (%rdi), %xmm0 # sched: [6:1.00]
     39 ; SLM-NEXT:    retq # sched: [4:1.00]
     40 ;
     41 ; SANDY-SSE-LABEL: test_addsubpd:
     42 ; SANDY-SSE:       # %bb.0:
     43 ; SANDY-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
     44 ; SANDY-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
     45 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
     46 ;
     47 ; SANDY-LABEL: test_addsubpd:
     48 ; SANDY:       # %bb.0:
     49 ; SANDY-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
     50 ; SANDY-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
     51 ; SANDY-NEXT:    retq # sched: [1:1.00]
     52 ;
     53 ; HASWELL-SSE-LABEL: test_addsubpd:
     54 ; HASWELL-SSE:       # %bb.0:
     55 ; HASWELL-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
     56 ; HASWELL-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
     57 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
     58 ;
     59 ; HASWELL-LABEL: test_addsubpd:
     60 ; HASWELL:       # %bb.0:
     61 ; HASWELL-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
     62 ; HASWELL-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
     63 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     64 ;
     65 ; BROADWELL-SSE-LABEL: test_addsubpd:
     66 ; BROADWELL-SSE:       # %bb.0:
     67 ; BROADWELL-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
     68 ; BROADWELL-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [8:1.00]
     69 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
     70 ;
     71 ; BROADWELL-LABEL: test_addsubpd:
     72 ; BROADWELL:       # %bb.0:
     73 ; BROADWELL-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
     74 ; BROADWELL-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
     75 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     76 ;
     77 ; SKYLAKE-SSE-LABEL: test_addsubpd:
     78 ; SKYLAKE-SSE:       # %bb.0:
     79 ; SKYLAKE-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [4:0.50]
     80 ; SKYLAKE-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:0.50]
     81 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
     82 ;
     83 ; SKYLAKE-LABEL: test_addsubpd:
     84 ; SKYLAKE:       # %bb.0:
     85 ; SKYLAKE-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
     86 ; SKYLAKE-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
     87 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
     88 ;
     89 ; SKX-SSE-LABEL: test_addsubpd:
     90 ; SKX-SSE:       # %bb.0:
     91 ; SKX-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [4:0.50]
     92 ; SKX-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:0.50]
     93 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
     94 ;
     95 ; SKX-LABEL: test_addsubpd:
     96 ; SKX:       # %bb.0:
     97 ; SKX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
     98 ; SKX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
     99 ; SKX-NEXT:    retq # sched: [7:1.00]
    100 ;
    101 ; BTVER2-SSE-LABEL: test_addsubpd:
    102 ; BTVER2-SSE:       # %bb.0:
    103 ; BTVER2-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
    104 ; BTVER2-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [8:1.00]
    105 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    106 ;
    107 ; BTVER2-LABEL: test_addsubpd:
    108 ; BTVER2:       # %bb.0:
    109 ; BTVER2-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    110 ; BTVER2-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    111 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    112 ;
    113 ; ZNVER1-SSE-LABEL: test_addsubpd:
    114 ; ZNVER1-SSE:       # %bb.0:
    115 ; ZNVER1-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
    116 ; ZNVER1-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:1.00]
    117 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    118 ;
    119 ; ZNVER1-LABEL: test_addsubpd:
    120 ; ZNVER1:       # %bb.0:
    121 ; ZNVER1-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    122 ; ZNVER1-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
    123 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register-register use: both operands are incoming arguments.
    124   %1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
          ; Aligned vector load feeding the second call; the assertions above expect
          ; it to show up as the (%rdi) memory operand of the second instruction.
    125   %2 = load <2 x double>, <2 x double> *%a2, align 16
    126   %3 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %1, <2 x double> %2)
    127   ret <2 x double> %3
    128 }
    129 declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
    130 
    ; test_addsubps - scheduling-annotation test for addsubps / vaddsubps, reached
    ; through @llvm.x86.sse3.addsub.ps. The intrinsic is called once on the two
    ; incoming vectors and once on that result plus a vector loaded from %a2, so
    ; the expected asm holds a register-register form and a (%rdi) memory form.
    ; GENERIC, ATOM, SLM and the prefixes ending in -SSE expect the two-operand
    ; mnemonic; the remaining prefixes expect the v-prefixed three-operand form.
    ; The assertions below are autogenerated (see the note at the top of the
    ; file); regenerate them with utils/update_llc_test_checks.py rather than
    ; editing them by hand.
    131 define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
    132 ; GENERIC-LABEL: test_addsubps:
    133 ; GENERIC:       # %bb.0:
    134 ; GENERIC-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
    135 ; GENERIC-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
    136 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    137 ;
    138 ; ATOM-LABEL: test_addsubps:
    139 ; ATOM:       # %bb.0:
    140 ; ATOM-NEXT:    addsubps %xmm1, %xmm0 # sched: [5:5.00]
    141 ; ATOM-NEXT:    addsubps (%rdi), %xmm0 # sched: [5:5.00]
    142 ; ATOM-NEXT:    retq # sched: [79:39.50]
    143 ;
    144 ; SLM-LABEL: test_addsubps:
    145 ; SLM:       # %bb.0:
    146 ; SLM-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
    147 ; SLM-NEXT:    addsubps (%rdi), %xmm0 # sched: [6:1.00]
    148 ; SLM-NEXT:    retq # sched: [4:1.00]
    149 ;
    150 ; SANDY-SSE-LABEL: test_addsubps:
    151 ; SANDY-SSE:       # %bb.0:
    152 ; SANDY-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
    153 ; SANDY-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
    154 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    155 ;
    156 ; SANDY-LABEL: test_addsubps:
    157 ; SANDY:       # %bb.0:
    158 ; SANDY-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    159 ; SANDY-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
    160 ; SANDY-NEXT:    retq # sched: [1:1.00]
    161 ;
    162 ; HASWELL-SSE-LABEL: test_addsubps:
    163 ; HASWELL-SSE:       # %bb.0:
    164 ; HASWELL-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
    165 ; HASWELL-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
    166 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    167 ;
    168 ; HASWELL-LABEL: test_addsubps:
    169 ; HASWELL:       # %bb.0:
    170 ; HASWELL-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    171 ; HASWELL-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
    172 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    173 ;
    174 ; BROADWELL-SSE-LABEL: test_addsubps:
    175 ; BROADWELL-SSE:       # %bb.0:
    176 ; BROADWELL-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
    177 ; BROADWELL-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [8:1.00]
    178 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    179 ;
    180 ; BROADWELL-LABEL: test_addsubps:
    181 ; BROADWELL:       # %bb.0:
    182 ; BROADWELL-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    183 ; BROADWELL-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    184 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    185 ;
    186 ; SKYLAKE-SSE-LABEL: test_addsubps:
    187 ; SKYLAKE-SSE:       # %bb.0:
    188 ; SKYLAKE-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [4:0.50]
    189 ; SKYLAKE-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:0.50]
    190 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    191 ;
    192 ; SKYLAKE-LABEL: test_addsubps:
    193 ; SKYLAKE:       # %bb.0:
    194 ; SKYLAKE-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    195 ; SKYLAKE-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    196 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    197 ;
    198 ; SKX-SSE-LABEL: test_addsubps:
    199 ; SKX-SSE:       # %bb.0:
    200 ; SKX-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [4:0.50]
    201 ; SKX-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:0.50]
    202 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    203 ;
    204 ; SKX-LABEL: test_addsubps:
    205 ; SKX:       # %bb.0:
    206 ; SKX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    207 ; SKX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    208 ; SKX-NEXT:    retq # sched: [7:1.00]
    209 ;
    210 ; BTVER2-SSE-LABEL: test_addsubps:
    211 ; BTVER2-SSE:       # %bb.0:
    212 ; BTVER2-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
    213 ; BTVER2-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [8:1.00]
    214 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    215 ;
    216 ; BTVER2-LABEL: test_addsubps:
    217 ; BTVER2:       # %bb.0:
    218 ; BTVER2-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    219 ; BTVER2-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    220 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    221 ;
    222 ; ZNVER1-SSE-LABEL: test_addsubps:
    223 ; ZNVER1-SSE:       # %bb.0:
    224 ; ZNVER1-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
    225 ; ZNVER1-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:1.00]
    226 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    227 ;
    228 ; ZNVER1-LABEL: test_addsubps:
    229 ; ZNVER1:       # %bb.0:
    230 ; ZNVER1-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    231 ; ZNVER1-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
    232 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register-register use: both operands are incoming arguments.
    233   %1 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
          ; Aligned vector load feeding the second call; the assertions above expect
          ; it to show up as the (%rdi) memory operand of the second instruction.
    234   %2 = load <4 x float>, <4 x float> *%a2, align 16
    235   %3 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %1, <4 x float> %2)
    236   ret <4 x float> %3
    237 }
    238 declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
    239 
    ; test_haddpd - scheduling-annotation test for haddpd / vhaddpd, reached
    ; through @llvm.x86.sse3.hadd.pd. The intrinsic is called once on the two
    ; incoming vectors and once on that result plus a vector loaded from %a2, so
    ; the expected asm holds a register-register form and a (%rdi) memory form.
    ; GENERIC, ATOM, SLM and the prefixes ending in -SSE expect the two-operand
    ; mnemonic; the remaining prefixes expect the v-prefixed three-operand form.
    ; The assertions below are autogenerated (see the note at the top of the
    ; file); regenerate them with utils/update_llc_test_checks.py rather than
    ; editing them by hand.
    ; NOTE(review): the znver1 expectations are [100:0.25] for both the register
    ; and the memory form, unlike the other CPUs in this function - this looks
    ; like a placeholder in the scheduler model; confirm before relying on it.
    240 define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
    241 ; GENERIC-LABEL: test_haddpd:
    242 ; GENERIC:       # %bb.0:
    243 ; GENERIC-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
    244 ; GENERIC-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
    245 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    246 ;
    247 ; ATOM-LABEL: test_haddpd:
    248 ; ATOM:       # %bb.0:
    249 ; ATOM-NEXT:    haddpd %xmm1, %xmm0 # sched: [8:4.00]
    250 ; ATOM-NEXT:    haddpd (%rdi), %xmm0 # sched: [9:4.50]
    251 ; ATOM-NEXT:    retq # sched: [79:39.50]
    252 ;
    253 ; SLM-LABEL: test_haddpd:
    254 ; SLM:       # %bb.0:
    255 ; SLM-NEXT:    haddpd %xmm1, %xmm0 # sched: [3:1.00]
    256 ; SLM-NEXT:    haddpd (%rdi), %xmm0 # sched: [6:1.00]
    257 ; SLM-NEXT:    retq # sched: [4:1.00]
    258 ;
    259 ; SANDY-SSE-LABEL: test_haddpd:
    260 ; SANDY-SSE:       # %bb.0:
    261 ; SANDY-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
    262 ; SANDY-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
    263 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    264 ;
    265 ; SANDY-LABEL: test_haddpd:
    266 ; SANDY:       # %bb.0:
    267 ; SANDY-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
    268 ; SANDY-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
    269 ; SANDY-NEXT:    retq # sched: [1:1.00]
    270 ;
    271 ; HASWELL-SSE-LABEL: test_haddpd:
    272 ; HASWELL-SSE:       # %bb.0:
    273 ; HASWELL-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
    274 ; HASWELL-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
    275 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    276 ;
    277 ; HASWELL-LABEL: test_haddpd:
    278 ; HASWELL:       # %bb.0:
    279 ; HASWELL-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
    280 ; HASWELL-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
    281 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    282 ;
    283 ; BROADWELL-SSE-LABEL: test_haddpd:
    284 ; BROADWELL-SSE:       # %bb.0:
    285 ; BROADWELL-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
    286 ; BROADWELL-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [10:2.00]
    287 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    288 ;
    289 ; BROADWELL-LABEL: test_haddpd:
    290 ; BROADWELL:       # %bb.0:
    291 ; BROADWELL-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
    292 ; BROADWELL-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
    293 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    294 ;
    295 ; SKYLAKE-SSE-LABEL: test_haddpd:
    296 ; SKYLAKE-SSE:       # %bb.0:
    297 ; SKYLAKE-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [6:2.00]
    298 ; SKYLAKE-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [12:2.00]
    299 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    300 ;
    301 ; SKYLAKE-LABEL: test_haddpd:
    302 ; SKYLAKE:       # %bb.0:
    303 ; SKYLAKE-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
    304 ; SKYLAKE-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
    305 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    306 ;
    307 ; SKX-SSE-LABEL: test_haddpd:
    308 ; SKX-SSE:       # %bb.0:
    309 ; SKX-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [6:2.00]
    310 ; SKX-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [12:2.00]
    311 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    312 ;
    313 ; SKX-LABEL: test_haddpd:
    314 ; SKX:       # %bb.0:
    315 ; SKX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
    316 ; SKX-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
    317 ; SKX-NEXT:    retq # sched: [7:1.00]
    318 ;
    319 ; BTVER2-SSE-LABEL: test_haddpd:
    320 ; BTVER2-SSE:       # %bb.0:
    321 ; BTVER2-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [3:1.00]
    322 ; BTVER2-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [8:1.00]
    323 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    324 ;
    325 ; BTVER2-LABEL: test_haddpd:
    326 ; BTVER2:       # %bb.0:
    327 ; BTVER2-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    328 ; BTVER2-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    329 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    330 ;
    331 ; ZNVER1-SSE-LABEL: test_haddpd:
    332 ; ZNVER1-SSE:       # %bb.0:
    333 ; ZNVER1-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [100:0.25]
    334 ; ZNVER1-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [100:0.25]
    335 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    336 ;
    337 ; ZNVER1-LABEL: test_haddpd:
    338 ; ZNVER1:       # %bb.0:
    339 ; ZNVER1-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
    340 ; ZNVER1-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
    341 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register-register use: both operands are incoming arguments.
    342   %1 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
          ; Aligned vector load feeding the second call; the assertions above expect
          ; it to show up as the (%rdi) memory operand of the second instruction.
    343   %2 = load <2 x double>, <2 x double> *%a2, align 16
    344   %3 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %1, <2 x double> %2)
    345   ret <2 x double> %3
    346 }
    347 declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
    348 
    ; test_haddps - scheduling-annotation test for haddps / vhaddps, reached
    ; through @llvm.x86.sse3.hadd.ps. The intrinsic is called once on the two
    ; incoming vectors and once on that result plus a vector loaded from %a2, so
    ; the expected asm holds a register-register form and a (%rdi) memory form.
    ; GENERIC, ATOM, SLM and the prefixes ending in -SSE expect the two-operand
    ; mnemonic; the remaining prefixes expect the v-prefixed three-operand form.
    ; The assertions below are autogenerated (see the note at the top of the
    ; file); regenerate them with utils/update_llc_test_checks.py rather than
    ; editing them by hand.
    ; NOTE(review): the znver1 expectations are [100:0.25] for both the register
    ; and the memory form, unlike the other CPUs in this function - this looks
    ; like a placeholder in the scheduler model; confirm before relying on it.
    349 define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
    350 ; GENERIC-LABEL: test_haddps:
    351 ; GENERIC:       # %bb.0:
    352 ; GENERIC-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
    353 ; GENERIC-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
    354 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    355 ;
    356 ; ATOM-LABEL: test_haddps:
    357 ; ATOM:       # %bb.0:
    358 ; ATOM-NEXT:    haddps %xmm1, %xmm0 # sched: [8:4.00]
    359 ; ATOM-NEXT:    haddps (%rdi), %xmm0 # sched: [9:4.50]
    360 ; ATOM-NEXT:    retq # sched: [79:39.50]
    361 ;
    362 ; SLM-LABEL: test_haddps:
    363 ; SLM:       # %bb.0:
    364 ; SLM-NEXT:    haddps %xmm1, %xmm0 # sched: [3:1.00]
    365 ; SLM-NEXT:    haddps (%rdi), %xmm0 # sched: [6:1.00]
    366 ; SLM-NEXT:    retq # sched: [4:1.00]
    367 ;
    368 ; SANDY-SSE-LABEL: test_haddps:
    369 ; SANDY-SSE:       # %bb.0:
    370 ; SANDY-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
    371 ; SANDY-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
    372 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    373 ;
    374 ; SANDY-LABEL: test_haddps:
    375 ; SANDY:       # %bb.0:
    376 ; SANDY-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
    377 ; SANDY-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
    378 ; SANDY-NEXT:    retq # sched: [1:1.00]
    379 ;
    380 ; HASWELL-SSE-LABEL: test_haddps:
    381 ; HASWELL-SSE:       # %bb.0:
    382 ; HASWELL-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
    383 ; HASWELL-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
    384 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    385 ;
    386 ; HASWELL-LABEL: test_haddps:
    387 ; HASWELL:       # %bb.0:
    388 ; HASWELL-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
    389 ; HASWELL-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
    390 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    391 ;
    392 ; BROADWELL-SSE-LABEL: test_haddps:
    393 ; BROADWELL-SSE:       # %bb.0:
    394 ; BROADWELL-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
    395 ; BROADWELL-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [10:2.00]
    396 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    397 ;
    398 ; BROADWELL-LABEL: test_haddps:
    399 ; BROADWELL:       # %bb.0:
    400 ; BROADWELL-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
    401 ; BROADWELL-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
    402 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    403 ;
    404 ; SKYLAKE-SSE-LABEL: test_haddps:
    405 ; SKYLAKE-SSE:       # %bb.0:
    406 ; SKYLAKE-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [6:2.00]
    407 ; SKYLAKE-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [12:2.00]
    408 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    409 ;
    410 ; SKYLAKE-LABEL: test_haddps:
    411 ; SKYLAKE:       # %bb.0:
    412 ; SKYLAKE-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
    413 ; SKYLAKE-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
    414 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    415 ;
    416 ; SKX-SSE-LABEL: test_haddps:
    417 ; SKX-SSE:       # %bb.0:
    418 ; SKX-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [6:2.00]
    419 ; SKX-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [12:2.00]
    420 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    421 ;
    422 ; SKX-LABEL: test_haddps:
    423 ; SKX:       # %bb.0:
    424 ; SKX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
    425 ; SKX-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
    426 ; SKX-NEXT:    retq # sched: [7:1.00]
    427 ;
    428 ; BTVER2-SSE-LABEL: test_haddps:
    429 ; BTVER2-SSE:       # %bb.0:
    430 ; BTVER2-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [3:1.00]
    431 ; BTVER2-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [8:1.00]
    432 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    433 ;
    434 ; BTVER2-LABEL: test_haddps:
    435 ; BTVER2:       # %bb.0:
    436 ; BTVER2-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    437 ; BTVER2-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    438 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    439 ;
    440 ; ZNVER1-SSE-LABEL: test_haddps:
    441 ; ZNVER1-SSE:       # %bb.0:
    442 ; ZNVER1-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [100:0.25]
    443 ; ZNVER1-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [100:0.25]
    444 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    445 ;
    446 ; ZNVER1-LABEL: test_haddps:
    447 ; ZNVER1:       # %bb.0:
    448 ; ZNVER1-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
    449 ; ZNVER1-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
    450 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register-register use: both operands are incoming arguments.
    451   %1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
          ; Aligned vector load feeding the second call; the assertions above expect
          ; it to show up as the (%rdi) memory operand of the second instruction.
    452   %2 = load <4 x float>, <4 x float> *%a2, align 16
    453   %3 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %1, <4 x float> %2)
    454   ret <4 x float> %3
    455 }
    456 declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
    457 
    ; test_hsubpd - scheduling-annotation test for hsubpd / vhsubpd, reached
    ; through @llvm.x86.sse3.hsub.pd. The intrinsic is called once on the two
    ; incoming vectors and once on that result plus a vector loaded from %a2, so
    ; the expected asm holds a register-register form and a (%rdi) memory form.
    ; GENERIC, ATOM, SLM and the prefixes ending in -SSE expect the two-operand
    ; mnemonic; the remaining prefixes expect the v-prefixed three-operand form.
    ; The assertions below are autogenerated (see the note at the top of the
    ; file); regenerate them with utils/update_llc_test_checks.py rather than
    ; editing them by hand.
    ; NOTE(review): the znver1 expectations are [100:0.25] for both the register
    ; and the memory form, unlike the other CPUs in this function - this looks
    ; like a placeholder in the scheduler model; confirm before relying on it.
    458 define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
    459 ; GENERIC-LABEL: test_hsubpd:
    460 ; GENERIC:       # %bb.0:
    461 ; GENERIC-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
    462 ; GENERIC-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
    463 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    464 ;
    465 ; ATOM-LABEL: test_hsubpd:
    466 ; ATOM:       # %bb.0:
    467 ; ATOM-NEXT:    hsubpd %xmm1, %xmm0 # sched: [8:4.00]
    468 ; ATOM-NEXT:    hsubpd (%rdi), %xmm0 # sched: [9:4.50]
    469 ; ATOM-NEXT:    retq # sched: [79:39.50]
    470 ;
    471 ; SLM-LABEL: test_hsubpd:
    472 ; SLM:       # %bb.0:
    473 ; SLM-NEXT:    hsubpd %xmm1, %xmm0 # sched: [3:1.00]
    474 ; SLM-NEXT:    hsubpd (%rdi), %xmm0 # sched: [6:1.00]
    475 ; SLM-NEXT:    retq # sched: [4:1.00]
    476 ;
    477 ; SANDY-SSE-LABEL: test_hsubpd:
    478 ; SANDY-SSE:       # %bb.0:
    479 ; SANDY-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
    480 ; SANDY-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
    481 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    482 ;
    483 ; SANDY-LABEL: test_hsubpd:
    484 ; SANDY:       # %bb.0:
    485 ; SANDY-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
    486 ; SANDY-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
    487 ; SANDY-NEXT:    retq # sched: [1:1.00]
    488 ;
    489 ; HASWELL-SSE-LABEL: test_hsubpd:
    490 ; HASWELL-SSE:       # %bb.0:
    491 ; HASWELL-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
    492 ; HASWELL-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
    493 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    494 ;
    495 ; HASWELL-LABEL: test_hsubpd:
    496 ; HASWELL:       # %bb.0:
    497 ; HASWELL-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
    498 ; HASWELL-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
    499 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    500 ;
    501 ; BROADWELL-SSE-LABEL: test_hsubpd:
    502 ; BROADWELL-SSE:       # %bb.0:
    503 ; BROADWELL-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
    504 ; BROADWELL-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [10:2.00]
    505 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    506 ;
    507 ; BROADWELL-LABEL: test_hsubpd:
    508 ; BROADWELL:       # %bb.0:
    509 ; BROADWELL-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
    510 ; BROADWELL-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
    511 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    512 ;
    513 ; SKYLAKE-SSE-LABEL: test_hsubpd:
    514 ; SKYLAKE-SSE:       # %bb.0:
    515 ; SKYLAKE-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [6:2.00]
    516 ; SKYLAKE-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [12:2.00]
    517 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    518 ;
    519 ; SKYLAKE-LABEL: test_hsubpd:
    520 ; SKYLAKE:       # %bb.0:
    521 ; SKYLAKE-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
    522 ; SKYLAKE-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
    523 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    524 ;
    525 ; SKX-SSE-LABEL: test_hsubpd:
    526 ; SKX-SSE:       # %bb.0:
    527 ; SKX-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [6:2.00]
    528 ; SKX-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [12:2.00]
    529 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    530 ;
    531 ; SKX-LABEL: test_hsubpd:
    532 ; SKX:       # %bb.0:
    533 ; SKX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
    534 ; SKX-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
    535 ; SKX-NEXT:    retq # sched: [7:1.00]
    536 ;
    537 ; BTVER2-SSE-LABEL: test_hsubpd:
    538 ; BTVER2-SSE:       # %bb.0:
    539 ; BTVER2-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [3:1.00]
    540 ; BTVER2-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [8:1.00]
    541 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    542 ;
    543 ; BTVER2-LABEL: test_hsubpd:
    544 ; BTVER2:       # %bb.0:
    545 ; BTVER2-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    546 ; BTVER2-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    547 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    548 ;
    549 ; ZNVER1-SSE-LABEL: test_hsubpd:
    550 ; ZNVER1-SSE:       # %bb.0:
    551 ; ZNVER1-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [100:0.25]
    552 ; ZNVER1-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [100:0.25]
    553 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    554 ;
    555 ; ZNVER1-LABEL: test_hsubpd:
    556 ; ZNVER1:       # %bb.0:
    557 ; ZNVER1-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
    558 ; ZNVER1-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
    559 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register-register use: both operands are incoming arguments.
    560   %1 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
          ; Aligned vector load feeding the second call; the assertions above expect
          ; it to show up as the (%rdi) memory operand of the second instruction.
    561   %2 = load <2 x double>, <2 x double> *%a2, align 16
    562   %3 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %1, <2 x double> %2)
    563   ret <2 x double> %3
    564 }
    565 declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
    566 
    ; test_hsubps - scheduling-annotation test for hsubps / vhsubps, reached
    ; through @llvm.x86.sse3.hsub.ps. The intrinsic is called once on the two
    ; incoming vectors and once on that result plus a vector loaded from %a2, so
    ; the expected asm holds a register-register form and a (%rdi) memory form.
    ; GENERIC, ATOM, SLM and the prefixes ending in -SSE expect the two-operand
    ; mnemonic; the remaining prefixes expect the v-prefixed three-operand form.
    ; The assertions below are autogenerated (see the note at the top of the
    ; file); regenerate them with utils/update_llc_test_checks.py rather than
    ; editing them by hand.
    ; NOTE(review): the znver1 expectations are [100:0.25] for both the register
    ; and the memory form, unlike the other CPUs in this function - this looks
    ; like a placeholder in the scheduler model; confirm before relying on it.
    567 define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
    568 ; GENERIC-LABEL: test_hsubps:
    569 ; GENERIC:       # %bb.0:
    570 ; GENERIC-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
    571 ; GENERIC-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
    572 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    573 ;
    574 ; ATOM-LABEL: test_hsubps:
    575 ; ATOM:       # %bb.0:
    576 ; ATOM-NEXT:    hsubps %xmm1, %xmm0 # sched: [8:4.00]
    577 ; ATOM-NEXT:    hsubps (%rdi), %xmm0 # sched: [9:4.50]
    578 ; ATOM-NEXT:    retq # sched: [79:39.50]
    579 ;
    580 ; SLM-LABEL: test_hsubps:
    581 ; SLM:       # %bb.0:
    582 ; SLM-NEXT:    hsubps %xmm1, %xmm0 # sched: [3:1.00]
    583 ; SLM-NEXT:    hsubps (%rdi), %xmm0 # sched: [6:1.00]
    584 ; SLM-NEXT:    retq # sched: [4:1.00]
    585 ;
    586 ; SANDY-SSE-LABEL: test_hsubps:
    587 ; SANDY-SSE:       # %bb.0:
    588 ; SANDY-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
    589 ; SANDY-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
    590 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    591 ;
    592 ; SANDY-LABEL: test_hsubps:
    593 ; SANDY:       # %bb.0:
    594 ; SANDY-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
    595 ; SANDY-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
    596 ; SANDY-NEXT:    retq # sched: [1:1.00]
    597 ;
    598 ; HASWELL-SSE-LABEL: test_hsubps:
    599 ; HASWELL-SSE:       # %bb.0:
    600 ; HASWELL-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
    601 ; HASWELL-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
    602 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    603 ;
    604 ; HASWELL-LABEL: test_hsubps:
    605 ; HASWELL:       # %bb.0:
    606 ; HASWELL-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
    607 ; HASWELL-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
    608 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    609 ;
    610 ; BROADWELL-SSE-LABEL: test_hsubps:
    611 ; BROADWELL-SSE:       # %bb.0:
    612 ; BROADWELL-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
    613 ; BROADWELL-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [10:2.00]
    614 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    615 ;
    616 ; BROADWELL-LABEL: test_hsubps:
    617 ; BROADWELL:       # %bb.0:
    618 ; BROADWELL-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
    619 ; BROADWELL-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
    620 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    621 ;
    622 ; SKYLAKE-SSE-LABEL: test_hsubps:
    623 ; SKYLAKE-SSE:       # %bb.0:
    624 ; SKYLAKE-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [6:2.00]
    625 ; SKYLAKE-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [12:2.00]
    626 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    627 ;
    628 ; SKYLAKE-LABEL: test_hsubps:
    629 ; SKYLAKE:       # %bb.0:
    630 ; SKYLAKE-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
    631 ; SKYLAKE-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
    632 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    633 ;
    634 ; SKX-SSE-LABEL: test_hsubps:
    635 ; SKX-SSE:       # %bb.0:
    636 ; SKX-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [6:2.00]
    637 ; SKX-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [12:2.00]
    638 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    639 ;
    640 ; SKX-LABEL: test_hsubps:
    641 ; SKX:       # %bb.0:
    642 ; SKX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
    643 ; SKX-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
    644 ; SKX-NEXT:    retq # sched: [7:1.00]
    645 ;
    646 ; BTVER2-SSE-LABEL: test_hsubps:
    647 ; BTVER2-SSE:       # %bb.0:
    648 ; BTVER2-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [3:1.00]
    649 ; BTVER2-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [8:1.00]
    650 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    651 ;
    652 ; BTVER2-LABEL: test_hsubps:
    653 ; BTVER2:       # %bb.0:
    654 ; BTVER2-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    655 ; BTVER2-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    656 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    657 ;
    658 ; ZNVER1-SSE-LABEL: test_hsubps:
    659 ; ZNVER1-SSE:       # %bb.0:
    660 ; ZNVER1-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [100:0.25]
    661 ; ZNVER1-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [100:0.25]
    662 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    663 ;
    664 ; ZNVER1-LABEL: test_hsubps:
    665 ; ZNVER1:       # %bb.0:
    666 ; ZNVER1-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
    667 ; ZNVER1-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
    668 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register-register use: both operands are incoming arguments.
    669   %1 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
          ; Aligned vector load feeding the second call; the assertions above expect
          ; it to show up as the (%rdi) memory operand of the second instruction.
    670   %2 = load <4 x float>, <4 x float> *%a2, align 16
    671   %3 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %1, <4 x float> %2)
    672   ret <4 x float> %3
    673 }
    ; Declaration of the llvm.x86.sse3.hsub.ps intrinsic called twice by
    ; test_hsubps above (the checks there expect hsubps / vhsubps for it).
    ; Signature: (<4 x float>, <4 x float>) -> <4 x float>; attributes as
    ; written: nounwind readnone.
    674 declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
    675 
    ; test_lddqu: returns the <16 x i8> produced by calling the
    ; llvm.x86.sse3.ldu.dq intrinsic on the i8* argument.
    ;
    ; Each check block below belongs to one FileCheck prefix and pins the exact
    ; instruction sequence plus the "# sched: [..]" annotation that llc prints
    ; under -print-schedule. The "-SSE" prefixes expect the legacy lddqu
    ; encoding; the matching non-SSE prefixes expect vlddqu.
    ;
    ; The assertions are autogenerated (see the note at the top of the file);
    ; regenerate them with utils/update_llc_test_checks.py instead of editing
    ; the expected values by hand.
    676 define <16 x i8> @test_lddqu(i8* %a0) {
    677 ; GENERIC-LABEL: test_lddqu:
    678 ; GENERIC:       # %bb.0:
    679 ; GENERIC-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
    680 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    681 ;
    682 ; ATOM-LABEL: test_lddqu:
    683 ; ATOM:       # %bb.0:
    684 ; ATOM-NEXT:    lddqu (%rdi), %xmm0 # sched: [3:1.50]
    685 ; ATOM-NEXT:    nop # sched: [1:0.50]
    686 ; ATOM-NEXT:    nop # sched: [1:0.50]
    687 ; ATOM-NEXT:    retq # sched: [79:39.50]
    688 ;
    689 ; SLM-LABEL: test_lddqu:
    690 ; SLM:       # %bb.0:
    691 ; SLM-NEXT:    lddqu (%rdi), %xmm0 # sched: [3:1.00]
    692 ; SLM-NEXT:    retq # sched: [4:1.00]
    693 ;
    694 ; SANDY-SSE-LABEL: test_lddqu:
    695 ; SANDY-SSE:       # %bb.0:
    696 ; SANDY-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
    697 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    698 ;
    699 ; SANDY-LABEL: test_lddqu:
    700 ; SANDY:       # %bb.0:
    701 ; SANDY-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
    702 ; SANDY-NEXT:    retq # sched: [1:1.00]
    703 ;
    704 ; HASWELL-SSE-LABEL: test_lddqu:
    705 ; HASWELL-SSE:       # %bb.0:
    706 ; HASWELL-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
    707 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    708 ;
    709 ; HASWELL-LABEL: test_lddqu:
    710 ; HASWELL:       # %bb.0:
    711 ; HASWELL-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
    712 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    713 ;
    714 ; BROADWELL-SSE-LABEL: test_lddqu:
    715 ; BROADWELL-SSE:       # %bb.0:
    716 ; BROADWELL-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [5:0.50]
    717 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    718 ;
    719 ; BROADWELL-LABEL: test_lddqu:
    720 ; BROADWELL:       # %bb.0:
    721 ; BROADWELL-NEXT:    vlddqu (%rdi), %xmm0 # sched: [5:0.50]
    722 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    723 ;
    724 ; SKYLAKE-SSE-LABEL: test_lddqu:
    725 ; SKYLAKE-SSE:       # %bb.0:
    726 ; SKYLAKE-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
    727 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    728 ;
    729 ; SKYLAKE-LABEL: test_lddqu:
    730 ; SKYLAKE:       # %bb.0:
    731 ; SKYLAKE-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
    732 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    733 ;
    734 ; SKX-SSE-LABEL: test_lddqu:
    735 ; SKX-SSE:       # %bb.0:
    736 ; SKX-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
    737 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    738 ;
    739 ; SKX-LABEL: test_lddqu:
    740 ; SKX:       # %bb.0:
    741 ; SKX-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
    742 ; SKX-NEXT:    retq # sched: [7:1.00]
    743 ;
    744 ; BTVER2-SSE-LABEL: test_lddqu:
    745 ; BTVER2-SSE:       # %bb.0:
    746 ; BTVER2-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [5:1.00]
    747 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    748 ;
    749 ; BTVER2-LABEL: test_lddqu:
    750 ; BTVER2:       # %bb.0:
    751 ; BTVER2-NEXT:    vlddqu (%rdi), %xmm0 # sched: [5:1.00]
    752 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    753 ;
    754 ; ZNVER1-SSE-LABEL: test_lddqu:
    755 ; ZNVER1-SSE:       # %bb.0:
    756 ; ZNVER1-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [8:0.50]
    757 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    758 ;
    759 ; ZNVER1-LABEL: test_lddqu:
    760 ; ZNVER1:       # %bb.0:
    761 ; ZNVER1-NEXT:    vlddqu (%rdi), %xmm0 # sched: [8:0.50]
    762 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    ; IR under test: one intrinsic call whose result is returned directly.
    763   %1 = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
    764   ret <16 x i8> %1
    765 }
    ; Declaration of the llvm.x86.sse3.ldu.dq intrinsic used by test_lddqu:
    ; takes an i8* and returns <16 x i8>; attributes as written: nounwind
    ; readonly.
    766 declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
    767 
    ; test_monitor: forwards its three arguments (i8*, i32, i32) to the
    ; llvm.x86.sse3.monitor intrinsic as a tail call and returns void.
    ;
    ; For every prefix the expected code is the same four instructions:
    ; leaq (%rdi), %rax, then movl %esi, %ecx, then monitor, then retq. Only
    ; the "# sched: [..]" annotations printed under -print-schedule differ
    ; between configurations, and the "-SSE" and non-SSE blocks of a given CPU
    ; expect identical text here.
    ;
    ; The assertions are autogenerated (see the note at the top of the file);
    ; regenerate them with utils/update_llc_test_checks.py instead of editing
    ; the expected values by hand.
    768 define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) {
    769 ; GENERIC-LABEL: test_monitor:
    770 ; GENERIC:       # %bb.0:
    771 ; GENERIC-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
    772 ; GENERIC-NEXT:    movl %esi, %ecx # sched: [1:0.33]
    773 ; GENERIC-NEXT:    monitor # sched: [100:0.33]
    774 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    775 ;
    776 ; ATOM-LABEL: test_monitor:
    777 ; ATOM:       # %bb.0:
    778 ; ATOM-NEXT:    leaq (%rdi), %rax # sched: [1:1.00]
    779 ; ATOM-NEXT:    movl %esi, %ecx # sched: [1:0.50]
    780 ; ATOM-NEXT:    monitor # sched: [45:22.50]
    781 ; ATOM-NEXT:    retq # sched: [79:39.50]
    782 ;
    783 ; SLM-LABEL: test_monitor:
    784 ; SLM:       # %bb.0:
    785 ; SLM-NEXT:    leaq (%rdi), %rax # sched: [1:1.00]
    786 ; SLM-NEXT:    movl %esi, %ecx # sched: [1:0.50]
    787 ; SLM-NEXT:    monitor # sched: [100:1.00]
    788 ; SLM-NEXT:    retq # sched: [4:1.00]
    789 ;
    790 ; SANDY-SSE-LABEL: test_monitor:
    791 ; SANDY-SSE:       # %bb.0:
    792 ; SANDY-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
    793 ; SANDY-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.33]
    794 ; SANDY-SSE-NEXT:    monitor # sched: [100:0.33]
    795 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    796 ;
    797 ; SANDY-LABEL: test_monitor:
    798 ; SANDY:       # %bb.0:
    799 ; SANDY-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
    800 ; SANDY-NEXT:    movl %esi, %ecx # sched: [1:0.33]
    801 ; SANDY-NEXT:    monitor # sched: [100:0.33]
    802 ; SANDY-NEXT:    retq # sched: [1:1.00]
    803 ;
    804 ; HASWELL-SSE-LABEL: test_monitor:
    805 ; HASWELL-SSE:       # %bb.0:
    806 ; HASWELL-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
    807 ; HASWELL-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
    808 ; HASWELL-SSE-NEXT:    monitor # sched: [100:0.25]
    809 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    810 ;
    811 ; HASWELL-LABEL: test_monitor:
    812 ; HASWELL:       # %bb.0:
    813 ; HASWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
    814 ; HASWELL-NEXT:    movl %esi, %ecx # sched: [1:0.25]
    815 ; HASWELL-NEXT:    monitor # sched: [100:0.25]
    816 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    817 ;
    818 ; BROADWELL-SSE-LABEL: test_monitor:
    819 ; BROADWELL-SSE:       # %bb.0:
    820 ; BROADWELL-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
    821 ; BROADWELL-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
    822 ; BROADWELL-SSE-NEXT:    monitor # sched: [100:0.25]
    823 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    824 ;
    825 ; BROADWELL-LABEL: test_monitor:
    826 ; BROADWELL:       # %bb.0:
    827 ; BROADWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
    828 ; BROADWELL-NEXT:    movl %esi, %ecx # sched: [1:0.25]
    829 ; BROADWELL-NEXT:    monitor # sched: [100:0.25]
    830 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    831 ;
    832 ; SKYLAKE-SSE-LABEL: test_monitor:
    833 ; SKYLAKE-SSE:       # %bb.0:
    834 ; SKYLAKE-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
    835 ; SKYLAKE-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
    836 ; SKYLAKE-SSE-NEXT:    monitor # sched: [100:0.25]
    837 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    838 ;
    839 ; SKYLAKE-LABEL: test_monitor:
    840 ; SKYLAKE:       # %bb.0:
    841 ; SKYLAKE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
    842 ; SKYLAKE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
    843 ; SKYLAKE-NEXT:    monitor # sched: [100:0.25]
    844 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    845 ;
    846 ; SKX-SSE-LABEL: test_monitor:
    847 ; SKX-SSE:       # %bb.0:
    848 ; SKX-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
    849 ; SKX-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
    850 ; SKX-SSE-NEXT:    monitor # sched: [100:0.25]
    851 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    852 ;
    853 ; SKX-LABEL: test_monitor:
    854 ; SKX:       # %bb.0:
    855 ; SKX-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
    856 ; SKX-NEXT:    movl %esi, %ecx # sched: [1:0.25]
    857 ; SKX-NEXT:    monitor # sched: [100:0.25]
    858 ; SKX-NEXT:    retq # sched: [7:1.00]
    859 ;
    860 ; BTVER2-SSE-LABEL: test_monitor:
    861 ; BTVER2-SSE:       # %bb.0:
    862 ; BTVER2-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
    863 ; BTVER2-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.50]
    864 ; BTVER2-SSE-NEXT:    monitor # sched: [100:0.50]
    865 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    866 ;
    867 ; BTVER2-LABEL: test_monitor:
    868 ; BTVER2:       # %bb.0:
    869 ; BTVER2-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
    870 ; BTVER2-NEXT:    movl %esi, %ecx # sched: [1:0.50]
    871 ; BTVER2-NEXT:    monitor # sched: [100:0.50]
    872 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    873 ;
    874 ; ZNVER1-SSE-LABEL: test_monitor:
    875 ; ZNVER1-SSE:       # %bb.0:
    876 ; ZNVER1-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.25]
    877 ; ZNVER1-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
    878 ; ZNVER1-SSE-NEXT:    monitor # sched: [100:0.25]
    879 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    880 ;
    881 ; ZNVER1-LABEL: test_monitor:
    882 ; ZNVER1:       # %bb.0:
    883 ; ZNVER1-NEXT:    leaq (%rdi), %rax # sched: [1:0.25]
    884 ; ZNVER1-NEXT:    movl %esi, %ecx # sched: [1:0.25]
    885 ; ZNVER1-NEXT:    monitor # sched: [100:0.25]
    886 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    ; IR under test: a single tail call to the intrinsic, then a void return.
    887   tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2)
    888   ret void
    889 }
    ; Declaration of the llvm.x86.sse3.monitor intrinsic used by test_monitor:
    ; takes (i8*, i32, i32) and returns void. No attributes are listed on the
    ; declaration.
    890 declare void @llvm.x86.sse3.monitor(i8*, i32, i32)
    891 
    ; test_movddup: splats element 0 of the register argument %a0 and element 0
    ; of the <2 x double> loaded from %a1 (both via shufflevector with a
    ; zeroinitializer mask), then returns (loaded splat) - (register splat).
    ;
    ; The check blocks expect one register-form and one memory-form movddup
    ; (vmovddup for the non-SSE prefixes) followed by subpd / vsubpd. The order
    ; of the two movddup instructions and the registers chosen differ between
    ; configurations (the SLM block also expects a trailing movapd), which is
    ; why every prefix carries its own full sequence together with the
    ; "# sched: [..]" annotation printed under -print-schedule.
    ;
    ; The assertions are autogenerated (see the note at the top of the file);
    ; regenerate them with utils/update_llc_test_checks.py instead of editing
    ; the expected values by hand.
    892 define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
    893 ; GENERIC-LABEL: test_movddup:
    894 ; GENERIC:       # %bb.0:
    895 ; GENERIC-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
    896 ; GENERIC-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
    897 ; GENERIC-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
    898 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    899 ;
    900 ; ATOM-LABEL: test_movddup:
    901 ; ATOM:       # %bb.0:
    902 ; ATOM-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
    903 ; ATOM-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [1:1.00]
    904 ; ATOM-NEXT:    subpd %xmm1, %xmm0 # sched: [6:3.00]
    905 ; ATOM-NEXT:    retq # sched: [79:39.50]
    906 ;
    907 ; SLM-LABEL: test_movddup:
    908 ; SLM:       # %bb.0:
    909 ; SLM-NEXT:    movddup {{.*#+}} xmm1 = mem[0,0] sched: [4:1.00]
    910 ; SLM-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
    911 ; SLM-NEXT:    subpd %xmm0, %xmm1 # sched: [3:1.00]
    912 ; SLM-NEXT:    movapd %xmm1, %xmm0 # sched: [1:0.50]
    913 ; SLM-NEXT:    retq # sched: [4:1.00]
    914 ;
    915 ; SANDY-SSE-LABEL: test_movddup:
    916 ; SANDY-SSE:       # %bb.0:
    917 ; SANDY-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
    918 ; SANDY-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
    919 ; SANDY-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
    920 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    921 ;
    922 ; SANDY-LABEL: test_movddup:
    923 ; SANDY:       # %bb.0:
    924 ; SANDY-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
    925 ; SANDY-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
    926 ; SANDY-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    927 ; SANDY-NEXT:    retq # sched: [1:1.00]
    928 ;
    929 ; HASWELL-SSE-LABEL: test_movddup:
    930 ; HASWELL-SSE:       # %bb.0:
    931 ; HASWELL-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
    932 ; HASWELL-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
    933 ; HASWELL-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
    934 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    935 ;
    936 ; HASWELL-LABEL: test_movddup:
    937 ; HASWELL:       # %bb.0:
    938 ; HASWELL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
    939 ; HASWELL-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
    940 ; HASWELL-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    941 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    942 ;
    943 ; BROADWELL-SSE-LABEL: test_movddup:
    944 ; BROADWELL-SSE:       # %bb.0:
    945 ; BROADWELL-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
    946 ; BROADWELL-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
    947 ; BROADWELL-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
    948 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    949 ;
    950 ; BROADWELL-LABEL: test_movddup:
    951 ; BROADWELL:       # %bb.0:
    952 ; BROADWELL-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
    953 ; BROADWELL-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
    954 ; BROADWELL-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    955 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    956 ;
    957 ; SKYLAKE-SSE-LABEL: test_movddup:
    958 ; SKYLAKE-SSE:       # %bb.0:
    959 ; SKYLAKE-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
    960 ; SKYLAKE-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
    961 ; SKYLAKE-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [4:0.50]
    962 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    963 ;
    964 ; SKYLAKE-LABEL: test_movddup:
    965 ; SKYLAKE:       # %bb.0:
    966 ; SKYLAKE-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
    967 ; SKYLAKE-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
    968 ; SKYLAKE-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
    969 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    970 ;
    971 ; SKX-SSE-LABEL: test_movddup:
    972 ; SKX-SSE:       # %bb.0:
    973 ; SKX-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
    974 ; SKX-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
    975 ; SKX-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [4:0.50]
    976 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    977 ;
    978 ; SKX-LABEL: test_movddup:
    979 ; SKX:       # %bb.0:
    980 ; SKX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
    981 ; SKX-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
    982 ; SKX-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
    983 ; SKX-NEXT:    retq # sched: [7:1.00]
    984 ;
    985 ; BTVER2-SSE-LABEL: test_movddup:
    986 ; BTVER2-SSE:       # %bb.0:
    987 ; BTVER2-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
    988 ; BTVER2-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:1.00]
    989 ; BTVER2-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
    990 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    991 ;
    992 ; BTVER2-LABEL: test_movddup:
    993 ; BTVER2:       # %bb.0:
    994 ; BTVER2-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00]
    995 ; BTVER2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
    996 ; BTVER2-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
    997 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    998 ;
    999 ; ZNVER1-SSE-LABEL: test_movddup:
   1000 ; ZNVER1-SSE:       # %bb.0:
   1001 ; ZNVER1-SSE-NEXT:    movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
   1002 ; ZNVER1-SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0] sched: [8:0.50]
   1003 ; ZNVER1-SSE-NEXT:    subpd %xmm1, %xmm0 # sched: [3:1.00]
   1004 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1005 ;
   1006 ; ZNVER1-LABEL: test_movddup:
   1007 ; ZNVER1:       # %bb.0:
   1008 ; ZNVER1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50]
   1009 ; ZNVER1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
   1010 ; ZNVER1-NEXT:    vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   1011 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    ; %1 is the splat of the register operand; %3 is the splat of the value
    ; loaded from %a1 (the load is declared 16-byte aligned).
   1012   %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
   1013   %2 = load <2 x double>, <2 x double> *%a1, align 16
   1014   %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
   1015   %4 = fsub <2 x double> %3, %1 ; Use fsub to stop the movddup from being folded as a broadcast load in avx512vl.
   1016   ret <2 x double> %4
   1017 }
   1018 
   ; test_movshdup: applies the shuffle mask <1, 1, 3, 3> to the register
   ; argument %a0 and to the <4 x float> loaded from %a1, then returns the
   ; fadd of the two shuffled vectors.
   ;
   ; The check blocks expect one register-form and one memory-form movshdup
   ; (vmovshdup for the non-SSE prefixes) followed by addps / vaddps. The
   ; order of the two shuffles and the registers chosen differ between
   ; configurations (the SLM block also expects a trailing movaps), so every
   ; prefix carries its own full sequence together with the "# sched: [..]"
   ; annotation printed under -print-schedule.
   ;
   ; The assertions are autogenerated (see the note at the top of the file);
   ; regenerate them with utils/update_llc_test_checks.py instead of editing
   ; the expected values by hand.
   1019 define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
   1020 ; GENERIC-LABEL: test_movshdup:
   1021 ; GENERIC:       # %bb.0:
   1022 ; GENERIC-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
   1023 ; GENERIC-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
   1024 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1025 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1026 ;
   1027 ; ATOM-LABEL: test_movshdup:
   1028 ; ATOM:       # %bb.0:
   1029 ; ATOM-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
   1030 ; ATOM-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [1:1.00]
   1031 ; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
   1032 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1033 ;
   1034 ; SLM-LABEL: test_movshdup:
   1035 ; SLM:       # %bb.0:
   1036 ; SLM-NEXT:    movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:1.00]
   1037 ; SLM-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
   1038 ; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   1039 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
   1040 ; SLM-NEXT:    retq # sched: [4:1.00]
   1041 ;
   1042 ; SANDY-SSE-LABEL: test_movshdup:
   1043 ; SANDY-SSE:       # %bb.0:
   1044 ; SANDY-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
   1045 ; SANDY-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
   1046 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1047 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1048 ;
   1049 ; SANDY-LABEL: test_movshdup:
   1050 ; SANDY:       # %bb.0:
   1051 ; SANDY-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
   1052 ; SANDY-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
   1053 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1054 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1055 ;
   1056 ; HASWELL-SSE-LABEL: test_movshdup:
   1057 ; HASWELL-SSE:       # %bb.0:
   1058 ; HASWELL-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
   1059 ; HASWELL-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
   1060 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1061 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1062 ;
   1063 ; HASWELL-LABEL: test_movshdup:
   1064 ; HASWELL:       # %bb.0:
   1065 ; HASWELL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
   1066 ; HASWELL-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
   1067 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1068 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1069 ;
   1070 ; BROADWELL-SSE-LABEL: test_movshdup:
   1071 ; BROADWELL-SSE:       # %bb.0:
   1072 ; BROADWELL-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
   1073 ; BROADWELL-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [5:0.50]
   1074 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1075 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1076 ;
   1077 ; BROADWELL-LABEL: test_movshdup:
   1078 ; BROADWELL:       # %bb.0:
   1079 ; BROADWELL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
   1080 ; BROADWELL-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:0.50]
   1081 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1082 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1083 ;
   1084 ; SKYLAKE-SSE-LABEL: test_movshdup:
   1085 ; SKYLAKE-SSE:       # %bb.0:
   1086 ; SKYLAKE-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
   1087 ; SKYLAKE-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
   1088 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   1089 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1090 ;
   1091 ; SKYLAKE-LABEL: test_movshdup:
   1092 ; SKYLAKE:       # %bb.0:
   1093 ; SKYLAKE-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
   1094 ; SKYLAKE-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
   1095 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1096 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1097 ;
   1098 ; SKX-SSE-LABEL: test_movshdup:
   1099 ; SKX-SSE:       # %bb.0:
   1100 ; SKX-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
   1101 ; SKX-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
   1102 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   1103 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1104 ;
   1105 ; SKX-LABEL: test_movshdup:
   1106 ; SKX:       # %bb.0:
   1107 ; SKX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
   1108 ; SKX-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
   1109 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1110 ; SKX-NEXT:    retq # sched: [7:1.00]
   1111 ;
   1112 ; BTVER2-SSE-LABEL: test_movshdup:
   1113 ; BTVER2-SSE:       # %bb.0:
   1114 ; BTVER2-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
   1115 ; BTVER2-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:1.00]
   1116 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1117 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1118 ;
   1119 ; BTVER2-LABEL: test_movshdup:
   1120 ; BTVER2:       # %bb.0:
   1121 ; BTVER2-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:1.00]
   1122 ; BTVER2-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
   1123 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1124 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1125 ;
   1126 ; ZNVER1-SSE-LABEL: test_movshdup:
   1127 ; ZNVER1-SSE:       # %bb.0:
   1128 ; ZNVER1-SSE-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
   1129 ; ZNVER1-SSE-NEXT:    movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [8:0.50]
   1130 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1131 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1132 ;
   1133 ; ZNVER1-LABEL: test_movshdup:
   1134 ; ZNVER1:       # %bb.0:
   1135 ; ZNVER1-NEXT:    vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50]
   1136 ; ZNVER1-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
   1137 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1138 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   ; %1 shuffles the register operand, %3 shuffles the loaded value; both use
   ; the same <1, 1, 3, 3> mask that appears in the expected asm comments.
   1139   %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
   1140   %2 = load <4 x float>, <4 x float> *%a1, align 16
   1141   %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
   1142   %4 = fadd <4 x float> %1, %3
   1143   ret <4 x float> %4
   1144 }
   1145 
   ; test_movsldup: applies the shuffle mask <0, 0, 2, 2> to the register
   ; argument %a0 and to the <4 x float> loaded from %a1, then returns the
   ; fadd of the two shuffled vectors.
   ;
   ; The check blocks expect one register-form and one memory-form movsldup
   ; (vmovsldup for the non-SSE prefixes) followed by addps / vaddps. The
   ; order of the two shuffles and the registers chosen differ between
   ; configurations (the SLM block also expects a trailing movaps), so every
   ; prefix carries its own full sequence together with the "# sched: [..]"
   ; annotation printed under -print-schedule.
   ;
   ; NOTE(review): the znver1 -SSE block expects [100:0.25] for both movsldup
   ; forms, while the znver1 non-SSE block here expects [8:0.50] / [1:0.50]
   ; and test_movshdup expects [1:0.50] / [8:0.50] for the same prefix. The
   ; values are autogenerated, so this presumably mirrors the scheduling model
   ; at generation time; confirm against the znver1 model before treating it
   ; as intended.
   ;
   ; The assertions are autogenerated (see the note at the top of the file);
   ; regenerate them with utils/update_llc_test_checks.py instead of editing
   ; the expected values by hand.
   1146 define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
   1147 ; GENERIC-LABEL: test_movsldup:
   1148 ; GENERIC:       # %bb.0:
   1149 ; GENERIC-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
   1150 ; GENERIC-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
   1151 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1152 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1153 ;
   1154 ; ATOM-LABEL: test_movsldup:
   1155 ; ATOM:       # %bb.0:
   1156 ; ATOM-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
   1157 ; ATOM-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [1:1.00]
   1158 ; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
   1159 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1160 ;
   1161 ; SLM-LABEL: test_movsldup:
   1162 ; SLM:       # %bb.0:
   1163 ; SLM-NEXT:    movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:1.00]
   1164 ; SLM-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
   1165 ; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   1166 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
   1167 ; SLM-NEXT:    retq # sched: [4:1.00]
   1168 ;
   1169 ; SANDY-SSE-LABEL: test_movsldup:
   1170 ; SANDY-SSE:       # %bb.0:
   1171 ; SANDY-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
   1172 ; SANDY-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
   1173 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1174 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1175 ;
   1176 ; SANDY-LABEL: test_movsldup:
   1177 ; SANDY:       # %bb.0:
   1178 ; SANDY-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
   1179 ; SANDY-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
   1180 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1181 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1182 ;
   1183 ; HASWELL-SSE-LABEL: test_movsldup:
   1184 ; HASWELL-SSE:       # %bb.0:
   1185 ; HASWELL-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
   1186 ; HASWELL-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
   1187 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1188 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1189 ;
   1190 ; HASWELL-LABEL: test_movsldup:
   1191 ; HASWELL:       # %bb.0:
   1192 ; HASWELL-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
   1193 ; HASWELL-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
   1194 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1195 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1196 ;
   1197 ; BROADWELL-SSE-LABEL: test_movsldup:
   1198 ; BROADWELL-SSE:       # %bb.0:
   1199 ; BROADWELL-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
   1200 ; BROADWELL-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [5:0.50]
   1201 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1202 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1203 ;
   1204 ; BROADWELL-LABEL: test_movsldup:
   1205 ; BROADWELL:       # %bb.0:
   1206 ; BROADWELL-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
   1207 ; BROADWELL-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:0.50]
   1208 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1209 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1210 ;
   1211 ; SKYLAKE-SSE-LABEL: test_movsldup:
   1212 ; SKYLAKE-SSE:       # %bb.0:
   1213 ; SKYLAKE-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
   1214 ; SKYLAKE-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
   1215 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   1216 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1217 ;
   1218 ; SKYLAKE-LABEL: test_movsldup:
   1219 ; SKYLAKE:       # %bb.0:
   1220 ; SKYLAKE-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
   1221 ; SKYLAKE-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
   1222 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1223 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1224 ;
   1225 ; SKX-SSE-LABEL: test_movsldup:
   1226 ; SKX-SSE:       # %bb.0:
   1227 ; SKX-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
   1228 ; SKX-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
   1229 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   1230 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1231 ;
   1232 ; SKX-LABEL: test_movsldup:
   1233 ; SKX:       # %bb.0:
   1234 ; SKX-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
   1235 ; SKX-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
   1236 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1237 ; SKX-NEXT:    retq # sched: [7:1.00]
   1238 ;
   1239 ; BTVER2-SSE-LABEL: test_movsldup:
   1240 ; BTVER2-SSE:       # %bb.0:
   1241 ; BTVER2-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:0.50]
   1242 ; BTVER2-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:1.00]
   1243 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1244 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1245 ;
   1246 ; BTVER2-LABEL: test_movsldup:
   1247 ; BTVER2:       # %bb.0:
   1248 ; BTVER2-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:1.00]
   1249 ; BTVER2-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
   1250 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1251 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1252 ;
   1253 ; ZNVER1-SSE-LABEL: test_movsldup:
   1254 ; ZNVER1-SSE:       # %bb.0:
   1255 ; ZNVER1-SSE-NEXT:    movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [100:0.25]
   1256 ; ZNVER1-SSE-NEXT:    movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [100:0.25]
   1257 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   1258 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1259 ;
   1260 ; ZNVER1-LABEL: test_movsldup:
   1261 ; ZNVER1:       # %bb.0:
   1262 ; ZNVER1-NEXT:    vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50]
   1263 ; ZNVER1-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
   1264 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1265 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   ; %1 shuffles the register operand, %3 shuffles the loaded value; both use
   ; the same <0, 0, 2, 2> mask that appears in the expected asm comments.
   1266   %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
   1267   %2 = load <4 x float>, <4 x float> *%a1, align 16
   1268   %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
   1269   %4 = fadd <4 x float> %1, %3
   1270   ret <4 x float> %4
   1271 }
   1272 
   1273 define void @test_mwait(i32 %a0, i32 %a1) {
        ; Scheduling test for the SSE3 MWAIT instruction: the function forwards
        ; %a0 and %a1 to @llvm.x86.sse3.mwait and returns void.
        ;
        ; Every per-CPU assertion group below expects the same four instructions:
        ; %edi is copied to %ecx, %esi is copied to %eax, then mwait, then retq.
        ; In other words the first intrinsic operand ends up in ECX and the second
        ; in EAX. Only the trailing '# sched' annotations differ between groups.
        ; NOTE(review): the bracketed pair is presumably latency and reciprocal
        ; throughput as emitted by llc -print-schedule -- confirm against the
        ; scheduler model documentation.
        ;
        ; These assertions are autogenerated (see the note on the first line of
        ; the file); regenerate them with utils/update_llc_test_checks.py rather
        ; than editing them by hand.
   1274 ; GENERIC-LABEL: test_mwait:
   1275 ; GENERIC:       # %bb.0:
   1276 ; GENERIC-NEXT:    movl %edi, %ecx # sched: [1:0.33]
   1277 ; GENERIC-NEXT:    movl %esi, %eax # sched: [1:0.33]
   1278 ; GENERIC-NEXT:    mwait # sched: [100:0.33]
   1279 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1280 ;
   1281 ; ATOM-LABEL: test_mwait:
   1282 ; ATOM:       # %bb.0:
   1283 ; ATOM-NEXT:    movl %edi, %ecx # sched: [1:0.50]
   1284 ; ATOM-NEXT:    movl %esi, %eax # sched: [1:0.50]
   1285 ; ATOM-NEXT:    mwait # sched: [46:23.00]
   1286 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1287 ;
   1288 ; SLM-LABEL: test_mwait:
   1289 ; SLM:       # %bb.0:
   1290 ; SLM-NEXT:    movl %edi, %ecx # sched: [1:0.50]
   1291 ; SLM-NEXT:    movl %esi, %eax # sched: [1:0.50]
   1292 ; SLM-NEXT:    mwait # sched: [100:1.00]
   1293 ; SLM-NEXT:    retq # sched: [4:1.00]
   1294 ;
   1295 ; SANDY-SSE-LABEL: test_mwait:
   1296 ; SANDY-SSE:       # %bb.0:
   1297 ; SANDY-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.33]
   1298 ; SANDY-SSE-NEXT:    movl %esi, %eax # sched: [1:0.33]
   1299 ; SANDY-SSE-NEXT:    mwait # sched: [100:0.33]
   1300 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1301 ;
   1302 ; SANDY-LABEL: test_mwait:
   1303 ; SANDY:       # %bb.0:
   1304 ; SANDY-NEXT:    movl %edi, %ecx # sched: [1:0.33]
   1305 ; SANDY-NEXT:    movl %esi, %eax # sched: [1:0.33]
   1306 ; SANDY-NEXT:    mwait # sched: [100:0.33]
   1307 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1308 ;
   1309 ; HASWELL-SSE-LABEL: test_mwait:
   1310 ; HASWELL-SSE:       # %bb.0:
   1311 ; HASWELL-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
   1312 ; HASWELL-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
   1313 ; HASWELL-SSE-NEXT:    mwait # sched: [20:2.50]
   1314 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1315 ;
   1316 ; HASWELL-LABEL: test_mwait:
   1317 ; HASWELL:       # %bb.0:
   1318 ; HASWELL-NEXT:    movl %edi, %ecx # sched: [1:0.25]
   1319 ; HASWELL-NEXT:    movl %esi, %eax # sched: [1:0.25]
   1320 ; HASWELL-NEXT:    mwait # sched: [20:2.50]
   1321 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1322 ;
   1323 ; BROADWELL-SSE-LABEL: test_mwait:
   1324 ; BROADWELL-SSE:       # %bb.0:
   1325 ; BROADWELL-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
   1326 ; BROADWELL-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
   1327 ; BROADWELL-SSE-NEXT:    mwait # sched: [100:0.25]
   1328 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1329 ;
   1330 ; BROADWELL-LABEL: test_mwait:
   1331 ; BROADWELL:       # %bb.0:
   1332 ; BROADWELL-NEXT:    movl %edi, %ecx # sched: [1:0.25]
   1333 ; BROADWELL-NEXT:    movl %esi, %eax # sched: [1:0.25]
   1334 ; BROADWELL-NEXT:    mwait # sched: [100:0.25]
   1335 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1336 ;
   1337 ; SKYLAKE-SSE-LABEL: test_mwait:
   1338 ; SKYLAKE-SSE:       # %bb.0:
   1339 ; SKYLAKE-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
   1340 ; SKYLAKE-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
   1341 ; SKYLAKE-SSE-NEXT:    mwait # sched: [20:2.50]
   1342 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1343 ;
   1344 ; SKYLAKE-LABEL: test_mwait:
   1345 ; SKYLAKE:       # %bb.0:
   1346 ; SKYLAKE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
   1347 ; SKYLAKE-NEXT:    movl %esi, %eax # sched: [1:0.25]
   1348 ; SKYLAKE-NEXT:    mwait # sched: [20:2.50]
   1349 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1350 ;
   1351 ; SKX-SSE-LABEL: test_mwait:
   1352 ; SKX-SSE:       # %bb.0:
   1353 ; SKX-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
   1354 ; SKX-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
   1355 ; SKX-SSE-NEXT:    mwait # sched: [20:2.50]
   1356 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1357 ;
   1358 ; SKX-LABEL: test_mwait:
   1359 ; SKX:       # %bb.0:
   1360 ; SKX-NEXT:    movl %edi, %ecx # sched: [1:0.25]
   1361 ; SKX-NEXT:    movl %esi, %eax # sched: [1:0.25]
   1362 ; SKX-NEXT:    mwait # sched: [20:2.50]
   1363 ; SKX-NEXT:    retq # sched: [7:1.00]
   1364 ;
   1365 ; BTVER2-SSE-LABEL: test_mwait:
   1366 ; BTVER2-SSE:       # %bb.0:
   1367 ; BTVER2-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.50]
   1368 ; BTVER2-SSE-NEXT:    movl %esi, %eax # sched: [1:0.50]
   1369 ; BTVER2-SSE-NEXT:    mwait # sched: [100:0.50]
   1370 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1371 ;
   1372 ; BTVER2-LABEL: test_mwait:
   1373 ; BTVER2:       # %bb.0:
   1374 ; BTVER2-NEXT:    movl %edi, %ecx # sched: [1:0.50]
   1375 ; BTVER2-NEXT:    movl %esi, %eax # sched: [1:0.50]
   1376 ; BTVER2-NEXT:    mwait # sched: [100:0.50]
   1377 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1378 ;
   1379 ; ZNVER1-SSE-LABEL: test_mwait:
   1380 ; ZNVER1-SSE:       # %bb.0:
   1381 ; ZNVER1-SSE-NEXT:    movl %edi, %ecx # sched: [1:0.25]
   1382 ; ZNVER1-SSE-NEXT:    movl %esi, %eax # sched: [1:0.25]
   1383 ; ZNVER1-SSE-NEXT:    mwait # sched: [100:0.25]
   1384 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1385 ;
   1386 ; ZNVER1-LABEL: test_mwait:
   1387 ; ZNVER1:       # %bb.0:
   1388 ; ZNVER1-NEXT:    movl %edi, %ecx # sched: [1:0.25]
   1389 ; ZNVER1-NEXT:    movl %esi, %eax # sched: [1:0.25]
   1390 ; ZNVER1-NEXT:    mwait # sched: [100:0.25]
   1391 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; The intrinsic call is the only operation in the body; it is what the
        ; mwait line in each assertion group above corresponds to.
   1392   tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1)
   1393   ret void
   1394 }
   1395 declare void @llvm.x86.sse3.mwait(i32, i32) ; SSE3 mwait intrinsic; called by @test_mwait with (%a0, %a1)
   1396