Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
      9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
     10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
     11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
     12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
     13 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
     14 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
     15 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
     16 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
     17 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
     18 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
     19 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
     20 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
     21 
     22 ; FIXME: we should really use -mattr=-sse2 here but some of the comparison tests don't work without access to legal <4 x i32> types.
     23 
     24 define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Scheduling test for packed single-precision add. The body performs one
        ; add of the two vector arguments and one add with a vector loaded from
        ; %a2. The assertions below expect a register form and a memory form of
        ; addps (or vaddps, depending on the run configuration), each followed by
        ; the scheduling annotation printed by -print-schedule.
     25 ; GENERIC-LABEL: test_addps:
     26 ; GENERIC:       # %bb.0:
     27 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
     28 ; GENERIC-NEXT:    addps (%rdi), %xmm0 # sched: [9:1.00]
     29 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     30 ;
     31 ; ATOM-LABEL: test_addps:
     32 ; ATOM:       # %bb.0:
     33 ; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
     34 ; ATOM-NEXT:    addps (%rdi), %xmm0 # sched: [5:5.00]
     35 ; ATOM-NEXT:    retq # sched: [79:39.50]
     36 ;
     37 ; SLM-LABEL: test_addps:
     38 ; SLM:       # %bb.0:
     39 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
     40 ; SLM-NEXT:    addps (%rdi), %xmm0 # sched: [6:1.00]
     41 ; SLM-NEXT:    retq # sched: [4:1.00]
     42 ;
     43 ; SANDY-SSE-LABEL: test_addps:
     44 ; SANDY-SSE:       # %bb.0:
     45 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
     46 ; SANDY-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [9:1.00]
     47 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
     48 ;
     49 ; SANDY-LABEL: test_addps:
     50 ; SANDY:       # %bb.0:
     51 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
     52 ; SANDY-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
     53 ; SANDY-NEXT:    retq # sched: [1:1.00]
     54 ;
     55 ; HASWELL-SSE-LABEL: test_addps:
     56 ; HASWELL-SSE:       # %bb.0:
     57 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
     58 ; HASWELL-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [9:1.00]
     59 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
     60 ;
     61 ; HASWELL-LABEL: test_addps:
     62 ; HASWELL:       # %bb.0:
     63 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
     64 ; HASWELL-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
     65 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     66 ;
     67 ; BROADWELL-SSE-LABEL: test_addps:
     68 ; BROADWELL-SSE:       # %bb.0:
     69 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
     70 ; BROADWELL-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [8:1.00]
     71 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
     72 ;
     73 ; BROADWELL-LABEL: test_addps:
     74 ; BROADWELL:       # %bb.0:
     75 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
     76 ; BROADWELL-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
     77 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     78 ;
     79 ; SKYLAKE-SSE-LABEL: test_addps:
     80 ; SKYLAKE-SSE:       # %bb.0:
     81 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
     82 ; SKYLAKE-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [10:0.50]
     83 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
     84 ;
     85 ; SKYLAKE-LABEL: test_addps:
     86 ; SKYLAKE:       # %bb.0:
     87 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
     88 ; SKYLAKE-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
     89 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
     90 ;
     91 ; SKX-SSE-LABEL: test_addps:
     92 ; SKX-SSE:       # %bb.0:
     93 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
     94 ; SKX-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [10:0.50]
     95 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
     96 ;
     97 ; SKX-LABEL: test_addps:
     98 ; SKX:       # %bb.0:
     99 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    100 ; SKX-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    101 ; SKX-NEXT:    retq # sched: [7:1.00]
    102 ;
    103 ; BTVER2-SSE-LABEL: test_addps:
    104 ; BTVER2-SSE:       # %bb.0:
    105 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
    106 ; BTVER2-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [8:1.00]
    107 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    108 ;
    109 ; BTVER2-LABEL: test_addps:
    110 ; BTVER2:       # %bb.0:
    111 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    112 ; BTVER2-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    113 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    114 ;
    115 ; ZNVER1-SSE-LABEL: test_addps:
    116 ; ZNVER1-SSE:       # %bb.0:
    117 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
    118 ; ZNVER1-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [10:1.00]
    119 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    120 ;
    121 ; ZNVER1-LABEL: test_addps:
    122 ; ZNVER1:       # %bb.0:
    123 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    124 ; ZNVER1-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
    125 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    126   %1 = fadd <4 x float> %a0, %a1 ; add of the two vector arguments (expected as the register-register form)
    127   %2 = load <4 x float>, <4 x float> *%a2, align 16 ; vector load from %a2, declared 16-byte aligned
    128   %3 = fadd <4 x float> %1, %2 ; add with the loaded value (expected as the form with a memory operand)
    129   ret <4 x float> %3
    130 }
    131 
    132 define float @test_addss(float %a0, float %a1, float *%a2) {
        ; Scheduling test for scalar single-precision add. The body performs one
        ; add of the two float arguments and one add with a float loaded from %a2.
        ; The assertions below expect a register form and a memory form of addss
        ; (or vaddss, depending on the run configuration), each followed by the
        ; scheduling annotation printed by -print-schedule.
    133 ; GENERIC-LABEL: test_addss:
    134 ; GENERIC:       # %bb.0:
    135 ; GENERIC-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
    136 ; GENERIC-NEXT:    addss (%rdi), %xmm0 # sched: [9:1.00]
    137 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    138 ;
    139 ; ATOM-LABEL: test_addss:
    140 ; ATOM:       # %bb.0:
    141 ; ATOM-NEXT:    addss %xmm1, %xmm0 # sched: [5:5.00]
    142 ; ATOM-NEXT:    addss (%rdi), %xmm0 # sched: [5:5.00]
    143 ; ATOM-NEXT:    retq # sched: [79:39.50]
    144 ;
    145 ; SLM-LABEL: test_addss:
    146 ; SLM:       # %bb.0:
    147 ; SLM-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
    148 ; SLM-NEXT:    addss (%rdi), %xmm0 # sched: [6:1.00]
    149 ; SLM-NEXT:    retq # sched: [4:1.00]
    150 ;
    151 ; SANDY-SSE-LABEL: test_addss:
    152 ; SANDY-SSE:       # %bb.0:
    153 ; SANDY-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
    154 ; SANDY-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [9:1.00]
    155 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    156 ;
    157 ; SANDY-LABEL: test_addss:
    158 ; SANDY:       # %bb.0:
    159 ; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    160 ; SANDY-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
    161 ; SANDY-NEXT:    retq # sched: [1:1.00]
    162 ;
    163 ; HASWELL-SSE-LABEL: test_addss:
    164 ; HASWELL-SSE:       # %bb.0:
    165 ; HASWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
    166 ; HASWELL-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [8:1.00]
    167 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    168 ;
    169 ; HASWELL-LABEL: test_addss:
    170 ; HASWELL:       # %bb.0:
    171 ; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    172 ; HASWELL-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    173 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    174 ;
    175 ; BROADWELL-SSE-LABEL: test_addss:
    176 ; BROADWELL-SSE:       # %bb.0:
    177 ; BROADWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
    178 ; BROADWELL-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [8:1.00]
    179 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    180 ;
    181 ; BROADWELL-LABEL: test_addss:
    182 ; BROADWELL:       # %bb.0:
    183 ; BROADWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    184 ; BROADWELL-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    185 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    186 ;
    187 ; SKYLAKE-SSE-LABEL: test_addss:
    188 ; SKYLAKE-SSE:       # %bb.0:
    189 ; SKYLAKE-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.50]
    190 ; SKYLAKE-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [9:0.50]
    191 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    192 ;
    193 ; SKYLAKE-LABEL: test_addss:
    194 ; SKYLAKE:       # %bb.0:
    195 ; SKYLAKE-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    196 ; SKYLAKE-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
    197 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    198 ;
    199 ; SKX-SSE-LABEL: test_addss:
    200 ; SKX-SSE:       # %bb.0:
    201 ; SKX-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.50]
    202 ; SKX-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [9:0.50]
    203 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    204 ;
    205 ; SKX-LABEL: test_addss:
    206 ; SKX:       # %bb.0:
    207 ; SKX-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    208 ; SKX-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
    209 ; SKX-NEXT:    retq # sched: [7:1.00]
    210 ;
    211 ; BTVER2-SSE-LABEL: test_addss:
    212 ; BTVER2-SSE:       # %bb.0:
    213 ; BTVER2-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
    214 ; BTVER2-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [8:1.00]
    215 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    216 ;
    217 ; BTVER2-LABEL: test_addss:
    218 ; BTVER2:       # %bb.0:
    219 ; BTVER2-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    220 ; BTVER2-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    221 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    222 ;
    223 ; ZNVER1-SSE-LABEL: test_addss:
    224 ; ZNVER1-SSE:       # %bb.0:
    225 ; ZNVER1-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
    226 ; ZNVER1-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [10:1.00]
    227 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    228 ;
    229 ; ZNVER1-LABEL: test_addss:
    230 ; ZNVER1:       # %bb.0:
    231 ; ZNVER1-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    232 ; ZNVER1-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
    233 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    234   %1 = fadd float %a0, %a1 ; add of the two scalar arguments (expected as the register-register form)
    235   %2 = load float, float *%a2, align 4 ; scalar load from %a2, declared 4-byte aligned
    236   %3 = fadd float %1, %2 ; add with the loaded value (expected as the form with a memory operand)
    237   ret float %3
    238 }
    239 
    240 define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Scheduling test for packed bitwise AND on float vectors. The operands
        ; are bitcast to <4 x i32>, ANDed together, ANDed with a vector loaded
        ; from %a2, and bitcast back. The assertions below expect a register form
        ; and a memory form of andps (or vandps, depending on the run
        ; configuration). The ATOM assertions additionally expect four nop
        ; instructions before the return.
    241 ; GENERIC-LABEL: test_andps:
    242 ; GENERIC:       # %bb.0:
    243 ; GENERIC-NEXT:    andps %xmm1, %xmm0 # sched: [1:1.00]
    244 ; GENERIC-NEXT:    andps (%rdi), %xmm0 # sched: [7:1.00]
    245 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    246 ;
    247 ; ATOM-LABEL: test_andps:
    248 ; ATOM:       # %bb.0:
    249 ; ATOM-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.50]
    250 ; ATOM-NEXT:    andps (%rdi), %xmm0 # sched: [1:1.00]
    251 ; ATOM-NEXT:    nop # sched: [1:0.50]
    252 ; ATOM-NEXT:    nop # sched: [1:0.50]
    253 ; ATOM-NEXT:    nop # sched: [1:0.50]
    254 ; ATOM-NEXT:    nop # sched: [1:0.50]
    255 ; ATOM-NEXT:    retq # sched: [79:39.50]
    256 ;
    257 ; SLM-LABEL: test_andps:
    258 ; SLM:       # %bb.0:
    259 ; SLM-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.50]
    260 ; SLM-NEXT:    andps (%rdi), %xmm0 # sched: [4:1.00]
    261 ; SLM-NEXT:    retq # sched: [4:1.00]
    262 ;
    263 ; SANDY-SSE-LABEL: test_andps:
    264 ; SANDY-SSE:       # %bb.0:
    265 ; SANDY-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:1.00]
    266 ; SANDY-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [7:1.00]
    267 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    268 ;
    269 ; SANDY-LABEL: test_andps:
    270 ; SANDY:       # %bb.0:
    271 ; SANDY-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
    272 ; SANDY-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
    273 ; SANDY-NEXT:    retq # sched: [1:1.00]
    274 ;
    275 ; HASWELL-SSE-LABEL: test_andps:
    276 ; HASWELL-SSE:       # %bb.0:
    277 ; HASWELL-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:1.00]
    278 ; HASWELL-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [7:1.00]
    279 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    280 ;
    281 ; HASWELL-LABEL: test_andps:
    282 ; HASWELL:       # %bb.0:
    283 ; HASWELL-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
    284 ; HASWELL-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
    285 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    286 ;
    287 ; BROADWELL-SSE-LABEL: test_andps:
    288 ; BROADWELL-SSE:       # %bb.0:
    289 ; BROADWELL-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:1.00]
    290 ; BROADWELL-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [6:1.00]
    291 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    292 ;
    293 ; BROADWELL-LABEL: test_andps:
    294 ; BROADWELL:       # %bb.0:
    295 ; BROADWELL-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
    296 ; BROADWELL-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
    297 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    298 ;
    299 ; SKYLAKE-SSE-LABEL: test_andps:
    300 ; SKYLAKE-SSE:       # %bb.0:
    301 ; SKYLAKE-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.33]
    302 ; SKYLAKE-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [7:0.50]
    303 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    304 ;
    305 ; SKYLAKE-LABEL: test_andps:
    306 ; SKYLAKE:       # %bb.0:
    307 ; SKYLAKE-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    308 ; SKYLAKE-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
    309 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    310 ;
    311 ; SKX-SSE-LABEL: test_andps:
    312 ; SKX-SSE:       # %bb.0:
    313 ; SKX-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.33]
    314 ; SKX-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [7:0.50]
    315 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    316 ;
    317 ; SKX-LABEL: test_andps:
    318 ; SKX:       # %bb.0:
    319 ; SKX-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    320 ; SKX-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
    321 ; SKX-NEXT:    retq # sched: [7:1.00]
    322 ;
    323 ; BTVER2-SSE-LABEL: test_andps:
    324 ; BTVER2-SSE:       # %bb.0:
    325 ; BTVER2-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.50]
    326 ; BTVER2-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [6:1.00]
    327 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    328 ;
    329 ; BTVER2-LABEL: test_andps:
    330 ; BTVER2:       # %bb.0:
    331 ; BTVER2-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
    332 ; BTVER2-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
    333 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    334 ;
    335 ; ZNVER1-SSE-LABEL: test_andps:
    336 ; ZNVER1-SSE:       # %bb.0:
    337 ; ZNVER1-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.25]
    338 ; ZNVER1-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [8:0.50]
    339 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    340 ;
    341 ; ZNVER1-LABEL: test_andps:
    342 ; ZNVER1:       # %bb.0:
    343 ; ZNVER1-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
    344 ; ZNVER1-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
    345 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    346   %1 = bitcast <4 x float> %a0 to <4 x i32> ; reinterpret as integers, since the IR 'and' takes integer operands
    347   %2 = bitcast <4 x float> %a1 to <4 x i32>
    348   %3 = and <4 x i32> %1, %2 ; AND of the two arguments (expected as the register-register form)
    349   %4 = load <4 x float>, <4 x float> *%a2, align 16 ; vector load from %a2, declared 16-byte aligned
    350   %5 = bitcast <4 x float> %4 to <4 x i32>
    351   %6 = and <4 x i32> %3, %5 ; AND with the loaded value (expected as the form with a memory operand)
    352   %7 = bitcast <4 x i32> %6 to <4 x float> ; back to the float vector return type
    353   ret <4 x float> %7
    354 }
    355 
    356 define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Scheduling test for packed bitwise AND-NOT on float vectors. The body
        ; computes (~%a0 & %a1) on the <4 x i32> bit patterns, then ANDs the
        ; complement of that result with a vector loaded from %a2. The assertions
        ; below expect a register form and a memory form of andnps (or vandnps,
        ; depending on the run configuration). The ATOM assertions additionally
        ; expect four nop instructions before the return.
    357 ; GENERIC-LABEL: test_andnotps:
    358 ; GENERIC:       # %bb.0:
    359 ; GENERIC-NEXT:    andnps %xmm1, %xmm0 # sched: [1:1.00]
    360 ; GENERIC-NEXT:    andnps (%rdi), %xmm0 # sched: [7:1.00]
    361 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    362 ;
    363 ; ATOM-LABEL: test_andnotps:
    364 ; ATOM:       # %bb.0:
    365 ; ATOM-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.50]
    366 ; ATOM-NEXT:    andnps (%rdi), %xmm0 # sched: [1:1.00]
    367 ; ATOM-NEXT:    nop # sched: [1:0.50]
    368 ; ATOM-NEXT:    nop # sched: [1:0.50]
    369 ; ATOM-NEXT:    nop # sched: [1:0.50]
    370 ; ATOM-NEXT:    nop # sched: [1:0.50]
    371 ; ATOM-NEXT:    retq # sched: [79:39.50]
    372 ;
    373 ; SLM-LABEL: test_andnotps:
    374 ; SLM:       # %bb.0:
    375 ; SLM-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.50]
    376 ; SLM-NEXT:    andnps (%rdi), %xmm0 # sched: [4:1.00]
    377 ; SLM-NEXT:    retq # sched: [4:1.00]
    378 ;
    379 ; SANDY-SSE-LABEL: test_andnotps:
    380 ; SANDY-SSE:       # %bb.0:
    381 ; SANDY-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:1.00]
    382 ; SANDY-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [7:1.00]
    383 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    384 ;
    385 ; SANDY-LABEL: test_andnotps:
    386 ; SANDY:       # %bb.0:
    387 ; SANDY-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
    388 ; SANDY-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
    389 ; SANDY-NEXT:    retq # sched: [1:1.00]
    390 ;
    391 ; HASWELL-SSE-LABEL: test_andnotps:
    392 ; HASWELL-SSE:       # %bb.0:
    393 ; HASWELL-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:1.00]
    394 ; HASWELL-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [7:1.00]
    395 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    396 ;
    397 ; HASWELL-LABEL: test_andnotps:
    398 ; HASWELL:       # %bb.0:
    399 ; HASWELL-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
    400 ; HASWELL-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
    401 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    402 ;
    403 ; BROADWELL-SSE-LABEL: test_andnotps:
    404 ; BROADWELL-SSE:       # %bb.0:
    405 ; BROADWELL-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:1.00]
    406 ; BROADWELL-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [6:1.00]
    407 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    408 ;
    409 ; BROADWELL-LABEL: test_andnotps:
    410 ; BROADWELL:       # %bb.0:
    411 ; BROADWELL-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
    412 ; BROADWELL-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
    413 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    414 ;
    415 ; SKYLAKE-SSE-LABEL: test_andnotps:
    416 ; SKYLAKE-SSE:       # %bb.0:
    417 ; SKYLAKE-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.33]
    418 ; SKYLAKE-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [7:0.50]
    419 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    420 ;
    421 ; SKYLAKE-LABEL: test_andnotps:
    422 ; SKYLAKE:       # %bb.0:
    423 ; SKYLAKE-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    424 ; SKYLAKE-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
    425 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    426 ;
    427 ; SKX-SSE-LABEL: test_andnotps:
    428 ; SKX-SSE:       # %bb.0:
    429 ; SKX-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.33]
    430 ; SKX-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [7:0.50]
    431 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    432 ;
    433 ; SKX-LABEL: test_andnotps:
    434 ; SKX:       # %bb.0:
    435 ; SKX-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
    436 ; SKX-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
    437 ; SKX-NEXT:    retq # sched: [7:1.00]
    438 ;
    439 ; BTVER2-SSE-LABEL: test_andnotps:
    440 ; BTVER2-SSE:       # %bb.0:
    441 ; BTVER2-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.50]
    442 ; BTVER2-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [6:1.00]
    443 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    444 ;
    445 ; BTVER2-LABEL: test_andnotps:
    446 ; BTVER2:       # %bb.0:
    447 ; BTVER2-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
    448 ; BTVER2-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
    449 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    450 ;
    451 ; ZNVER1-SSE-LABEL: test_andnotps:
    452 ; ZNVER1-SSE:       # %bb.0:
    453 ; ZNVER1-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.25]
    454 ; ZNVER1-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [8:0.50]
    455 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    456 ;
    457 ; ZNVER1-LABEL: test_andnotps:
    458 ; ZNVER1:       # %bb.0:
    459 ; ZNVER1-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
    460 ; ZNVER1-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
    461 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    462   %1 = bitcast <4 x float> %a0 to <4 x i32> ; reinterpret as integers for the bitwise operations below
    463   %2 = bitcast <4 x float> %a1 to <4 x i32>
    464   %3 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1> ; xor with all-ones, i.e. bitwise NOT of %a0
    465   %4 = and <4 x i32> %3, %2 ; (~%a0) & %a1 (expected as the register-register andnps form)
    466   %5 = load <4 x float>, <4 x float> *%a2, align 16 ; vector load from %a2, declared 16-byte aligned
    467   %6 = bitcast <4 x float> %5 to <4 x i32>
    468   %7 = xor <4 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1> ; bitwise NOT of the first AND-NOT result
    469   %8 = and <4 x i32> %6, %7 ; loaded value & ~%4 (expected as the andnps form with a memory operand)
    470   %9 = bitcast <4 x i32> %8 to <4 x float> ; back to the float vector return type
    471   ret <4 x float> %9
    472 }
    473 
    474 define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Scheduling test for packed single-precision compare. %a0 is compared for
        ; ordered equality against %a1 and against a vector loaded from %a2. Both
        ; i1 lane results are sign-extended to i32 lanes, ORed together and
        ; returned as a float vector. The assertions below expect a register form
        ; and a memory form of cmpeqps followed by orps (or the v-prefixed
        ; equivalents, depending on the run configuration).
    475 ; GENERIC-LABEL: test_cmpps:
    476 ; GENERIC:       # %bb.0:
    477 ; GENERIC-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
    478 ; GENERIC-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
    479 ; GENERIC-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
    480 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    481 ;
    482 ; ATOM-LABEL: test_cmpps:
    483 ; ATOM:       # %bb.0:
    484 ; ATOM-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [5:5.00]
    485 ; ATOM-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [5:5.00]
    486 ; ATOM-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.50]
    487 ; ATOM-NEXT:    retq # sched: [79:39.50]
    488 ;
    489 ; SLM-LABEL: test_cmpps:
    490 ; SLM:       # %bb.0:
    491 ; SLM-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
    492 ; SLM-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [6:1.00]
    493 ; SLM-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.50]
    494 ; SLM-NEXT:    retq # sched: [4:1.00]
    495 ;
    496 ; SANDY-SSE-LABEL: test_cmpps:
    497 ; SANDY-SSE:       # %bb.0:
    498 ; SANDY-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
    499 ; SANDY-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
    500 ; SANDY-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
    501 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    502 ;
    503 ; SANDY-LABEL: test_cmpps:
    504 ; SANDY:       # %bb.0:
    505 ; SANDY-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
    506 ; SANDY-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
    507 ; SANDY-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
    508 ; SANDY-NEXT:    retq # sched: [1:1.00]
    509 ;
    510 ; HASWELL-SSE-LABEL: test_cmpps:
    511 ; HASWELL-SSE:       # %bb.0:
    512 ; HASWELL-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
    513 ; HASWELL-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
    514 ; HASWELL-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
    515 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    516 ;
    517 ; HASWELL-LABEL: test_cmpps:
    518 ; HASWELL:       # %bb.0:
    519 ; HASWELL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
    520 ; HASWELL-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
    521 ; HASWELL-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
    522 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    523 ;
    524 ; BROADWELL-SSE-LABEL: test_cmpps:
    525 ; BROADWELL-SSE:       # %bb.0:
    526 ; BROADWELL-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
    527 ; BROADWELL-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [8:1.00]
    528 ; BROADWELL-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
    529 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    530 ;
    531 ; BROADWELL-LABEL: test_cmpps:
    532 ; BROADWELL:       # %bb.0:
    533 ; BROADWELL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
    534 ; BROADWELL-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    535 ; BROADWELL-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
    536 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    537 ;
    538 ; SKYLAKE-SSE-LABEL: test_cmpps:
    539 ; SKYLAKE-SSE:       # %bb.0:
    540 ; SKYLAKE-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [4:0.50]
    541 ; SKYLAKE-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [10:0.50]
    542 ; SKYLAKE-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.33]
    543 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    544 ;
    545 ; SKYLAKE-LABEL: test_cmpps:
    546 ; SKYLAKE:       # %bb.0:
    547 ; SKYLAKE-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
    548 ; SKYLAKE-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    549 ; SKYLAKE-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
    550 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    551 ;
    552 ; SKX-SSE-LABEL: test_cmpps:
    553 ; SKX-SSE:       # %bb.0:
    554 ; SKX-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [4:0.50]
    555 ; SKX-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [10:0.50]
    556 ; SKX-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.33]
    557 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    558 ;
    559 ; SKX-LABEL: test_cmpps:
    560 ; SKX:       # %bb.0:
    561 ; SKX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
    562 ; SKX-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
    563 ; SKX-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
    564 ; SKX-NEXT:    retq # sched: [7:1.00]
    565 ;
    566 ; BTVER2-SSE-LABEL: test_cmpps:
    567 ; BTVER2-SSE:       # %bb.0:
    568 ; BTVER2-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [2:1.00]
    569 ; BTVER2-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [7:1.00]
    570 ; BTVER2-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.50]
    571 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    572 ;
    573 ; BTVER2-LABEL: test_cmpps:
    574 ; BTVER2:       # %bb.0:
    575 ; BTVER2-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
    576 ; BTVER2-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
    577 ; BTVER2-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
    578 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    579 ;
    580 ; ZNVER1-SSE-LABEL: test_cmpps:
    581 ; ZNVER1-SSE:       # %bb.0:
    582 ; ZNVER1-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
    583 ; ZNVER1-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [10:1.00]
    584 ; ZNVER1-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.25]
    585 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    586 ;
    587 ; ZNVER1-LABEL: test_cmpps:
    588 ; ZNVER1:       # %bb.0:
    589 ; ZNVER1-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
    590 ; ZNVER1-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
    591 ; ZNVER1-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
    592 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    593   %1 = fcmp oeq <4 x float> %a0, %a1 ; ordered-equal compare of the two arguments (expected as the register-register form)
    594   %2 = load <4 x float>, <4 x float> *%a2, align 16 ; vector load from %a2, declared 16-byte aligned
    595   %3 = fcmp oeq <4 x float> %a0, %2 ; ordered-equal compare of %a0 with the loaded value (expected as the form with a memory operand)
    596   %4 = sext <4 x i1> %1 to <4 x i32> ; widen each i1 lane to i32 by sign extension (true becomes all-ones, false becomes zero)
    597   %5 = sext <4 x i1> %3 to <4 x i32>
    598   %6 = or <4 x i32> %4, %5 ; combine the two widened compare results (expected as orps / vorps)
    599   %7 = bitcast <4 x i32> %6 to <4 x float> ; back to the float vector return type
    600   ret <4 x float> %7
    601 }
    602 
    603 define float @test_cmpss(float %a0, float %a1, float *%a2) {
    604 ; GENERIC-LABEL: test_cmpss:
    605 ; GENERIC:       # %bb.0:
    606 ; GENERIC-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
    607 ; GENERIC-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [9:1.00]
    608 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    609 ;
    610 ; ATOM-LABEL: test_cmpss:
    611 ; ATOM:       # %bb.0:
    612 ; ATOM-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [5:5.00]
    613 ; ATOM-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [5:5.00]
    614 ; ATOM-NEXT:    retq # sched: [79:39.50]
    615 ;
    616 ; SLM-LABEL: test_cmpss:
    617 ; SLM:       # %bb.0:
    618 ; SLM-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
    619 ; SLM-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [6:1.00]
    620 ; SLM-NEXT:    retq # sched: [4:1.00]
    621 ;
    622 ; SANDY-SSE-LABEL: test_cmpss:
    623 ; SANDY-SSE:       # %bb.0:
    624 ; SANDY-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
    625 ; SANDY-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [9:1.00]
    626 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    627 ;
    628 ; SANDY-LABEL: test_cmpss:
    629 ; SANDY:       # %bb.0:
    630 ; SANDY-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    631 ; SANDY-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
    632 ; SANDY-NEXT:    retq # sched: [1:1.00]
    633 ;
    634 ; HASWELL-SSE-LABEL: test_cmpss:
    635 ; HASWELL-SSE:       # %bb.0:
    636 ; HASWELL-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
    637 ; HASWELL-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [8:1.00]
    638 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    639 ;
    640 ; HASWELL-LABEL: test_cmpss:
    641 ; HASWELL:       # %bb.0:
    642 ; HASWELL-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    643 ; HASWELL-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    644 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    645 ;
    646 ; BROADWELL-SSE-LABEL: test_cmpss:
    647 ; BROADWELL-SSE:       # %bb.0:
    648 ; BROADWELL-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
    649 ; BROADWELL-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [8:1.00]
    650 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    651 ;
    652 ; BROADWELL-LABEL: test_cmpss:
    653 ; BROADWELL:       # %bb.0:
    654 ; BROADWELL-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    655 ; BROADWELL-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
    656 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    657 ;
    658 ; SKYLAKE-SSE-LABEL: test_cmpss:
    659 ; SKYLAKE-SSE:       # %bb.0:
    660 ; SKYLAKE-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [4:0.50]
    661 ; SKYLAKE-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [9:0.50]
    662 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    663 ;
    664 ; SKYLAKE-LABEL: test_cmpss:
    665 ; SKYLAKE:       # %bb.0:
    666 ; SKYLAKE-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    667 ; SKYLAKE-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
    668 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    669 ;
    670 ; SKX-SSE-LABEL: test_cmpss:
    671 ; SKX-SSE:       # %bb.0:
    672 ; SKX-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [4:0.50]
    673 ; SKX-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [9:0.50]
    674 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    675 ;
    676 ; SKX-LABEL: test_cmpss:
    677 ; SKX:       # %bb.0:
    678 ; SKX-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
    679 ; SKX-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
    680 ; SKX-NEXT:    retq # sched: [7:1.00]
    681 ;
    682 ; BTVER2-SSE-LABEL: test_cmpss:
    683 ; BTVER2-SSE:       # %bb.0:
    684 ; BTVER2-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [2:1.00]
    685 ; BTVER2-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [7:1.00]
    686 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    687 ;
    688 ; BTVER2-LABEL: test_cmpss:
    689 ; BTVER2:       # %bb.0:
    690 ; BTVER2-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
    691 ; BTVER2-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
    692 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    693 ;
    694 ; ZNVER1-SSE-LABEL: test_cmpss:
    695 ; ZNVER1-SSE:       # %bb.0:
    696 ; ZNVER1-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
    697 ; ZNVER1-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [10:1.00]
    698 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    699 ;
    700 ; ZNVER1-LABEL: test_cmpss:
    701 ; ZNVER1:       # %bb.0:
    702 ; ZNVER1-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    703 ; ZNVER1-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
    704 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    705   %1 = insertelement <4 x float> undef, float %a0, i32 0
    706   %2 = insertelement <4 x float> undef, float %a1, i32 0
    707   %3 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %1, <4 x float> %2, i8 0)
    708   %4 = load float, float *%a2, align 4
    709   %5 = insertelement <4 x float> undef, float %4, i32 0
    710   %6 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %3, <4 x float> %5, i8 0)
    711   %7 = extractelement <4 x float> %6, i32 0
    712   ret float %7
    713 }
    714 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
    715 
    716 define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Scheduling test for the scalar compare comiss / vcomiss.
        ; Calls @llvm.x86.sse.comieq.ss twice with %a0 as the first operand:
        ; once against the register argument %a1 and once against a vector
        ; loaded through %a2, so the assertions below cover both the
        ; register-register and the register-memory form of the instruction.
        ; The two i32 results are OR-ed together and returned.
        ; The assertion lines are autogenerated (see the header of this file);
        ; every expected instruction carries the "# sched" annotation that llc
        ; prints when run with -print-schedule.
    717 ; GENERIC-LABEL: test_comiss:
    718 ; GENERIC:       # %bb.0:
    719 ; GENERIC-NEXT:    comiss %xmm1, %xmm0 # sched: [2:1.00]
    720 ; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
    721 ; GENERIC-NEXT:    sete %cl # sched: [1:0.50]
    722 ; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
    723 ; GENERIC-NEXT:    comiss (%rdi), %xmm0 # sched: [8:1.00]
    724 ; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
    725 ; GENERIC-NEXT:    sete %dl # sched: [1:0.50]
    726 ; GENERIC-NEXT:    andb %al, %dl # sched: [1:0.33]
    727 ; GENERIC-NEXT:    orb %cl, %dl # sched: [1:0.33]
    728 ; GENERIC-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
    729 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    730 ;
    731 ; ATOM-LABEL: test_comiss:
    732 ; ATOM:       # %bb.0:
    733 ; ATOM-NEXT:    comiss %xmm1, %xmm0 # sched: [9:4.50]
    734 ; ATOM-NEXT:    setnp %al # sched: [1:0.50]
    735 ; ATOM-NEXT:    sete %cl # sched: [1:0.50]
    736 ; ATOM-NEXT:    andb %al, %cl # sched: [1:0.50]
    737 ; ATOM-NEXT:    comiss (%rdi), %xmm0 # sched: [10:5.00]
    738 ; ATOM-NEXT:    setnp %al # sched: [1:0.50]
    739 ; ATOM-NEXT:    sete %dl # sched: [1:0.50]
    740 ; ATOM-NEXT:    andb %al, %dl # sched: [1:0.50]
    741 ; ATOM-NEXT:    orb %cl, %dl # sched: [1:0.50]
    742 ; ATOM-NEXT:    movzbl %dl, %eax # sched: [1:1.00]
    743 ; ATOM-NEXT:    retq # sched: [79:39.50]
    744 ;
    745 ; SLM-LABEL: test_comiss:
    746 ; SLM:       # %bb.0:
    747 ; SLM-NEXT:    comiss %xmm1, %xmm0 # sched: [3:1.00]
    748 ; SLM-NEXT:    setnp %al # sched: [1:0.50]
    749 ; SLM-NEXT:    sete %cl # sched: [1:0.50]
    750 ; SLM-NEXT:    andb %al, %cl # sched: [1:0.50]
    751 ; SLM-NEXT:    comiss (%rdi), %xmm0 # sched: [6:1.00]
    752 ; SLM-NEXT:    setnp %al # sched: [1:0.50]
    753 ; SLM-NEXT:    sete %dl # sched: [1:0.50]
    754 ; SLM-NEXT:    andb %al, %dl # sched: [1:0.50]
    755 ; SLM-NEXT:    orb %cl, %dl # sched: [1:0.50]
    756 ; SLM-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
    757 ; SLM-NEXT:    retq # sched: [4:1.00]
    758 ;
    759 ; SANDY-SSE-LABEL: test_comiss:
    760 ; SANDY-SSE:       # %bb.0:
    761 ; SANDY-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [2:1.00]
    762 ; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
    763 ; SANDY-SSE-NEXT:    sete %cl # sched: [1:0.50]
    764 ; SANDY-SSE-NEXT:    andb %al, %cl # sched: [1:0.33]
    765 ; SANDY-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [8:1.00]
    766 ; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
    767 ; SANDY-SSE-NEXT:    sete %dl # sched: [1:0.50]
    768 ; SANDY-SSE-NEXT:    andb %al, %dl # sched: [1:0.33]
    769 ; SANDY-SSE-NEXT:    orb %cl, %dl # sched: [1:0.33]
    770 ; SANDY-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
    771 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    772 ;
    773 ; SANDY-LABEL: test_comiss:
    774 ; SANDY:       # %bb.0:
    775 ; SANDY-NEXT:    vcomiss %xmm1, %xmm0 # sched: [2:1.00]
    776 ; SANDY-NEXT:    setnp %al # sched: [1:0.50]
    777 ; SANDY-NEXT:    sete %cl # sched: [1:0.50]
    778 ; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
    779 ; SANDY-NEXT:    vcomiss (%rdi), %xmm0 # sched: [8:1.00]
    780 ; SANDY-NEXT:    setnp %al # sched: [1:0.50]
    781 ; SANDY-NEXT:    sete %dl # sched: [1:0.50]
    782 ; SANDY-NEXT:    andb %al, %dl # sched: [1:0.33]
    783 ; SANDY-NEXT:    orb %cl, %dl # sched: [1:0.33]
    784 ; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
    785 ; SANDY-NEXT:    retq # sched: [1:1.00]
    786 ;
    787 ; HASWELL-SSE-LABEL: test_comiss:
    788 ; HASWELL-SSE:       # %bb.0:
    789 ; HASWELL-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [3:1.00]
    790 ; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
    791 ; HASWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
    792 ; HASWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
    793 ; HASWELL-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [8:1.00]
    794 ; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
    795 ; HASWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
    796 ; HASWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
    797 ; HASWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
    798 ; HASWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
    799 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
    800 ;
    801 ; HASWELL-LABEL: test_comiss:
    802 ; HASWELL:       # %bb.0:
    803 ; HASWELL-NEXT:    vcomiss %xmm1, %xmm0 # sched: [3:1.00]
    804 ; HASWELL-NEXT:    setnp %al # sched: [1:0.50]
    805 ; HASWELL-NEXT:    sete %cl # sched: [1:0.50]
    806 ; HASWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
    807 ; HASWELL-NEXT:    vcomiss (%rdi), %xmm0 # sched: [8:1.00]
    808 ; HASWELL-NEXT:    setnp %al # sched: [1:0.50]
    809 ; HASWELL-NEXT:    sete %dl # sched: [1:0.50]
    810 ; HASWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
    811 ; HASWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
    812 ; HASWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
    813 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    814 ;
    815 ; BROADWELL-SSE-LABEL: test_comiss:
    816 ; BROADWELL-SSE:       # %bb.0:
    817 ; BROADWELL-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [3:1.00]
    818 ; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
    819 ; BROADWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
    820 ; BROADWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
    821 ; BROADWELL-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [8:1.00]
    822 ; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
    823 ; BROADWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
    824 ; BROADWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
    825 ; BROADWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
    826 ; BROADWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
    827 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
    828 ;
    829 ; BROADWELL-LABEL: test_comiss:
    830 ; BROADWELL:       # %bb.0:
    831 ; BROADWELL-NEXT:    vcomiss %xmm1, %xmm0 # sched: [3:1.00]
    832 ; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
    833 ; BROADWELL-NEXT:    sete %cl # sched: [1:0.50]
    834 ; BROADWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
    835 ; BROADWELL-NEXT:    vcomiss (%rdi), %xmm0 # sched: [8:1.00]
    836 ; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
    837 ; BROADWELL-NEXT:    sete %dl # sched: [1:0.50]
    838 ; BROADWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
    839 ; BROADWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
    840 ; BROADWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
    841 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    842 ;
    843 ; SKYLAKE-SSE-LABEL: test_comiss:
    844 ; SKYLAKE-SSE:       # %bb.0:
    845 ; SKYLAKE-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [2:1.00]
    846 ; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
    847 ; SKYLAKE-SSE-NEXT:    sete %cl # sched: [1:0.50]
    848 ; SKYLAKE-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
    849 ; SKYLAKE-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [7:1.00]
    850 ; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
    851 ; SKYLAKE-SSE-NEXT:    sete %dl # sched: [1:0.50]
    852 ; SKYLAKE-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
    853 ; SKYLAKE-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
    854 ; SKYLAKE-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
    855 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
    856 ;
    857 ; SKYLAKE-LABEL: test_comiss:
    858 ; SKYLAKE:       # %bb.0:
    859 ; SKYLAKE-NEXT:    vcomiss %xmm1, %xmm0 # sched: [2:1.00]
    860 ; SKYLAKE-NEXT:    setnp %al # sched: [1:0.50]
    861 ; SKYLAKE-NEXT:    sete %cl # sched: [1:0.50]
    862 ; SKYLAKE-NEXT:    andb %al, %cl # sched: [1:0.25]
    863 ; SKYLAKE-NEXT:    vcomiss (%rdi), %xmm0 # sched: [7:1.00]
    864 ; SKYLAKE-NEXT:    setnp %al # sched: [1:0.50]
    865 ; SKYLAKE-NEXT:    sete %dl # sched: [1:0.50]
    866 ; SKYLAKE-NEXT:    andb %al, %dl # sched: [1:0.25]
    867 ; SKYLAKE-NEXT:    orb %cl, %dl # sched: [1:0.25]
    868 ; SKYLAKE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
    869 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    870 ;
    871 ; SKX-SSE-LABEL: test_comiss:
    872 ; SKX-SSE:       # %bb.0:
    873 ; SKX-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [2:1.00]
    874 ; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
    875 ; SKX-SSE-NEXT:    sete %cl # sched: [1:0.50]
    876 ; SKX-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
    877 ; SKX-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [7:1.00]
    878 ; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
    879 ; SKX-SSE-NEXT:    sete %dl # sched: [1:0.50]
    880 ; SKX-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
    881 ; SKX-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
    882 ; SKX-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
    883 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
    884 ;
    885 ; SKX-LABEL: test_comiss:
    886 ; SKX:       # %bb.0:
    887 ; SKX-NEXT:    vcomiss %xmm1, %xmm0 # sched: [2:1.00]
    888 ; SKX-NEXT:    setnp %al # sched: [1:0.50]
    889 ; SKX-NEXT:    sete %cl # sched: [1:0.50]
    890 ; SKX-NEXT:    andb %al, %cl # sched: [1:0.25]
    891 ; SKX-NEXT:    vcomiss (%rdi), %xmm0 # sched: [7:1.00]
    892 ; SKX-NEXT:    setnp %al # sched: [1:0.50]
    893 ; SKX-NEXT:    sete %dl # sched: [1:0.50]
    894 ; SKX-NEXT:    andb %al, %dl # sched: [1:0.25]
    895 ; SKX-NEXT:    orb %cl, %dl # sched: [1:0.25]
    896 ; SKX-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
    897 ; SKX-NEXT:    retq # sched: [7:1.00]
    898 ;
    899 ; BTVER2-SSE-LABEL: test_comiss:
    900 ; BTVER2-SSE:       # %bb.0:
    901 ; BTVER2-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [3:1.00]
    902 ; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
    903 ; BTVER2-SSE-NEXT:    sete %cl # sched: [1:0.50]
    904 ; BTVER2-SSE-NEXT:    andb %al, %cl # sched: [1:0.50]
    905 ; BTVER2-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [8:1.00]
    906 ; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
    907 ; BTVER2-SSE-NEXT:    sete %dl # sched: [1:0.50]
    908 ; BTVER2-SSE-NEXT:    andb %al, %dl # sched: [1:0.50]
    909 ; BTVER2-SSE-NEXT:    orb %cl, %dl # sched: [1:0.50]
    910 ; BTVER2-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
    911 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
    912 ;
    913 ; BTVER2-LABEL: test_comiss:
    914 ; BTVER2:       # %bb.0:
    915 ; BTVER2-NEXT:    vcomiss %xmm1, %xmm0 # sched: [3:1.00]
    916 ; BTVER2-NEXT:    setnp %al # sched: [1:0.50]
    917 ; BTVER2-NEXT:    sete %cl # sched: [1:0.50]
    918 ; BTVER2-NEXT:    andb %al, %cl # sched: [1:0.50]
    919 ; BTVER2-NEXT:    vcomiss (%rdi), %xmm0 # sched: [8:1.00]
    920 ; BTVER2-NEXT:    setnp %al # sched: [1:0.50]
    921 ; BTVER2-NEXT:    sete %dl # sched: [1:0.50]
    922 ; BTVER2-NEXT:    andb %al, %dl # sched: [1:0.50]
    923 ; BTVER2-NEXT:    orb %cl, %dl # sched: [1:0.50]
    924 ; BTVER2-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
    925 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    926 ;
    927 ; ZNVER1-SSE-LABEL: test_comiss:
    928 ; ZNVER1-SSE:       # %bb.0:
    929 ; ZNVER1-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [3:1.00]
    930 ; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
    931 ; ZNVER1-SSE-NEXT:    sete %cl # sched: [1:0.25]
    932 ; ZNVER1-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
    933 ; ZNVER1-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [10:1.00]
    934 ; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
    935 ; ZNVER1-SSE-NEXT:    sete %dl # sched: [1:0.25]
    936 ; ZNVER1-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
    937 ; ZNVER1-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
    938 ; ZNVER1-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
    939 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
    940 ;
    941 ; ZNVER1-LABEL: test_comiss:
    942 ; ZNVER1:       # %bb.0:
    943 ; ZNVER1-NEXT:    vcomiss %xmm1, %xmm0 # sched: [3:1.00]
    944 ; ZNVER1-NEXT:    setnp %al # sched: [1:0.25]
    945 ; ZNVER1-NEXT:    sete %cl # sched: [1:0.25]
    946 ; ZNVER1-NEXT:    andb %al, %cl # sched: [1:0.25]
    947 ; ZNVER1-NEXT:    vcomiss (%rdi), %xmm0 # sched: [10:1.00]
    948 ; ZNVER1-NEXT:    setnp %al # sched: [1:0.25]
    949 ; ZNVER1-NEXT:    sete %dl # sched: [1:0.25]
    950 ; ZNVER1-NEXT:    andb %al, %dl # sched: [1:0.25]
    951 ; ZNVER1-NEXT:    orb %cl, %dl # sched: [1:0.25]
    952 ; ZNVER1-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
    953 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    954   %1 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
          ; The second compare takes its right-hand operand from memory; the
          ; assertions above expect it as the (%rdi) operand of comiss/vcomiss.
    955   %2 = load <4 x float>, <4 x float> *%a2, align 4
    956   %3 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %2)
    957   %4 = or i32 %1, %3
    958   ret i32 %4
    959 }
    960 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
    961 
    962 define float @test_cvtsi2ss(i32 %a0, i32 *%a1) {
        ; Scheduling test for the signed i32 -> float conversion
        ; (cvtsi2ssl / vcvtsi2ssl).
        ; %a0 is converted directly and a second i32 is loaded through %a1 and
        ; converted, so the assertions below cover both the register-source and
        ; the memory-source form. The two floats are added and the sum is
        ; returned, so both conversions feed the result.
        ; The expected order of the two conversions is not the same for every
        ; prefix below (some expect the memory form first).
        ; The assertion lines are autogenerated (see the header of this file);
        ; every expected instruction carries the "# sched" annotation that llc
        ; prints when run with -print-schedule.
    963 ; GENERIC-LABEL: test_cvtsi2ss:
    964 ; GENERIC:       # %bb.0:
    965 ; GENERIC-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [5:2.00]
    966 ; GENERIC-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00]
    967 ; GENERIC-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
    968 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    969 ;
    970 ; ATOM-LABEL: test_cvtsi2ss:
    971 ; ATOM:       # %bb.0:
    972 ; ATOM-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [7:3.50]
    973 ; ATOM-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [6:3.00]
    974 ; ATOM-NEXT:    addss %xmm1, %xmm0 # sched: [5:5.00]
    975 ; ATOM-NEXT:    retq # sched: [79:39.50]
    976 ;
    977 ; SLM-LABEL: test_cvtsi2ss:
    978 ; SLM:       # %bb.0:
    979 ; SLM-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [7:1.00]
    980 ; SLM-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [4:0.50]
    981 ; SLM-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
    982 ; SLM-NEXT:    retq # sched: [4:1.00]
    983 ;
    984 ; SANDY-SSE-LABEL: test_cvtsi2ss:
    985 ; SANDY-SSE:       # %bb.0:
    986 ; SANDY-SSE-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [5:2.00]
    987 ; SANDY-SSE-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00]
    988 ; SANDY-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
    989 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
    990 ;
    991 ; SANDY-LABEL: test_cvtsi2ss:
    992 ; SANDY:       # %bb.0:
    993 ; SANDY-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
    994 ; SANDY-NEXT:    vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
    995 ; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
    996 ; SANDY-NEXT:    retq # sched: [1:1.00]
    997 ;
    998 ; HASWELL-SSE-LABEL: test_cvtsi2ss:
    999 ; HASWELL-SSE:       # %bb.0:
   1000 ; HASWELL-SSE-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [4:1.00]
   1001 ; HASWELL-SSE-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
   1002 ; HASWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   1003 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1004 ;
   1005 ; HASWELL-LABEL: test_cvtsi2ss:
   1006 ; HASWELL:       # %bb.0:
   1007 ; HASWELL-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
   1008 ; HASWELL-NEXT:    vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   1009 ; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1010 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1011 ;
   1012 ; BROADWELL-SSE-LABEL: test_cvtsi2ss:
   1013 ; BROADWELL-SSE:       # %bb.0:
   1014 ; BROADWELL-SSE-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [4:1.00]
   1015 ; BROADWELL-SSE-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
   1016 ; BROADWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   1017 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1018 ;
   1019 ; BROADWELL-LABEL: test_cvtsi2ss:
   1020 ; BROADWELL:       # %bb.0:
   1021 ; BROADWELL-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
   1022 ; BROADWELL-NEXT:    vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   1023 ; BROADWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1024 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1025 ;
   1026 ; SKYLAKE-SSE-LABEL: test_cvtsi2ss:
   1027 ; SKYLAKE-SSE:       # %bb.0:
   1028 ; SKYLAKE-SSE-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [5:1.00]
   1029 ; SKYLAKE-SSE-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
   1030 ; SKYLAKE-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.50]
   1031 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1032 ;
   1033 ; SKYLAKE-LABEL: test_cvtsi2ss:
   1034 ; SKYLAKE:       # %bb.0:
   1035 ; SKYLAKE-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
   1036 ; SKYLAKE-NEXT:    vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   1037 ; SKYLAKE-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1038 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1039 ;
   1040 ; SKX-SSE-LABEL: test_cvtsi2ss:
   1041 ; SKX-SSE:       # %bb.0:
   1042 ; SKX-SSE-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [5:1.00]
   1043 ; SKX-SSE-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
   1044 ; SKX-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.50]
   1045 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1046 ;
   1047 ; SKX-LABEL: test_cvtsi2ss:
   1048 ; SKX:       # %bb.0:
   1049 ; SKX-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
   1050 ; SKX-NEXT:    vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   1051 ; SKX-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1052 ; SKX-NEXT:    retq # sched: [7:1.00]
   1053 ;
   1054 ; BTVER2-SSE-LABEL: test_cvtsi2ss:
   1055 ; BTVER2-SSE:       # %bb.0:
   1056 ; BTVER2-SSE-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [14:1.00]
   1057 ; BTVER2-SSE-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [9:1.00]
   1058 ; BTVER2-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   1059 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1060 ;
   1061 ; BTVER2-LABEL: test_cvtsi2ss:
   1062 ; BTVER2:       # %bb.0:
   1063 ; BTVER2-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [9:1.00]
   1064 ; BTVER2-NEXT:    vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [14:1.00]
   1065 ; BTVER2-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1066 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1067 ;
   1068 ; ZNVER1-SSE-LABEL: test_cvtsi2ss:
   1069 ; ZNVER1-SSE:       # %bb.0:
   1070 ; ZNVER1-SSE-NEXT:    cvtsi2ssl (%rsi), %xmm0 # sched: [12:1.00]
   1071 ; ZNVER1-SSE-NEXT:    cvtsi2ssl %edi, %xmm1 # sched: [5:1.00]
   1072 ; ZNVER1-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   1073 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1074 ;
   1075 ; ZNVER1-LABEL: test_cvtsi2ss:
   1076 ; ZNVER1:       # %bb.0:
   1077 ; ZNVER1-NEXT:    vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
   1078 ; ZNVER1-NEXT:    vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
   1079 ; ZNVER1-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1080 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1081   %1 = sitofp i32 %a0 to float
          ; Loaded operand; the assertions above expect it as the (%rsi) source
          ; of the conversion instruction rather than as a separate load.
   1082   %2 = load i32, i32 *%a1, align 4
   1083   %3 = sitofp i32 %2 to float
   1084   %4 = fadd float %1, %3
   1085   ret float %4
   1086 }
   1087 
   1088 define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) {
        ; Scheduling test for the signed i64 -> float conversion
        ; (cvtsi2ssq / vcvtsi2ssq).
        ; %a0 is converted directly and a second i64 is loaded through %a1 and
        ; converted, so the assertions below cover both the register-source and
        ; the memory-source form. The two floats are added and the sum is
        ; returned, so both conversions feed the result.
        ; The expected order of the two conversions is not the same for every
        ; prefix below (some expect the memory form first).
        ; The assertion lines are autogenerated (see the header of this file);
        ; every expected instruction carries the "# sched" annotation that llc
        ; prints when run with -print-schedule.
   1089 ; GENERIC-LABEL: test_cvtsi2ssq:
   1090 ; GENERIC:       # %bb.0:
   1091 ; GENERIC-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
   1092 ; GENERIC-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00]
   1093 ; GENERIC-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   1094 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1095 ;
   1096 ; ATOM-LABEL: test_cvtsi2ssq:
   1097 ; ATOM:       # %bb.0:
   1098 ; ATOM-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [7:3.50]
   1099 ; ATOM-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [6:3.00]
   1100 ; ATOM-NEXT:    addss %xmm1, %xmm0 # sched: [5:5.00]
   1101 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1102 ;
   1103 ; SLM-LABEL: test_cvtsi2ssq:
   1104 ; SLM:       # %bb.0:
   1105 ; SLM-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [7:1.00]
   1106 ; SLM-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [4:0.50]
   1107 ; SLM-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   1108 ; SLM-NEXT:    retq # sched: [4:1.00]
   1109 ;
   1110 ; SANDY-SSE-LABEL: test_cvtsi2ssq:
   1111 ; SANDY-SSE:       # %bb.0:
   1112 ; SANDY-SSE-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
   1113 ; SANDY-SSE-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00]
   1114 ; SANDY-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   1115 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1116 ;
   1117 ; SANDY-LABEL: test_cvtsi2ssq:
   1118 ; SANDY:       # %bb.0:
   1119 ; SANDY-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
   1120 ; SANDY-NEXT:    vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
   1121 ; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1122 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1123 ;
   1124 ; HASWELL-SSE-LABEL: test_cvtsi2ssq:
   1125 ; HASWELL-SSE:       # %bb.0:
   1126 ; HASWELL-SSE-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
   1127 ; HASWELL-SSE-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
   1128 ; HASWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   1129 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1130 ;
   1131 ; HASWELL-LABEL: test_cvtsi2ssq:
   1132 ; HASWELL:       # %bb.0:
   1133 ; HASWELL-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
   1134 ; HASWELL-NEXT:    vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   1135 ; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1136 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1137 ;
   1138 ; BROADWELL-SSE-LABEL: test_cvtsi2ssq:
   1139 ; BROADWELL-SSE:       # %bb.0:
   1140 ; BROADWELL-SSE-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
   1141 ; BROADWELL-SSE-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
   1142 ; BROADWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   1143 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1144 ;
   1145 ; BROADWELL-LABEL: test_cvtsi2ssq:
   1146 ; BROADWELL:       # %bb.0:
   1147 ; BROADWELL-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
   1148 ; BROADWELL-NEXT:    vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   1149 ; BROADWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1150 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1151 ;
   1152 ; SKYLAKE-SSE-LABEL: test_cvtsi2ssq:
   1153 ; SKYLAKE-SSE:       # %bb.0:
   1154 ; SKYLAKE-SSE-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00]
   1155 ; SKYLAKE-SSE-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
   1156 ; SKYLAKE-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.50]
   1157 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1158 ;
   1159 ; SKYLAKE-LABEL: test_cvtsi2ssq:
   1160 ; SKYLAKE:       # %bb.0:
   1161 ; SKYLAKE-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
   1162 ; SKYLAKE-NEXT:    vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   1163 ; SKYLAKE-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1164 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1165 ;
   1166 ; SKX-SSE-LABEL: test_cvtsi2ssq:
   1167 ; SKX-SSE:       # %bb.0:
   1168 ; SKX-SSE-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00]
   1169 ; SKX-SSE-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
   1170 ; SKX-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.50]
   1171 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1172 ;
   1173 ; SKX-LABEL: test_cvtsi2ssq:
   1174 ; SKX:       # %bb.0:
   1175 ; SKX-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
   1176 ; SKX-NEXT:    vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
   1177 ; SKX-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   1178 ; SKX-NEXT:    retq # sched: [7:1.00]
   1179 ;
   1180 ; BTVER2-SSE-LABEL: test_cvtsi2ssq:
   1181 ; BTVER2-SSE:       # %bb.0:
   1182 ; BTVER2-SSE-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [14:1.00]
   1183 ; BTVER2-SSE-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [9:1.00]
   1184 ; BTVER2-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   1185 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1186 ;
   1187 ; BTVER2-LABEL: test_cvtsi2ssq:
   1188 ; BTVER2:       # %bb.0:
   1189 ; BTVER2-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [9:1.00]
   1190 ; BTVER2-NEXT:    vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [14:1.00]
   1191 ; BTVER2-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1192 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1193 ;
   1194 ; ZNVER1-SSE-LABEL: test_cvtsi2ssq:
   1195 ; ZNVER1-SSE:       # %bb.0:
   1196 ; ZNVER1-SSE-NEXT:    cvtsi2ssq (%rsi), %xmm0 # sched: [12:1.00]
   1197 ; ZNVER1-SSE-NEXT:    cvtsi2ssq %rdi, %xmm1 # sched: [5:1.00]
   1198 ; ZNVER1-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
   1199 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1200 ;
   1201 ; ZNVER1-LABEL: test_cvtsi2ssq:
   1202 ; ZNVER1:       # %bb.0:
   1203 ; ZNVER1-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
   1204 ; ZNVER1-NEXT:    vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
   1205 ; ZNVER1-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   1206 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1207   %1 = sitofp i64 %a0 to float
          ; Loaded operand; the assertions above expect it as the (%rsi) source
          ; of the conversion instruction rather than as a separate load.
   1208   %2 = load i64, i64 *%a1, align 8
   1209   %3 = sitofp i64 %2 to float
   1210   %4 = fadd float %1, %3
   1211   ret float %4
   1212 }
   1213 
   1214 define i32 @test_cvtss2si(float %a0, float *%a1) {
   1215 ; GENERIC-LABEL: test_cvtss2si:
   1216 ; GENERIC:       # %bb.0:
   1217 ; GENERIC-NEXT:    cvtss2si %xmm0, %ecx # sched: [5:1.00]
   1218 ; GENERIC-NEXT:    cvtss2si (%rdi), %eax # sched: [9:1.00]
   1219 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
   1220 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1221 ;
   1222 ; ATOM-LABEL: test_cvtss2si:
   1223 ; ATOM:       # %bb.0:
   1224 ; ATOM-NEXT:    cvtss2si (%rdi), %eax # sched: [9:4.50]
   1225 ; ATOM-NEXT:    cvtss2si %xmm0, %ecx # sched: [8:4.00]
   1226 ; ATOM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
   1227 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1228 ;
   1229 ; SLM-LABEL: test_cvtss2si:
   1230 ; SLM:       # %bb.0:
   1231 ; SLM-NEXT:    cvtss2si (%rdi), %eax # sched: [7:1.00]
   1232 ; SLM-NEXT:    cvtss2si %xmm0, %ecx # sched: [4:0.50]
   1233 ; SLM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
   1234 ; SLM-NEXT:    retq # sched: [4:1.00]
   1235 ;
   1236 ; SANDY-SSE-LABEL: test_cvtss2si:
   1237 ; SANDY-SSE:       # %bb.0:
   1238 ; SANDY-SSE-NEXT:    cvtss2si %xmm0, %ecx # sched: [5:1.00]
   1239 ; SANDY-SSE-NEXT:    cvtss2si (%rdi), %eax # sched: [9:1.00]
   1240 ; SANDY-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.33]
   1241 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1242 ;
   1243 ; SANDY-LABEL: test_cvtss2si:
   1244 ; SANDY:       # %bb.0:
   1245 ; SANDY-NEXT:    vcvtss2si %xmm0, %ecx # sched: [5:1.00]
   1246 ; SANDY-NEXT:    vcvtss2si (%rdi), %eax # sched: [10:1.00]
   1247 ; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
   1248 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1249 ;
   1250 ; HASWELL-SSE-LABEL: test_cvtss2si:
   1251 ; HASWELL-SSE:       # %bb.0:
   1252 ; HASWELL-SSE-NEXT:    cvtss2si %xmm0, %ecx # sched: [4:1.00]
   1253 ; HASWELL-SSE-NEXT:    cvtss2si (%rdi), %eax # sched: [9:1.00]
   1254 ; HASWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1255 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1256 ;
   1257 ; HASWELL-LABEL: test_cvtss2si:
   1258 ; HASWELL:       # %bb.0:
   1259 ; HASWELL-NEXT:    vcvtss2si %xmm0, %ecx # sched: [4:1.00]
   1260 ; HASWELL-NEXT:    vcvtss2si (%rdi), %eax # sched: [9:1.00]
   1261 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1262 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1263 ;
   1264 ; BROADWELL-SSE-LABEL: test_cvtss2si:
   1265 ; BROADWELL-SSE:       # %bb.0:
   1266 ; BROADWELL-SSE-NEXT:    cvtss2si (%rdi), %eax # sched: [9:1.00]
   1267 ; BROADWELL-SSE-NEXT:    cvtss2si %xmm0, %ecx # sched: [4:1.00]
   1268 ; BROADWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1269 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1270 ;
   1271 ; BROADWELL-LABEL: test_cvtss2si:
   1272 ; BROADWELL:       # %bb.0:
   1273 ; BROADWELL-NEXT:    vcvtss2si (%rdi), %eax # sched: [9:1.00]
   1274 ; BROADWELL-NEXT:    vcvtss2si %xmm0, %ecx # sched: [4:1.00]
   1275 ; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1276 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1277 ;
   1278 ; SKYLAKE-SSE-LABEL: test_cvtss2si:
   1279 ; SKYLAKE-SSE:       # %bb.0:
   1280 ; SKYLAKE-SSE-NEXT:    cvtss2si %xmm0, %ecx # sched: [6:1.00]
   1281 ; SKYLAKE-SSE-NEXT:    cvtss2si (%rdi), %eax # sched: [11:1.00]
   1282 ; SKYLAKE-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1283 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1284 ;
   1285 ; SKYLAKE-LABEL: test_cvtss2si:
   1286 ; SKYLAKE:       # %bb.0:
   1287 ; SKYLAKE-NEXT:    vcvtss2si %xmm0, %ecx # sched: [6:1.00]
   1288 ; SKYLAKE-NEXT:    vcvtss2si (%rdi), %eax # sched: [11:1.00]
   1289 ; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1290 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1291 ;
   1292 ; SKX-SSE-LABEL: test_cvtss2si:
   1293 ; SKX-SSE:       # %bb.0:
   1294 ; SKX-SSE-NEXT:    cvtss2si %xmm0, %ecx # sched: [6:1.00]
   1295 ; SKX-SSE-NEXT:    cvtss2si (%rdi), %eax # sched: [11:1.00]
   1296 ; SKX-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1297 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1298 ;
   1299 ; SKX-LABEL: test_cvtss2si:
   1300 ; SKX:       # %bb.0:
   1301 ; SKX-NEXT:    vcvtss2si %xmm0, %ecx # sched: [6:1.00]
   1302 ; SKX-NEXT:    vcvtss2si (%rdi), %eax # sched: [11:1.00]
   1303 ; SKX-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1304 ; SKX-NEXT:    retq # sched: [7:1.00]
   1305 ;
   1306 ; BTVER2-SSE-LABEL: test_cvtss2si:
   1307 ; BTVER2-SSE:       # %bb.0:
   1308 ; BTVER2-SSE-NEXT:    cvtss2si (%rdi), %eax # sched: [12:1.00]
   1309 ; BTVER2-SSE-NEXT:    cvtss2si %xmm0, %ecx # sched: [7:1.00]
   1310 ; BTVER2-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.50]
   1311 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1312 ;
   1313 ; BTVER2-LABEL: test_cvtss2si:
   1314 ; BTVER2:       # %bb.0:
   1315 ; BTVER2-NEXT:    vcvtss2si (%rdi), %eax # sched: [12:1.00]
   1316 ; BTVER2-NEXT:    vcvtss2si %xmm0, %ecx # sched: [7:1.00]
   1317 ; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
   1318 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1319 ;
   1320 ; ZNVER1-SSE-LABEL: test_cvtss2si:
   1321 ; ZNVER1-SSE:       # %bb.0:
   1322 ; ZNVER1-SSE-NEXT:    cvtss2si (%rdi), %eax # sched: [12:1.00]
   1323 ; ZNVER1-SSE-NEXT:    cvtss2si %xmm0, %ecx # sched: [5:1.00]
   1324 ; ZNVER1-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1325 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1326 ;
   1327 ; ZNVER1-LABEL: test_cvtss2si:
   1328 ; ZNVER1:       # %bb.0:
   1329 ; ZNVER1-NEXT:    vcvtss2si (%rdi), %eax # sched: [12:1.00]
   1330 ; ZNVER1-NEXT:    vcvtss2si %xmm0, %ecx # sched: [5:1.00]
   1331 ; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1332 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1333   %1 = insertelement <4 x float> undef, float %a0, i32 0
   1334   %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %1)
   1335   %3 = load float, float *%a1, align 4
   1336   %4 = insertelement <4 x float> undef, float %3, i32 0
   1337   %5 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %4)
   1338   %6 = add i32 %2, %5
   1339   ret i32 %6
   1340 }
   1341 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone ; intrinsic called by @test_cvtss2si: <4 x float> operand, i32 result
   1342 
   1343 define i64 @test_cvtss2siq(float %a0, float *%a1) {
        ; Schedule test for the 64-bit-result cvtss2si conversion. The IR at the
        ; bottom of this function converts one float passed as an argument (%a0) and
        ; one float loaded through %a1 with @llvm.x86.sse.cvtss2si64, then returns
        ; the sum of the two i64 results. The autogenerated assertion groups below
        ; list, per FileCheck prefix, the expected register-source and
        ; memory-source instruction with the "# sched:" annotation printed by llc.
   1344 ; GENERIC-LABEL: test_cvtss2siq:
   1345 ; GENERIC:       # %bb.0:
   1346 ; GENERIC-NEXT:    cvtss2si %xmm0, %rcx # sched: [5:1.00]
   1347 ; GENERIC-NEXT:    cvtss2si (%rdi), %rax # sched: [9:1.00]
   1348 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
   1349 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1350 ;
   1351 ; ATOM-LABEL: test_cvtss2siq:
   1352 ; ATOM:       # %bb.0:
   1353 ; ATOM-NEXT:    cvtss2si (%rdi), %rax # sched: [10:5.00]
   1354 ; ATOM-NEXT:    cvtss2si %xmm0, %rcx # sched: [9:4.50]
   1355 ; ATOM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
   1356 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1357 ;
   1358 ; SLM-LABEL: test_cvtss2siq:
   1359 ; SLM:       # %bb.0:
   1360 ; SLM-NEXT:    cvtss2si (%rdi), %rax # sched: [7:1.00]
   1361 ; SLM-NEXT:    cvtss2si %xmm0, %rcx # sched: [4:0.50]
   1362 ; SLM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
   1363 ; SLM-NEXT:    retq # sched: [4:1.00]
   1364 ;
   1365 ; SANDY-SSE-LABEL: test_cvtss2siq:
   1366 ; SANDY-SSE:       # %bb.0:
   1367 ; SANDY-SSE-NEXT:    cvtss2si %xmm0, %rcx # sched: [5:1.00]
   1368 ; SANDY-SSE-NEXT:    cvtss2si (%rdi), %rax # sched: [9:1.00]
   1369 ; SANDY-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.33]
   1370 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1371 ;
   1372 ; SANDY-LABEL: test_cvtss2siq:
   1373 ; SANDY:       # %bb.0:
   1374 ; SANDY-NEXT:    vcvtss2si %xmm0, %rcx # sched: [5:1.00]
   1375 ; SANDY-NEXT:    vcvtss2si (%rdi), %rax # sched: [10:1.00]
   1376 ; SANDY-NEXT:    addq %rcx, %rax # sched: [1:0.33]
   1377 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1378 ;
   1379 ; HASWELL-SSE-LABEL: test_cvtss2siq:
   1380 ; HASWELL-SSE:       # %bb.0:
   1381 ; HASWELL-SSE-NEXT:    cvtss2si %xmm0, %rcx # sched: [4:1.00]
   1382 ; HASWELL-SSE-NEXT:    cvtss2si (%rdi), %rax # sched: [9:1.00]
   1383 ; HASWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1384 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1385 ;
   1386 ; HASWELL-LABEL: test_cvtss2siq:
   1387 ; HASWELL:       # %bb.0:
   1388 ; HASWELL-NEXT:    vcvtss2si %xmm0, %rcx # sched: [4:1.00]
   1389 ; HASWELL-NEXT:    vcvtss2si (%rdi), %rax # sched: [9:1.00]
   1390 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1391 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1392 ;
   1393 ; BROADWELL-SSE-LABEL: test_cvtss2siq:
   1394 ; BROADWELL-SSE:       # %bb.0:
   1395 ; BROADWELL-SSE-NEXT:    cvtss2si (%rdi), %rax # sched: [9:1.00]
   1396 ; BROADWELL-SSE-NEXT:    cvtss2si %xmm0, %rcx # sched: [4:1.00]
   1397 ; BROADWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1398 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1399 ;
   1400 ; BROADWELL-LABEL: test_cvtss2siq:
   1401 ; BROADWELL:       # %bb.0:
   1402 ; BROADWELL-NEXT:    vcvtss2si (%rdi), %rax # sched: [9:1.00]
   1403 ; BROADWELL-NEXT:    vcvtss2si %xmm0, %rcx # sched: [4:1.00]
   1404 ; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1405 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1406 ;
   1407 ; SKYLAKE-SSE-LABEL: test_cvtss2siq:
   1408 ; SKYLAKE-SSE:       # %bb.0:
   1409 ; SKYLAKE-SSE-NEXT:    cvtss2si %xmm0, %rcx # sched: [6:1.00]
   1410 ; SKYLAKE-SSE-NEXT:    cvtss2si (%rdi), %rax # sched: [11:1.00]
   1411 ; SKYLAKE-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1412 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1413 ;
   1414 ; SKYLAKE-LABEL: test_cvtss2siq:
   1415 ; SKYLAKE:       # %bb.0:
   1416 ; SKYLAKE-NEXT:    vcvtss2si %xmm0, %rcx # sched: [6:1.00]
   1417 ; SKYLAKE-NEXT:    vcvtss2si (%rdi), %rax # sched: [11:1.00]
   1418 ; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1419 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1420 ;
   1421 ; SKX-SSE-LABEL: test_cvtss2siq:
   1422 ; SKX-SSE:       # %bb.0:
   1423 ; SKX-SSE-NEXT:    cvtss2si %xmm0, %rcx # sched: [7:1.00]
   1424 ; SKX-SSE-NEXT:    cvtss2si (%rdi), %rax # sched: [11:1.00]
   1425 ; SKX-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1426 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1427 ;
   1428 ; SKX-LABEL: test_cvtss2siq:
   1429 ; SKX:       # %bb.0:
   1430 ; SKX-NEXT:    vcvtss2si %xmm0, %rcx # sched: [7:1.00]
   1431 ; SKX-NEXT:    vcvtss2si (%rdi), %rax # sched: [11:1.00]
   1432 ; SKX-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1433 ; SKX-NEXT:    retq # sched: [7:1.00]
   1434 ;
   1435 ; BTVER2-SSE-LABEL: test_cvtss2siq:
   1436 ; BTVER2-SSE:       # %bb.0:
   1437 ; BTVER2-SSE-NEXT:    cvtss2si (%rdi), %rax # sched: [12:1.00]
   1438 ; BTVER2-SSE-NEXT:    cvtss2si %xmm0, %rcx # sched: [7:1.00]
   1439 ; BTVER2-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.50]
   1440 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1441 ;
   1442 ; BTVER2-LABEL: test_cvtss2siq:
   1443 ; BTVER2:       # %bb.0:
   1444 ; BTVER2-NEXT:    vcvtss2si (%rdi), %rax # sched: [12:1.00]
   1445 ; BTVER2-NEXT:    vcvtss2si %xmm0, %rcx # sched: [7:1.00]
   1446 ; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
   1447 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1448 ;
   1449 ; ZNVER1-SSE-LABEL: test_cvtss2siq:
   1450 ; ZNVER1-SSE:       # %bb.0:
   1451 ; ZNVER1-SSE-NEXT:    cvtss2si (%rdi), %rax # sched: [12:1.00]
   1452 ; ZNVER1-SSE-NEXT:    cvtss2si %xmm0, %rcx # sched: [5:1.00]
   1453 ; ZNVER1-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1454 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1455 ;
   1456 ; ZNVER1-LABEL: test_cvtss2siq:
   1457 ; ZNVER1:       # %bb.0:
   1458 ; ZNVER1-NEXT:    vcvtss2si (%rdi), %rax # sched: [12:1.00]
   1459 ; ZNVER1-NEXT:    vcvtss2si %xmm0, %rcx # sched: [5:1.00]
   1460 ; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1461 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register-source conversion: %a0 goes into lane 0 of an otherwise undef
          ; vector because the intrinsic is called with a <4 x float> operand.
   1462   %1 = insertelement <4 x float> undef, float %a0, i32 0
   1463   %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %1)
          ; Memory-source conversion: same intrinsic, operand loaded through %a1.
   1464   %3 = load float, float *%a1, align 4
   1465   %4 = insertelement <4 x float> undef, float %3, i32 0
   1466   %5 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %4)
          ; Both conversion results are summed and returned.
   1467   %6 = add i64 %2, %5
   1468   ret i64 %6
   1469 }
   1470 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone ; intrinsic called by @test_cvtss2siq: <4 x float> operand, i64 result
   1471 
   1472 define i32 @test_cvttss2si(float %a0, float *%a1) {
        ; Schedule test for float -> i32 conversion written as plain fptosi. The IR
        ; at the bottom of this function converts one float passed as an argument
        ; (%a0) and one float loaded through %a1, then returns the sum of the two
        ; i32 results. The autogenerated assertion groups below list, per FileCheck
        ; prefix, the expected register-source and memory-source (v)cvttss2si with
        ; the "# sched:" annotation printed by llc.
   1473 ; GENERIC-LABEL: test_cvttss2si:
   1474 ; GENERIC:       # %bb.0:
   1475 ; GENERIC-NEXT:    cvttss2si %xmm0, %ecx # sched: [5:1.00]
   1476 ; GENERIC-NEXT:    cvttss2si (%rdi), %eax # sched: [9:1.00]
   1477 ; GENERIC-NEXT:    addl %ecx, %eax # sched: [1:0.33]
   1478 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1479 ;
   1480 ; ATOM-LABEL: test_cvttss2si:
   1481 ; ATOM:       # %bb.0:
   1482 ; ATOM-NEXT:    cvttss2si (%rdi), %eax # sched: [9:4.50]
   1483 ; ATOM-NEXT:    cvttss2si %xmm0, %ecx # sched: [8:4.00]
   1484 ; ATOM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
   1485 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1486 ;
   1487 ; SLM-LABEL: test_cvttss2si:
   1488 ; SLM:       # %bb.0:
   1489 ; SLM-NEXT:    cvttss2si (%rdi), %eax # sched: [7:1.00]
   1490 ; SLM-NEXT:    cvttss2si %xmm0, %ecx # sched: [4:0.50]
   1491 ; SLM-NEXT:    addl %ecx, %eax # sched: [1:0.50]
   1492 ; SLM-NEXT:    retq # sched: [4:1.00]
   1493 ;
   1494 ; SANDY-SSE-LABEL: test_cvttss2si:
   1495 ; SANDY-SSE:       # %bb.0:
   1496 ; SANDY-SSE-NEXT:    cvttss2si %xmm0, %ecx # sched: [5:1.00]
   1497 ; SANDY-SSE-NEXT:    cvttss2si (%rdi), %eax # sched: [9:1.00]
   1498 ; SANDY-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.33]
   1499 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1500 ;
   1501 ; SANDY-LABEL: test_cvttss2si:
   1502 ; SANDY:       # %bb.0:
   1503 ; SANDY-NEXT:    vcvttss2si %xmm0, %ecx # sched: [5:1.00]
   1504 ; SANDY-NEXT:    vcvttss2si (%rdi), %eax # sched: [10:1.00]
   1505 ; SANDY-NEXT:    addl %ecx, %eax # sched: [1:0.33]
   1506 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1507 ;
   1508 ; HASWELL-SSE-LABEL: test_cvttss2si:
   1509 ; HASWELL-SSE:       # %bb.0:
   1510 ; HASWELL-SSE-NEXT:    cvttss2si %xmm0, %ecx # sched: [4:1.00]
   1511 ; HASWELL-SSE-NEXT:    cvttss2si (%rdi), %eax # sched: [9:1.00]
   1512 ; HASWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1513 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1514 ;
   1515 ; HASWELL-LABEL: test_cvttss2si:
   1516 ; HASWELL:       # %bb.0:
   1517 ; HASWELL-NEXT:    vcvttss2si %xmm0, %ecx # sched: [4:1.00]
   1518 ; HASWELL-NEXT:    vcvttss2si (%rdi), %eax # sched: [9:1.00]
   1519 ; HASWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1520 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1521 ;
   1522 ; BROADWELL-SSE-LABEL: test_cvttss2si:
   1523 ; BROADWELL-SSE:       # %bb.0:
   1524 ; BROADWELL-SSE-NEXT:    cvttss2si (%rdi), %eax # sched: [9:1.00]
   1525 ; BROADWELL-SSE-NEXT:    cvttss2si %xmm0, %ecx # sched: [4:1.00]
   1526 ; BROADWELL-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1527 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1528 ;
   1529 ; BROADWELL-LABEL: test_cvttss2si:
   1530 ; BROADWELL:       # %bb.0:
   1531 ; BROADWELL-NEXT:    vcvttss2si (%rdi), %eax # sched: [9:1.00]
   1532 ; BROADWELL-NEXT:    vcvttss2si %xmm0, %ecx # sched: [4:1.00]
   1533 ; BROADWELL-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1534 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1535 ;
   1536 ; SKYLAKE-SSE-LABEL: test_cvttss2si:
   1537 ; SKYLAKE-SSE:       # %bb.0:
   1538 ; SKYLAKE-SSE-NEXT:    cvttss2si %xmm0, %ecx # sched: [7:1.00]
   1539 ; SKYLAKE-SSE-NEXT:    cvttss2si (%rdi), %eax # sched: [11:1.00]
   1540 ; SKYLAKE-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1541 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1542 ;
   1543 ; SKYLAKE-LABEL: test_cvttss2si:
   1544 ; SKYLAKE:       # %bb.0:
   1545 ; SKYLAKE-NEXT:    vcvttss2si %xmm0, %ecx # sched: [7:1.00]
   1546 ; SKYLAKE-NEXT:    vcvttss2si (%rdi), %eax # sched: [11:1.00]
   1547 ; SKYLAKE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1548 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1549 ;
   1550 ; SKX-SSE-LABEL: test_cvttss2si:
   1551 ; SKX-SSE:       # %bb.0:
   1552 ; SKX-SSE-NEXT:    cvttss2si %xmm0, %ecx # sched: [6:1.00]
   1553 ; SKX-SSE-NEXT:    cvttss2si (%rdi), %eax # sched: [11:1.00]
   1554 ; SKX-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1555 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1556 ;
   1557 ; SKX-LABEL: test_cvttss2si:
   1558 ; SKX:       # %bb.0:
   1559 ; SKX-NEXT:    vcvttss2si %xmm0, %ecx # sched: [6:1.00]
   1560 ; SKX-NEXT:    vcvttss2si (%rdi), %eax # sched: [11:1.00]
   1561 ; SKX-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1562 ; SKX-NEXT:    retq # sched: [7:1.00]
   1563 ;
   1564 ; BTVER2-SSE-LABEL: test_cvttss2si:
   1565 ; BTVER2-SSE:       # %bb.0:
   1566 ; BTVER2-SSE-NEXT:    cvttss2si (%rdi), %eax # sched: [12:1.00]
   1567 ; BTVER2-SSE-NEXT:    cvttss2si %xmm0, %ecx # sched: [7:1.00]
   1568 ; BTVER2-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.50]
   1569 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1570 ;
   1571 ; BTVER2-LABEL: test_cvttss2si:
   1572 ; BTVER2:       # %bb.0:
   1573 ; BTVER2-NEXT:    vcvttss2si (%rdi), %eax # sched: [12:1.00]
   1574 ; BTVER2-NEXT:    vcvttss2si %xmm0, %ecx # sched: [7:1.00]
   1575 ; BTVER2-NEXT:    addl %ecx, %eax # sched: [1:0.50]
   1576 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1577 ;
   1578 ; ZNVER1-SSE-LABEL: test_cvttss2si:
   1579 ; ZNVER1-SSE:       # %bb.0:
   1580 ; ZNVER1-SSE-NEXT:    cvttss2si (%rdi), %eax # sched: [12:1.00]
   1581 ; ZNVER1-SSE-NEXT:    cvttss2si %xmm0, %ecx # sched: [5:1.00]
   1582 ; ZNVER1-SSE-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1583 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1584 ;
   1585 ; ZNVER1-LABEL: test_cvttss2si:
   1586 ; ZNVER1:       # %bb.0:
   1587 ; ZNVER1-NEXT:    vcvttss2si (%rdi), %eax # sched: [12:1.00]
   1588 ; ZNVER1-NEXT:    vcvttss2si %xmm0, %ecx # sched: [5:1.00]
   1589 ; ZNVER1-NEXT:    addl %ecx, %eax # sched: [1:0.25]
   1590 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register-source conversion of the float argument.
   1591   %1 = fptosi float %a0 to i32
          ; Memory-source conversion: the operand is loaded through %a1.
   1592   %2 = load float, float *%a1, align 4
   1593   %3 = fptosi float %2 to i32
          ; Both conversion results are summed and returned.
   1594   %4 = add i32 %1, %3
   1595   ret i32 %4
   1596 }
   1597 
   1598 define i64 @test_cvttss2siq(float %a0, float *%a1) {
        ; Schedule test for float -> i64 conversion written as plain fptosi. The IR
        ; at the bottom of this function converts one float passed as an argument
        ; (%a0) and one float loaded through %a1, then returns the sum of the two
        ; i64 results. The autogenerated assertion groups below list, per FileCheck
        ; prefix, the expected register-source and memory-source (v)cvttss2si with
        ; 64-bit destination registers and the "# sched:" annotation printed by llc.
   1599 ; GENERIC-LABEL: test_cvttss2siq:
   1600 ; GENERIC:       # %bb.0:
   1601 ; GENERIC-NEXT:    cvttss2si %xmm0, %rcx # sched: [5:1.00]
   1602 ; GENERIC-NEXT:    cvttss2si (%rdi), %rax # sched: [9:1.00]
   1603 ; GENERIC-NEXT:    addq %rcx, %rax # sched: [1:0.33]
   1604 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1605 ;
   1606 ; ATOM-LABEL: test_cvttss2siq:
   1607 ; ATOM:       # %bb.0:
   1608 ; ATOM-NEXT:    cvttss2si (%rdi), %rax # sched: [10:5.00]
   1609 ; ATOM-NEXT:    cvttss2si %xmm0, %rcx # sched: [9:4.50]
   1610 ; ATOM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
   1611 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1612 ;
   1613 ; SLM-LABEL: test_cvttss2siq:
   1614 ; SLM:       # %bb.0:
   1615 ; SLM-NEXT:    cvttss2si (%rdi), %rax # sched: [7:1.00]
   1616 ; SLM-NEXT:    cvttss2si %xmm0, %rcx # sched: [4:0.50]
   1617 ; SLM-NEXT:    addq %rcx, %rax # sched: [1:0.50]
   1618 ; SLM-NEXT:    retq # sched: [4:1.00]
   1619 ;
   1620 ; SANDY-SSE-LABEL: test_cvttss2siq:
   1621 ; SANDY-SSE:       # %bb.0:
   1622 ; SANDY-SSE-NEXT:    cvttss2si %xmm0, %rcx # sched: [5:1.00]
   1623 ; SANDY-SSE-NEXT:    cvttss2si (%rdi), %rax # sched: [9:1.00]
   1624 ; SANDY-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.33]
   1625 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1626 ;
   1627 ; SANDY-LABEL: test_cvttss2siq:
   1628 ; SANDY:       # %bb.0:
   1629 ; SANDY-NEXT:    vcvttss2si %xmm0, %rcx # sched: [5:1.00]
   1630 ; SANDY-NEXT:    vcvttss2si (%rdi), %rax # sched: [10:1.00]
   1631 ; SANDY-NEXT:    addq %rcx, %rax # sched: [1:0.33]
   1632 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1633 ;
   1634 ; HASWELL-SSE-LABEL: test_cvttss2siq:
   1635 ; HASWELL-SSE:       # %bb.0:
   1636 ; HASWELL-SSE-NEXT:    cvttss2si (%rdi), %rax # sched: [10:1.00]
   1637 ; HASWELL-SSE-NEXT:    cvttss2si %xmm0, %rcx # sched: [4:1.00]
   1638 ; HASWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1639 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1640 ;
   1641 ; HASWELL-LABEL: test_cvttss2siq:
   1642 ; HASWELL:       # %bb.0:
   1643 ; HASWELL-NEXT:    vcvttss2si %xmm0, %rcx # sched: [4:1.00]
   1644 ; HASWELL-NEXT:    vcvttss2si (%rdi), %rax # sched: [9:1.00]
   1645 ; HASWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1646 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1647 ;
   1648 ; BROADWELL-SSE-LABEL: test_cvttss2siq:
   1649 ; BROADWELL-SSE:       # %bb.0:
   1650 ; BROADWELL-SSE-NEXT:    cvttss2si (%rdi), %rax # sched: [10:1.00]
   1651 ; BROADWELL-SSE-NEXT:    cvttss2si %xmm0, %rcx # sched: [4:1.00]
   1652 ; BROADWELL-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1653 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1654 ;
   1655 ; BROADWELL-LABEL: test_cvttss2siq:
   1656 ; BROADWELL:       # %bb.0:
   1657 ; BROADWELL-NEXT:    vcvttss2si (%rdi), %rax # sched: [9:1.00]
   1658 ; BROADWELL-NEXT:    vcvttss2si %xmm0, %rcx # sched: [4:1.00]
   1659 ; BROADWELL-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1660 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1661 ;
   1662 ; SKYLAKE-SSE-LABEL: test_cvttss2siq:
   1663 ; SKYLAKE-SSE:       # %bb.0:
   1664 ; SKYLAKE-SSE-NEXT:    cvttss2si %xmm0, %rcx # sched: [7:1.00]
   1665 ; SKYLAKE-SSE-NEXT:    cvttss2si (%rdi), %rax # sched: [12:1.00]
   1666 ; SKYLAKE-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1667 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1668 ;
   1669 ; SKYLAKE-LABEL: test_cvttss2siq:
   1670 ; SKYLAKE:       # %bb.0:
   1671 ; SKYLAKE-NEXT:    vcvttss2si %xmm0, %rcx # sched: [7:1.00]
   1672 ; SKYLAKE-NEXT:    vcvttss2si (%rdi), %rax # sched: [11:1.00]
   1673 ; SKYLAKE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1674 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1675 ;
   1676 ; SKX-SSE-LABEL: test_cvttss2siq:
   1677 ; SKX-SSE:       # %bb.0:
   1678 ; SKX-SSE-NEXT:    cvttss2si %xmm0, %rcx # sched: [7:1.00]
   1679 ; SKX-SSE-NEXT:    cvttss2si (%rdi), %rax # sched: [12:1.00]
   1680 ; SKX-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1681 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1682 ;
   1683 ; SKX-LABEL: test_cvttss2siq:
   1684 ; SKX:       # %bb.0:
   1685 ; SKX-NEXT:    vcvttss2si %xmm0, %rcx # sched: [7:1.00]
   1686 ; SKX-NEXT:    vcvttss2si (%rdi), %rax # sched: [11:1.00]
   1687 ; SKX-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1688 ; SKX-NEXT:    retq # sched: [7:1.00]
   1689 ;
   1690 ; BTVER2-SSE-LABEL: test_cvttss2siq:
   1691 ; BTVER2-SSE:       # %bb.0:
   1692 ; BTVER2-SSE-NEXT:    cvttss2si (%rdi), %rax # sched: [12:1.00]
   1693 ; BTVER2-SSE-NEXT:    cvttss2si %xmm0, %rcx # sched: [7:1.00]
   1694 ; BTVER2-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.50]
   1695 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1696 ;
   1697 ; BTVER2-LABEL: test_cvttss2siq:
   1698 ; BTVER2:       # %bb.0:
   1699 ; BTVER2-NEXT:    vcvttss2si (%rdi), %rax # sched: [12:1.00]
   1700 ; BTVER2-NEXT:    vcvttss2si %xmm0, %rcx # sched: [7:1.00]
   1701 ; BTVER2-NEXT:    addq %rcx, %rax # sched: [1:0.50]
   1702 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1703 ;
   1704 ; ZNVER1-SSE-LABEL: test_cvttss2siq:
   1705 ; ZNVER1-SSE:       # %bb.0:
   1706 ; ZNVER1-SSE-NEXT:    cvttss2si (%rdi), %rax # sched: [12:1.00]
   1707 ; ZNVER1-SSE-NEXT:    cvttss2si %xmm0, %rcx # sched: [5:1.00]
   1708 ; ZNVER1-SSE-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1709 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1710 ;
   1711 ; ZNVER1-LABEL: test_cvttss2siq:
   1712 ; ZNVER1:       # %bb.0:
   1713 ; ZNVER1-NEXT:    vcvttss2si (%rdi), %rax # sched: [12:1.00]
   1714 ; ZNVER1-NEXT:    vcvttss2si %xmm0, %rcx # sched: [5:1.00]
   1715 ; ZNVER1-NEXT:    addq %rcx, %rax # sched: [1:0.25]
   1716 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register-source conversion of the float argument.
   1717   %1 = fptosi float %a0 to i64
          ; Memory-source conversion: the operand is loaded through %a1.
   1718   %2 = load float, float *%a1, align 4
   1719   %3 = fptosi float %2 to i64
          ; Both conversion results are summed and returned.
   1720   %4 = add i64 %1, %3
   1721   ret i64 %4
   1722 }
   1723 
   1724 define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Schedule test for packed single-precision division. The IR at the bottom
        ; of this function divides %a0 by %a1, then divides that quotient by a
        ; vector loaded through %a2. The autogenerated assertion groups below list,
        ; per FileCheck prefix, the expected register-operand and memory-operand
        ; (v)divps with the "# sched:" annotation printed by llc.
   1725 ; GENERIC-LABEL: test_divps:
   1726 ; GENERIC:       # %bb.0:
   1727 ; GENERIC-NEXT:    divps %xmm1, %xmm0 # sched: [14:14.00]
   1728 ; GENERIC-NEXT:    divps (%rdi), %xmm0 # sched: [20:14.00]
   1729 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1730 ;
   1731 ; ATOM-LABEL: test_divps:
   1732 ; ATOM:       # %bb.0:
   1733 ; ATOM-NEXT:    divps %xmm1, %xmm0 # sched: [70:35.00]
   1734 ; ATOM-NEXT:    divps (%rdi), %xmm0 # sched: [70:35.00]
   1735 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1736 ;
   1737 ; SLM-LABEL: test_divps:
   1738 ; SLM:       # %bb.0:
   1739 ; SLM-NEXT:    divps %xmm1, %xmm0 # sched: [39:39.00]
   1740 ; SLM-NEXT:    divps (%rdi), %xmm0 # sched: [42:39.00]
   1741 ; SLM-NEXT:    retq # sched: [4:1.00]
   1742 ;
   1743 ; SANDY-SSE-LABEL: test_divps:
   1744 ; SANDY-SSE:       # %bb.0:
   1745 ; SANDY-SSE-NEXT:    divps %xmm1, %xmm0 # sched: [14:14.00]
   1746 ; SANDY-SSE-NEXT:    divps (%rdi), %xmm0 # sched: [20:14.00]
   1747 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1748 ;
   1749 ; SANDY-LABEL: test_divps:
   1750 ; SANDY:       # %bb.0:
   1751 ; SANDY-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [14:14.00]
   1752 ; SANDY-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [20:14.00]
   1753 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1754 ;
   1755 ; HASWELL-SSE-LABEL: test_divps:
   1756 ; HASWELL-SSE:       # %bb.0:
   1757 ; HASWELL-SSE-NEXT:    divps %xmm1, %xmm0 # sched: [13:7.00]
   1758 ; HASWELL-SSE-NEXT:    divps (%rdi), %xmm0 # sched: [19:7.00]
   1759 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1760 ;
   1761 ; HASWELL-LABEL: test_divps:
   1762 ; HASWELL:       # %bb.0:
   1763 ; HASWELL-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [13:7.00]
   1764 ; HASWELL-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [19:7.00]
   1765 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1766 ;
   1767 ; BROADWELL-SSE-LABEL: test_divps:
   1768 ; BROADWELL-SSE:       # %bb.0:
   1769 ; BROADWELL-SSE-NEXT:    divps %xmm1, %xmm0 # sched: [11:5.00]
   1770 ; BROADWELL-SSE-NEXT:    divps (%rdi), %xmm0 # sched: [16:5.00]
   1771 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1772 ;
   1773 ; BROADWELL-LABEL: test_divps:
   1774 ; BROADWELL:       # %bb.0:
   1775 ; BROADWELL-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [11:5.00]
   1776 ; BROADWELL-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [16:5.00]
   1777 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1778 ;
   1779 ; SKYLAKE-SSE-LABEL: test_divps:
   1780 ; SKYLAKE-SSE:       # %bb.0:
   1781 ; SKYLAKE-SSE-NEXT:    divps %xmm1, %xmm0 # sched: [11:3.00]
   1782 ; SKYLAKE-SSE-NEXT:    divps (%rdi), %xmm0 # sched: [17:5.00]
   1783 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1784 ;
   1785 ; SKYLAKE-LABEL: test_divps:
   1786 ; SKYLAKE:       # %bb.0:
   1787 ; SKYLAKE-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
   1788 ; SKYLAKE-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [17:5.00]
   1789 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1790 ;
   1791 ; SKX-SSE-LABEL: test_divps:
   1792 ; SKX-SSE:       # %bb.0:
   1793 ; SKX-SSE-NEXT:    divps %xmm1, %xmm0 # sched: [11:3.00]
   1794 ; SKX-SSE-NEXT:    divps (%rdi), %xmm0 # sched: [17:5.00]
   1795 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1796 ;
   1797 ; SKX-LABEL: test_divps:
   1798 ; SKX:       # %bb.0:
   1799 ; SKX-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
   1800 ; SKX-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [17:5.00]
   1801 ; SKX-NEXT:    retq # sched: [7:1.00]
   1802 ;
   1803 ; BTVER2-SSE-LABEL: test_divps:
   1804 ; BTVER2-SSE:       # %bb.0:
   1805 ; BTVER2-SSE-NEXT:    divps %xmm1, %xmm0 # sched: [19:19.00]
   1806 ; BTVER2-SSE-NEXT:    divps (%rdi), %xmm0 # sched: [24:19.00]
   1807 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1808 ;
   1809 ; BTVER2-LABEL: test_divps:
   1810 ; BTVER2:       # %bb.0:
   1811 ; BTVER2-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
   1812 ; BTVER2-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
   1813 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1814 ;
   1815 ; ZNVER1-SSE-LABEL: test_divps:
   1816 ; ZNVER1-SSE:       # %bb.0:
   1817 ; ZNVER1-SSE-NEXT:    divps %xmm1, %xmm0 # sched: [15:1.00]
   1818 ; ZNVER1-SSE-NEXT:    divps (%rdi), %xmm0 # sched: [22:1.00]
   1819 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1820 ;
   1821 ; ZNVER1-LABEL: test_divps:
   1822 ; ZNVER1:       # %bb.0:
   1823 ; ZNVER1-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
   1824 ; ZNVER1-NEXT:    vdivps (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
   1825 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register-operand division of the two vector arguments.
   1826   %1 = fdiv <4 x float> %a0, %a1
          ; Memory-operand division: the divisor is a 16-byte-aligned vector load
          ; through %a2, applied to the previous quotient.
   1827   %2 = load <4 x float>, <4 x float> *%a2, align 16
   1828   %3 = fdiv <4 x float> %1, %2
   1829   ret <4 x float> %3
   1830 }
   1831 
   1832 define float @test_divss(float %a0, float %a1, float *%a2) {
        ; Schedule test for scalar single-precision division. The IR at the bottom
        ; of this function divides %a0 by %a1, then divides that quotient by a
        ; float loaded through %a2. The autogenerated assertion groups below list,
        ; per FileCheck prefix, the expected register-operand and memory-operand
        ; (v)divss with the "# sched:" annotation printed by llc.
   1833 ; GENERIC-LABEL: test_divss:
   1834 ; GENERIC:       # %bb.0:
   1835 ; GENERIC-NEXT:    divss %xmm1, %xmm0 # sched: [14:14.00]
   1836 ; GENERIC-NEXT:    divss (%rdi), %xmm0 # sched: [20:14.00]
   1837 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1838 ;
   1839 ; ATOM-LABEL: test_divss:
   1840 ; ATOM:       # %bb.0:
   1841 ; ATOM-NEXT:    divss %xmm1, %xmm0 # sched: [34:17.00]
   1842 ; ATOM-NEXT:    divss (%rdi), %xmm0 # sched: [34:17.00]
   1843 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1844 ;
   1845 ; SLM-LABEL: test_divss:
   1846 ; SLM:       # %bb.0:
   1847 ; SLM-NEXT:    divss %xmm1, %xmm0 # sched: [19:17.00]
   1848 ; SLM-NEXT:    divss (%rdi), %xmm0 # sched: [22:17.00]
   1849 ; SLM-NEXT:    retq # sched: [4:1.00]
   1850 ;
   1851 ; SANDY-SSE-LABEL: test_divss:
   1852 ; SANDY-SSE:       # %bb.0:
   1853 ; SANDY-SSE-NEXT:    divss %xmm1, %xmm0 # sched: [14:14.00]
   1854 ; SANDY-SSE-NEXT:    divss (%rdi), %xmm0 # sched: [20:14.00]
   1855 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1856 ;
   1857 ; SANDY-LABEL: test_divss:
   1858 ; SANDY:       # %bb.0:
   1859 ; SANDY-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [14:14.00]
   1860 ; SANDY-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [20:14.00]
   1861 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1862 ;
   1863 ; HASWELL-SSE-LABEL: test_divss:
   1864 ; HASWELL-SSE:       # %bb.0:
   1865 ; HASWELL-SSE-NEXT:    divss %xmm1, %xmm0 # sched: [13:7.00]
   1866 ; HASWELL-SSE-NEXT:    divss (%rdi), %xmm0 # sched: [18:7.00]
   1867 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1868 ;
   1869 ; HASWELL-LABEL: test_divss:
   1870 ; HASWELL:       # %bb.0:
   1871 ; HASWELL-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [13:7.00]
   1872 ; HASWELL-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [18:7.00]
   1873 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1874 ;
   1875 ; BROADWELL-SSE-LABEL: test_divss:
   1876 ; BROADWELL-SSE:       # %bb.0:
   1877 ; BROADWELL-SSE-NEXT:    divss %xmm1, %xmm0 # sched: [11:3.00]
   1878 ; BROADWELL-SSE-NEXT:    divss (%rdi), %xmm0 # sched: [16:5.00]
   1879 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1880 ;
   1881 ; BROADWELL-LABEL: test_divss:
   1882 ; BROADWELL:       # %bb.0:
   1883 ; BROADWELL-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
   1884 ; BROADWELL-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [16:5.00]
   1885 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1886 ;
   1887 ; SKYLAKE-SSE-LABEL: test_divss:
   1888 ; SKYLAKE-SSE:       # %bb.0:
   1889 ; SKYLAKE-SSE-NEXT:    divss %xmm1, %xmm0 # sched: [11:3.00]
   1890 ; SKYLAKE-SSE-NEXT:    divss (%rdi), %xmm0 # sched: [16:3.00]
   1891 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   1892 ;
   1893 ; SKYLAKE-LABEL: test_divss:
   1894 ; SKYLAKE:       # %bb.0:
   1895 ; SKYLAKE-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
   1896 ; SKYLAKE-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [16:3.00]
   1897 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1898 ;
   1899 ; SKX-SSE-LABEL: test_divss:
   1900 ; SKX-SSE:       # %bb.0:
   1901 ; SKX-SSE-NEXT:    divss %xmm1, %xmm0 # sched: [11:3.00]
   1902 ; SKX-SSE-NEXT:    divss (%rdi), %xmm0 # sched: [16:3.00]
   1903 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   1904 ;
   1905 ; SKX-LABEL: test_divss:
   1906 ; SKX:       # %bb.0:
   1907 ; SKX-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
   1908 ; SKX-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [16:3.00]
   1909 ; SKX-NEXT:    retq # sched: [7:1.00]
   1910 ;
   1911 ; BTVER2-SSE-LABEL: test_divss:
   1912 ; BTVER2-SSE:       # %bb.0:
   1913 ; BTVER2-SSE-NEXT:    divss %xmm1, %xmm0 # sched: [19:19.00]
   1914 ; BTVER2-SSE-NEXT:    divss (%rdi), %xmm0 # sched: [24:19.00]
   1915 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   1916 ;
   1917 ; BTVER2-LABEL: test_divss:
   1918 ; BTVER2:       # %bb.0:
   1919 ; BTVER2-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
   1920 ; BTVER2-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
   1921 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1922 ;
   1923 ; ZNVER1-SSE-LABEL: test_divss:
   1924 ; ZNVER1-SSE:       # %bb.0:
   1925 ; ZNVER1-SSE-NEXT:    divss %xmm1, %xmm0 # sched: [15:1.00]
   1926 ; ZNVER1-SSE-NEXT:    divss (%rdi), %xmm0 # sched: [22:1.00]
   1927 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   1928 ;
   1929 ; ZNVER1-LABEL: test_divss:
   1930 ; ZNVER1:       # %bb.0:
   1931 ; ZNVER1-NEXT:    vdivss %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
   1932 ; ZNVER1-NEXT:    vdivss (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
   1933 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register-operand division of the two float arguments.
   1934   %1 = fdiv float %a0, %a1
          ; Memory-operand division: the divisor is a float loaded through %a2,
          ; applied to the previous quotient.
   1935   %2 = load float, float *%a2, align 4
   1936   %3 = fdiv float %1, %2
   1937   ret float %3
   1938 }
   1939 
   1940 define void @test_ldmxcsr(i32 %a0) {
        ; Scheduling test for (v)ldmxcsr: %a0 is stored to a stack slot and the
        ; slot's address is passed to @llvm.x86.sse.ldmxcsr. Each prefix block
        ; below pins the expected instructions and their `# sched:` annotations
        ; for the RUN line(s) using that prefix. The check lines are
        ; autogenerated (see the NOTE at the top of the file).
   1941 ; GENERIC-LABEL: test_ldmxcsr:
   1942 ; GENERIC:       # %bb.0:
   1943 ; GENERIC-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   1944 ; GENERIC-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
   1945 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1946 ;
   1947 ; ATOM-LABEL: test_ldmxcsr:
   1948 ; ATOM:       # %bb.0:
   1949 ; ATOM-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   1950 ; ATOM-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:2.50]
   1951 ; ATOM-NEXT:    retq # sched: [79:39.50]
   1952 ;
   1953 ; SLM-LABEL: test_ldmxcsr:
   1954 ; SLM:       # %bb.0:
   1955 ; SLM-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   1956 ; SLM-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00]
   1957 ; SLM-NEXT:    retq # sched: [4:1.00]
   1958 ;
   1959 ; SANDY-SSE-LABEL: test_ldmxcsr:
   1960 ; SANDY-SSE:       # %bb.0:
   1961 ; SANDY-SSE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   1962 ; SANDY-SSE-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
   1963 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   1964 ;
   1965 ; SANDY-LABEL: test_ldmxcsr:
   1966 ; SANDY:       # %bb.0:
   1967 ; SANDY-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   1968 ; SANDY-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
   1969 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1970 ;
   1971 ; HASWELL-SSE-LABEL: test_ldmxcsr:
   1972 ; HASWELL-SSE:       # %bb.0:
   1973 ; HASWELL-SSE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   1974 ; HASWELL-SSE-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
   1975 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1976 ;
   1977 ; HASWELL-LABEL: test_ldmxcsr:
   1978 ; HASWELL:       # %bb.0:
   1979 ; HASWELL-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   1980 ; HASWELL-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
   1981 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1982 ;
   1983 ; BROADWELL-SSE-LABEL: test_ldmxcsr:
   1984 ; BROADWELL-SSE:       # %bb.0:
   1985 ; BROADWELL-SSE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   1986 ; BROADWELL-SSE-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
   1987 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   1988 ;
   1989 ; BROADWELL-LABEL: test_ldmxcsr:
   1990 ; BROADWELL:       # %bb.0:
   1991 ; BROADWELL-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   1992 ; BROADWELL-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
   1993 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1994 ;
   1995 ; SKYLAKE-SSE-LABEL: test_ldmxcsr:
   1996 ; SKYLAKE-SSE:       # %bb.0:
   1997 ; SKYLAKE-SSE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   1998 ; SKYLAKE-SSE-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
   1999 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2000 ;
   2001 ; SKYLAKE-LABEL: test_ldmxcsr:
   2002 ; SKYLAKE:       # %bb.0:
   2003 ; SKYLAKE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   2004 ; SKYLAKE-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
   2005 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2006 ;
   2007 ; SKX-SSE-LABEL: test_ldmxcsr:
   2008 ; SKX-SSE:       # %bb.0:
   2009 ; SKX-SSE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   2010 ; SKX-SSE-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
   2011 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2012 ;
   2013 ; SKX-LABEL: test_ldmxcsr:
   2014 ; SKX:       # %bb.0:
   2015 ; SKX-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   2016 ; SKX-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
   2017 ; SKX-NEXT:    retq # sched: [7:1.00]
   2018 ;
   2019 ; BTVER2-SSE-LABEL: test_ldmxcsr:
   2020 ; BTVER2-SSE:       # %bb.0:
   2021 ; BTVER2-SSE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   2022 ; BTVER2-SSE-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
   2023 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2024 ;
   2025 ; BTVER2-LABEL: test_ldmxcsr:
   2026 ; BTVER2:       # %bb.0:
   2027 ; BTVER2-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   2028 ; BTVER2-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
   2029 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2030 ;
   2031 ; ZNVER1-SSE-LABEL: test_ldmxcsr:
   2032 ; ZNVER1-SSE:       # %bb.0:
   2033 ; ZNVER1-SSE-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50]
   2034 ; ZNVER1-SSE-NEXT:    ldmxcsr -{{[0-9]+}}(%rsp) # sched: [100:0.25]
   2035 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2036 ;
   2037 ; ZNVER1-LABEL: test_ldmxcsr:
   2038 ; ZNVER1:       # %bb.0:
   2039 ; ZNVER1-NEXT:    movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50]
   2040 ; ZNVER1-NEXT:    vldmxcsr -{{[0-9]+}}(%rsp) # sched: [100:0.25]
   2041 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; The intrinsic takes an i8*, so the i32 stack slot is bitcast before
        ; the call; the store shows up as the `movl %edi, -N(%rsp)` checked above.
   2042   %1 = alloca i32, align 4
   2043   %2 = bitcast i32* %1 to i8*
   2044   store i32 %a0, i32* %1
   2045   call void @llvm.x86.sse.ldmxcsr(i8* %2)
   2046   ret void
   2047 }
        ; NOTE(review): presumably ldmxcsr updates MXCSR state, so `readnone`
        ; looks inaccurate on this declaration - confirm whether it is intended.
   2048 declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone
   2049 
   2050 define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Scheduling test for (v)maxps: @llvm.x86.sse.max.ps is applied to
        ; %a0/%a1 and then to that result and a vector loaded from %a2, so the
        ; checks cover both the register-register and the memory-operand form.
        ; The check lines are autogenerated (see the NOTE at the top of the file).
   2051 ; GENERIC-LABEL: test_maxps:
   2052 ; GENERIC:       # %bb.0:
   2053 ; GENERIC-NEXT:    maxps %xmm1, %xmm0 # sched: [3:1.00]
   2054 ; GENERIC-NEXT:    maxps (%rdi), %xmm0 # sched: [9:1.00]
   2055 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2056 ;
   2057 ; ATOM-LABEL: test_maxps:
   2058 ; ATOM:       # %bb.0:
   2059 ; ATOM-NEXT:    maxps %xmm1, %xmm0 # sched: [5:5.00]
   2060 ; ATOM-NEXT:    maxps (%rdi), %xmm0 # sched: [5:5.00]
   2061 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2062 ;
   2063 ; SLM-LABEL: test_maxps:
   2064 ; SLM:       # %bb.0:
   2065 ; SLM-NEXT:    maxps %xmm1, %xmm0 # sched: [3:1.00]
   2066 ; SLM-NEXT:    maxps (%rdi), %xmm0 # sched: [6:1.00]
   2067 ; SLM-NEXT:    retq # sched: [4:1.00]
   2068 ;
   2069 ; SANDY-SSE-LABEL: test_maxps:
   2070 ; SANDY-SSE:       # %bb.0:
   2071 ; SANDY-SSE-NEXT:    maxps %xmm1, %xmm0 # sched: [3:1.00]
   2072 ; SANDY-SSE-NEXT:    maxps (%rdi), %xmm0 # sched: [9:1.00]
   2073 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2074 ;
   2075 ; SANDY-LABEL: test_maxps:
   2076 ; SANDY:       # %bb.0:
   2077 ; SANDY-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2078 ; SANDY-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   2079 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2080 ;
   2081 ; HASWELL-SSE-LABEL: test_maxps:
   2082 ; HASWELL-SSE:       # %bb.0:
   2083 ; HASWELL-SSE-NEXT:    maxps %xmm1, %xmm0 # sched: [3:1.00]
   2084 ; HASWELL-SSE-NEXT:    maxps (%rdi), %xmm0 # sched: [9:1.00]
   2085 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2086 ;
   2087 ; HASWELL-LABEL: test_maxps:
   2088 ; HASWELL:       # %bb.0:
   2089 ; HASWELL-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2090 ; HASWELL-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   2091 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2092 ;
   2093 ; BROADWELL-SSE-LABEL: test_maxps:
   2094 ; BROADWELL-SSE:       # %bb.0:
   2095 ; BROADWELL-SSE-NEXT:    maxps %xmm1, %xmm0 # sched: [3:1.00]
   2096 ; BROADWELL-SSE-NEXT:    maxps (%rdi), %xmm0 # sched: [8:1.00]
   2097 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2098 ;
   2099 ; BROADWELL-LABEL: test_maxps:
   2100 ; BROADWELL:       # %bb.0:
   2101 ; BROADWELL-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2102 ; BROADWELL-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   2103 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2104 ;
   2105 ; SKYLAKE-SSE-LABEL: test_maxps:
   2106 ; SKYLAKE-SSE:       # %bb.0:
   2107 ; SKYLAKE-SSE-NEXT:    maxps %xmm1, %xmm0 # sched: [4:0.50]
   2108 ; SKYLAKE-SSE-NEXT:    maxps (%rdi), %xmm0 # sched: [10:0.50]
   2109 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2110 ;
   2111 ; SKYLAKE-LABEL: test_maxps:
   2112 ; SKYLAKE:       # %bb.0:
   2113 ; SKYLAKE-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2114 ; SKYLAKE-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   2115 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2116 ;
   2117 ; SKX-SSE-LABEL: test_maxps:
   2118 ; SKX-SSE:       # %bb.0:
   2119 ; SKX-SSE-NEXT:    maxps %xmm1, %xmm0 # sched: [4:0.50]
   2120 ; SKX-SSE-NEXT:    maxps (%rdi), %xmm0 # sched: [10:0.50]
   2121 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2122 ;
   2123 ; SKX-LABEL: test_maxps:
   2124 ; SKX:       # %bb.0:
   2125 ; SKX-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2126 ; SKX-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   2127 ; SKX-NEXT:    retq # sched: [7:1.00]
   2128 ;
   2129 ; BTVER2-SSE-LABEL: test_maxps:
   2130 ; BTVER2-SSE:       # %bb.0:
   2131 ; BTVER2-SSE-NEXT:    maxps %xmm1, %xmm0 # sched: [2:1.00]
   2132 ; BTVER2-SSE-NEXT:    maxps (%rdi), %xmm0 # sched: [7:1.00]
   2133 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2134 ;
   2135 ; BTVER2-LABEL: test_maxps:
   2136 ; BTVER2:       # %bb.0:
   2137 ; BTVER2-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   2138 ; BTVER2-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   2139 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2140 ;
   2141 ; ZNVER1-SSE-LABEL: test_maxps:
   2142 ; ZNVER1-SSE:       # %bb.0:
   2143 ; ZNVER1-SSE-NEXT:    maxps %xmm1, %xmm0 # sched: [3:1.00]
   2144 ; ZNVER1-SSE-NEXT:    maxps (%rdi), %xmm0 # sched: [10:1.00]
   2145 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2146 ;
   2147 ; ZNVER1-LABEL: test_maxps:
   2148 ; ZNVER1:       # %bb.0:
   2149 ; ZNVER1-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2150 ; ZNVER1-NEXT:    vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   2151 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; First call: register operands. Second call: its second operand is the
        ; 16-byte-aligned load from %a2, matched above as the `(%rdi)` form.
   2152   %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
   2153   %2 = load <4 x float>, <4 x float> *%a2, align 16
   2154   %3 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %1, <4 x float> %2)
   2155   ret <4 x float> %3
   2156 }
   2157 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
   2158 
   2159 define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Scheduling test for (v)maxss: @llvm.x86.sse.max.ss is applied to
        ; %a0/%a1 and then to that result and a vector loaded from %a2, so the
        ; checks cover both the register-register and the memory-operand form.
        ; The check lines are autogenerated (see the NOTE at the top of the file).
   2160 ; GENERIC-LABEL: test_maxss:
   2161 ; GENERIC:       # %bb.0:
   2162 ; GENERIC-NEXT:    maxss %xmm1, %xmm0 # sched: [3:1.00]
   2163 ; GENERIC-NEXT:    maxss (%rdi), %xmm0 # sched: [9:1.00]
   2164 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2165 ;
   2166 ; ATOM-LABEL: test_maxss:
   2167 ; ATOM:       # %bb.0:
   2168 ; ATOM-NEXT:    maxss %xmm1, %xmm0 # sched: [5:5.00]
   2169 ; ATOM-NEXT:    maxss (%rdi), %xmm0 # sched: [5:5.00]
   2170 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2171 ;
   2172 ; SLM-LABEL: test_maxss:
   2173 ; SLM:       # %bb.0:
   2174 ; SLM-NEXT:    maxss %xmm1, %xmm0 # sched: [3:1.00]
   2175 ; SLM-NEXT:    maxss (%rdi), %xmm0 # sched: [6:1.00]
   2176 ; SLM-NEXT:    retq # sched: [4:1.00]
   2177 ;
   2178 ; SANDY-SSE-LABEL: test_maxss:
   2179 ; SANDY-SSE:       # %bb.0:
   2180 ; SANDY-SSE-NEXT:    maxss %xmm1, %xmm0 # sched: [3:1.00]
   2181 ; SANDY-SSE-NEXT:    maxss (%rdi), %xmm0 # sched: [9:1.00]
   2182 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2183 ;
   2184 ; SANDY-LABEL: test_maxss:
   2185 ; SANDY:       # %bb.0:
   2186 ; SANDY-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2187 ; SANDY-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   2188 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2189 ;
   2190 ; HASWELL-SSE-LABEL: test_maxss:
   2191 ; HASWELL-SSE:       # %bb.0:
   2192 ; HASWELL-SSE-NEXT:    maxss %xmm1, %xmm0 # sched: [3:1.00]
   2193 ; HASWELL-SSE-NEXT:    maxss (%rdi), %xmm0 # sched: [8:1.00]
   2194 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2195 ;
   2196 ; HASWELL-LABEL: test_maxss:
   2197 ; HASWELL:       # %bb.0:
   2198 ; HASWELL-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2199 ; HASWELL-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   2200 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2201 ;
   2202 ; BROADWELL-SSE-LABEL: test_maxss:
   2203 ; BROADWELL-SSE:       # %bb.0:
   2204 ; BROADWELL-SSE-NEXT:    maxss %xmm1, %xmm0 # sched: [3:1.00]
   2205 ; BROADWELL-SSE-NEXT:    maxss (%rdi), %xmm0 # sched: [8:1.00]
   2206 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2207 ;
   2208 ; BROADWELL-LABEL: test_maxss:
   2209 ; BROADWELL:       # %bb.0:
   2210 ; BROADWELL-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2211 ; BROADWELL-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   2212 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2213 ;
   2214 ; SKYLAKE-SSE-LABEL: test_maxss:
   2215 ; SKYLAKE-SSE:       # %bb.0:
   2216 ; SKYLAKE-SSE-NEXT:    maxss %xmm1, %xmm0 # sched: [4:0.50]
   2217 ; SKYLAKE-SSE-NEXT:    maxss (%rdi), %xmm0 # sched: [9:0.50]
   2218 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2219 ;
   2220 ; SKYLAKE-LABEL: test_maxss:
   2221 ; SKYLAKE:       # %bb.0:
   2222 ; SKYLAKE-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2223 ; SKYLAKE-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   2224 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2225 ;
   2226 ; SKX-SSE-LABEL: test_maxss:
   2227 ; SKX-SSE:       # %bb.0:
   2228 ; SKX-SSE-NEXT:    maxss %xmm1, %xmm0 # sched: [4:0.50]
   2229 ; SKX-SSE-NEXT:    maxss (%rdi), %xmm0 # sched: [9:0.50]
   2230 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2231 ;
   2232 ; SKX-LABEL: test_maxss:
   2233 ; SKX:       # %bb.0:
   2234 ; SKX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2235 ; SKX-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   2236 ; SKX-NEXT:    retq # sched: [7:1.00]
   2237 ;
   2238 ; BTVER2-SSE-LABEL: test_maxss:
   2239 ; BTVER2-SSE:       # %bb.0:
   2240 ; BTVER2-SSE-NEXT:    maxss %xmm1, %xmm0 # sched: [2:1.00]
   2241 ; BTVER2-SSE-NEXT:    maxss (%rdi), %xmm0 # sched: [7:1.00]
   2242 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2243 ;
   2244 ; BTVER2-LABEL: test_maxss:
   2245 ; BTVER2:       # %bb.0:
   2246 ; BTVER2-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   2247 ; BTVER2-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   2248 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2249 ;
   2250 ; ZNVER1-SSE-LABEL: test_maxss:
   2251 ; ZNVER1-SSE:       # %bb.0:
   2252 ; ZNVER1-SSE-NEXT:    maxss %xmm1, %xmm0 # sched: [3:1.00]
   2253 ; ZNVER1-SSE-NEXT:    maxss (%rdi), %xmm0 # sched: [10:1.00]
   2254 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2255 ;
   2256 ; ZNVER1-LABEL: test_maxss:
   2257 ; ZNVER1:       # %bb.0:
   2258 ; ZNVER1-NEXT:    vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2259 ; ZNVER1-NEXT:    vmaxss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   2260 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; First call: register operands. Second call: its second operand is the
        ; vector loaded from %a2, matched above as the `(%rdi)` form.
   2261   %1 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
   2262   %2 = load <4 x float>, <4 x float> *%a2, align 16
   2263   %3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %2)
   2264   ret <4 x float> %3
   2265 }
   2266 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
   2267 
   2268 define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Scheduling test for (v)minps: @llvm.x86.sse.min.ps is applied to
        ; %a0/%a1 and then to that result and a vector loaded from %a2, so the
        ; checks cover both the register-register and the memory-operand form.
        ; The check lines are autogenerated (see the NOTE at the top of the file).
   2269 ; GENERIC-LABEL: test_minps:
   2270 ; GENERIC:       # %bb.0:
   2271 ; GENERIC-NEXT:    minps %xmm1, %xmm0 # sched: [3:1.00]
   2272 ; GENERIC-NEXT:    minps (%rdi), %xmm0 # sched: [9:1.00]
   2273 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2274 ;
   2275 ; ATOM-LABEL: test_minps:
   2276 ; ATOM:       # %bb.0:
   2277 ; ATOM-NEXT:    minps %xmm1, %xmm0 # sched: [5:5.00]
   2278 ; ATOM-NEXT:    minps (%rdi), %xmm0 # sched: [5:5.00]
   2279 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2280 ;
   2281 ; SLM-LABEL: test_minps:
   2282 ; SLM:       # %bb.0:
   2283 ; SLM-NEXT:    minps %xmm1, %xmm0 # sched: [3:1.00]
   2284 ; SLM-NEXT:    minps (%rdi), %xmm0 # sched: [6:1.00]
   2285 ; SLM-NEXT:    retq # sched: [4:1.00]
   2286 ;
   2287 ; SANDY-SSE-LABEL: test_minps:
   2288 ; SANDY-SSE:       # %bb.0:
   2289 ; SANDY-SSE-NEXT:    minps %xmm1, %xmm0 # sched: [3:1.00]
   2290 ; SANDY-SSE-NEXT:    minps (%rdi), %xmm0 # sched: [9:1.00]
   2291 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2292 ;
   2293 ; SANDY-LABEL: test_minps:
   2294 ; SANDY:       # %bb.0:
   2295 ; SANDY-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2296 ; SANDY-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   2297 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2298 ;
   2299 ; HASWELL-SSE-LABEL: test_minps:
   2300 ; HASWELL-SSE:       # %bb.0:
   2301 ; HASWELL-SSE-NEXT:    minps %xmm1, %xmm0 # sched: [3:1.00]
   2302 ; HASWELL-SSE-NEXT:    minps (%rdi), %xmm0 # sched: [9:1.00]
   2303 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2304 ;
   2305 ; HASWELL-LABEL: test_minps:
   2306 ; HASWELL:       # %bb.0:
   2307 ; HASWELL-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2308 ; HASWELL-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   2309 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2310 ;
   2311 ; BROADWELL-SSE-LABEL: test_minps:
   2312 ; BROADWELL-SSE:       # %bb.0:
   2313 ; BROADWELL-SSE-NEXT:    minps %xmm1, %xmm0 # sched: [3:1.00]
   2314 ; BROADWELL-SSE-NEXT:    minps (%rdi), %xmm0 # sched: [8:1.00]
   2315 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2316 ;
   2317 ; BROADWELL-LABEL: test_minps:
   2318 ; BROADWELL:       # %bb.0:
   2319 ; BROADWELL-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2320 ; BROADWELL-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   2321 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2322 ;
   2323 ; SKYLAKE-SSE-LABEL: test_minps:
   2324 ; SKYLAKE-SSE:       # %bb.0:
   2325 ; SKYLAKE-SSE-NEXT:    minps %xmm1, %xmm0 # sched: [4:0.50]
   2326 ; SKYLAKE-SSE-NEXT:    minps (%rdi), %xmm0 # sched: [10:0.50]
   2327 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2328 ;
   2329 ; SKYLAKE-LABEL: test_minps:
   2330 ; SKYLAKE:       # %bb.0:
   2331 ; SKYLAKE-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2332 ; SKYLAKE-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   2333 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2334 ;
   2335 ; SKX-SSE-LABEL: test_minps:
   2336 ; SKX-SSE:       # %bb.0:
   2337 ; SKX-SSE-NEXT:    minps %xmm1, %xmm0 # sched: [4:0.50]
   2338 ; SKX-SSE-NEXT:    minps (%rdi), %xmm0 # sched: [10:0.50]
   2339 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2340 ;
   2341 ; SKX-LABEL: test_minps:
   2342 ; SKX:       # %bb.0:
   2343 ; SKX-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2344 ; SKX-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   2345 ; SKX-NEXT:    retq # sched: [7:1.00]
   2346 ;
   2347 ; BTVER2-SSE-LABEL: test_minps:
   2348 ; BTVER2-SSE:       # %bb.0:
   2349 ; BTVER2-SSE-NEXT:    minps %xmm1, %xmm0 # sched: [2:1.00]
   2350 ; BTVER2-SSE-NEXT:    minps (%rdi), %xmm0 # sched: [7:1.00]
   2351 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2352 ;
   2353 ; BTVER2-LABEL: test_minps:
   2354 ; BTVER2:       # %bb.0:
   2355 ; BTVER2-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   2356 ; BTVER2-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   2357 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2358 ;
   2359 ; ZNVER1-SSE-LABEL: test_minps:
   2360 ; ZNVER1-SSE:       # %bb.0:
   2361 ; ZNVER1-SSE-NEXT:    minps %xmm1, %xmm0 # sched: [3:1.00]
   2362 ; ZNVER1-SSE-NEXT:    minps (%rdi), %xmm0 # sched: [10:1.00]
   2363 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2364 ;
   2365 ; ZNVER1-LABEL: test_minps:
   2366 ; ZNVER1:       # %bb.0:
   2367 ; ZNVER1-NEXT:    vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2368 ; ZNVER1-NEXT:    vminps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   2369 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; First call: register operands. Second call: its second operand is the
        ; 16-byte-aligned load from %a2, matched above as the `(%rdi)` form.
   2370   %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
   2371   %2 = load <4 x float>, <4 x float> *%a2, align 16
   2372   %3 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %1, <4 x float> %2)
   2373   ret <4 x float> %3
   2374 }
   2375 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
   2376 
   2377 define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Scheduling test for (v)minss: @llvm.x86.sse.min.ss is applied to
        ; %a0/%a1 and then to that result and a vector loaded from %a2, so the
        ; checks cover both the register-register and the memory-operand form.
        ; The check lines are autogenerated (see the NOTE at the top of the file).
   2378 ; GENERIC-LABEL: test_minss:
   2379 ; GENERIC:       # %bb.0:
   2380 ; GENERIC-NEXT:    minss %xmm1, %xmm0 # sched: [3:1.00]
   2381 ; GENERIC-NEXT:    minss (%rdi), %xmm0 # sched: [9:1.00]
   2382 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2383 ;
   2384 ; ATOM-LABEL: test_minss:
   2385 ; ATOM:       # %bb.0:
   2386 ; ATOM-NEXT:    minss %xmm1, %xmm0 # sched: [5:5.00]
   2387 ; ATOM-NEXT:    minss (%rdi), %xmm0 # sched: [5:5.00]
   2388 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2389 ;
   2390 ; SLM-LABEL: test_minss:
   2391 ; SLM:       # %bb.0:
   2392 ; SLM-NEXT:    minss %xmm1, %xmm0 # sched: [3:1.00]
   2393 ; SLM-NEXT:    minss (%rdi), %xmm0 # sched: [6:1.00]
   2394 ; SLM-NEXT:    retq # sched: [4:1.00]
   2395 ;
   2396 ; SANDY-SSE-LABEL: test_minss:
   2397 ; SANDY-SSE:       # %bb.0:
   2398 ; SANDY-SSE-NEXT:    minss %xmm1, %xmm0 # sched: [3:1.00]
   2399 ; SANDY-SSE-NEXT:    minss (%rdi), %xmm0 # sched: [9:1.00]
   2400 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2401 ;
   2402 ; SANDY-LABEL: test_minss:
   2403 ; SANDY:       # %bb.0:
   2404 ; SANDY-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2405 ; SANDY-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   2406 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2407 ;
   2408 ; HASWELL-SSE-LABEL: test_minss:
   2409 ; HASWELL-SSE:       # %bb.0:
   2410 ; HASWELL-SSE-NEXT:    minss %xmm1, %xmm0 # sched: [3:1.00]
   2411 ; HASWELL-SSE-NEXT:    minss (%rdi), %xmm0 # sched: [8:1.00]
   2412 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2413 ;
   2414 ; HASWELL-LABEL: test_minss:
   2415 ; HASWELL:       # %bb.0:
   2416 ; HASWELL-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2417 ; HASWELL-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   2418 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2419 ;
   2420 ; BROADWELL-SSE-LABEL: test_minss:
   2421 ; BROADWELL-SSE:       # %bb.0:
   2422 ; BROADWELL-SSE-NEXT:    minss %xmm1, %xmm0 # sched: [3:1.00]
   2423 ; BROADWELL-SSE-NEXT:    minss (%rdi), %xmm0 # sched: [8:1.00]
   2424 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2425 ;
   2426 ; BROADWELL-LABEL: test_minss:
   2427 ; BROADWELL:       # %bb.0:
   2428 ; BROADWELL-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2429 ; BROADWELL-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   2430 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2431 ;
   2432 ; SKYLAKE-SSE-LABEL: test_minss:
   2433 ; SKYLAKE-SSE:       # %bb.0:
   2434 ; SKYLAKE-SSE-NEXT:    minss %xmm1, %xmm0 # sched: [4:0.50]
   2435 ; SKYLAKE-SSE-NEXT:    minss (%rdi), %xmm0 # sched: [9:0.50]
   2436 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2437 ;
   2438 ; SKYLAKE-LABEL: test_minss:
   2439 ; SKYLAKE:       # %bb.0:
   2440 ; SKYLAKE-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2441 ; SKYLAKE-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   2442 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2443 ;
   2444 ; SKX-SSE-LABEL: test_minss:
   2445 ; SKX-SSE:       # %bb.0:
   2446 ; SKX-SSE-NEXT:    minss %xmm1, %xmm0 # sched: [4:0.50]
   2447 ; SKX-SSE-NEXT:    minss (%rdi), %xmm0 # sched: [9:0.50]
   2448 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2449 ;
   2450 ; SKX-LABEL: test_minss:
   2451 ; SKX:       # %bb.0:
   2452 ; SKX-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2453 ; SKX-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   2454 ; SKX-NEXT:    retq # sched: [7:1.00]
   2455 ;
   2456 ; BTVER2-SSE-LABEL: test_minss:
   2457 ; BTVER2-SSE:       # %bb.0:
   2458 ; BTVER2-SSE-NEXT:    minss %xmm1, %xmm0 # sched: [2:1.00]
   2459 ; BTVER2-SSE-NEXT:    minss (%rdi), %xmm0 # sched: [7:1.00]
   2460 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2461 ;
   2462 ; BTVER2-LABEL: test_minss:
   2463 ; BTVER2:       # %bb.0:
   2464 ; BTVER2-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   2465 ; BTVER2-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   2466 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2467 ;
   2468 ; ZNVER1-SSE-LABEL: test_minss:
   2469 ; ZNVER1-SSE:       # %bb.0:
   2470 ; ZNVER1-SSE-NEXT:    minss %xmm1, %xmm0 # sched: [3:1.00]
   2471 ; ZNVER1-SSE-NEXT:    minss (%rdi), %xmm0 # sched: [10:1.00]
   2472 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2473 ;
   2474 ; ZNVER1-LABEL: test_minss:
   2475 ; ZNVER1:       # %bb.0:
   2476 ; ZNVER1-NEXT:    vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2477 ; ZNVER1-NEXT:    vminss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   2478 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; First call: register operands. Second call: its second operand is the
        ; vector loaded from %a2, matched above as the `(%rdi)` form.
   2479   %1 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
   2480   %2 = load <4 x float>, <4 x float> *%a2, align 16
   2481   %3 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %2)
   2482   ret <4 x float> %3
   2483 }
   2484 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
   2485 
   2486 define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) {
        ; Scheduling test for (v)movaps in both directions: a 16-byte-aligned
        ; vector load from %a0 and a 16-byte-aligned vector store to %a1, with
        ; an fadd of the loaded value with itself in between.
        ; The check lines are autogenerated (see the NOTE at the top of the file).
   2487 ; GENERIC-LABEL: test_movaps:
   2488 ; GENERIC:       # %bb.0:
   2489 ; GENERIC-NEXT:    movaps (%rdi), %xmm0 # sched: [6:0.50]
   2490 ; GENERIC-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
   2491 ; GENERIC-NEXT:    movaps %xmm0, (%rsi) # sched: [1:1.00]
   2492 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2493 ;
   2494 ; ATOM-LABEL: test_movaps:
   2495 ; ATOM:       # %bb.0:
   2496 ; ATOM-NEXT:    movaps (%rdi), %xmm0 # sched: [1:1.00]
   2497 ; ATOM-NEXT:    addps %xmm0, %xmm0 # sched: [5:5.00]
   2498 ; ATOM-NEXT:    movaps %xmm0, (%rsi) # sched: [1:1.00]
   2499 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2500 ;
   2501 ; SLM-LABEL: test_movaps:
   2502 ; SLM:       # %bb.0:
   2503 ; SLM-NEXT:    movaps (%rdi), %xmm0 # sched: [3:1.00]
   2504 ; SLM-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
   2505 ; SLM-NEXT:    movaps %xmm0, (%rsi) # sched: [1:1.00]
   2506 ; SLM-NEXT:    retq # sched: [4:1.00]
   2507 ;
   2508 ; SANDY-SSE-LABEL: test_movaps:
   2509 ; SANDY-SSE:       # %bb.0:
   2510 ; SANDY-SSE-NEXT:    movaps (%rdi), %xmm0 # sched: [6:0.50]
   2511 ; SANDY-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
   2512 ; SANDY-SSE-NEXT:    movaps %xmm0, (%rsi) # sched: [1:1.00]
   2513 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2514 ;
   2515 ; SANDY-LABEL: test_movaps:
   2516 ; SANDY:       # %bb.0:
   2517 ; SANDY-NEXT:    vmovaps (%rdi), %xmm0 # sched: [6:0.50]
   2518 ; SANDY-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   2519 ; SANDY-NEXT:    vmovaps %xmm0, (%rsi) # sched: [1:1.00]
   2520 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2521 ;
   2522 ; HASWELL-SSE-LABEL: test_movaps:
   2523 ; HASWELL-SSE:       # %bb.0:
   2524 ; HASWELL-SSE-NEXT:    movaps (%rdi), %xmm0 # sched: [6:0.50]
   2525 ; HASWELL-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
   2526 ; HASWELL-SSE-NEXT:    movaps %xmm0, (%rsi) # sched: [1:1.00]
   2527 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2528 ;
   2529 ; HASWELL-LABEL: test_movaps:
   2530 ; HASWELL:       # %bb.0:
   2531 ; HASWELL-NEXT:    vmovaps (%rdi), %xmm0 # sched: [6:0.50]
   2532 ; HASWELL-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   2533 ; HASWELL-NEXT:    vmovaps %xmm0, (%rsi) # sched: [1:1.00]
   2534 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2535 ;
   2536 ; BROADWELL-SSE-LABEL: test_movaps:
   2537 ; BROADWELL-SSE:       # %bb.0:
   2538 ; BROADWELL-SSE-NEXT:    movaps (%rdi), %xmm0 # sched: [5:0.50]
   2539 ; BROADWELL-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
   2540 ; BROADWELL-SSE-NEXT:    movaps %xmm0, (%rsi) # sched: [1:1.00]
   2541 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2542 ;
   2543 ; BROADWELL-LABEL: test_movaps:
   2544 ; BROADWELL:       # %bb.0:
   2545 ; BROADWELL-NEXT:    vmovaps (%rdi), %xmm0 # sched: [5:0.50]
   2546 ; BROADWELL-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   2547 ; BROADWELL-NEXT:    vmovaps %xmm0, (%rsi) # sched: [1:1.00]
   2548 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2549 ;
   2550 ; SKYLAKE-SSE-LABEL: test_movaps:
   2551 ; SKYLAKE-SSE:       # %bb.0:
   2552 ; SKYLAKE-SSE-NEXT:    movaps (%rdi), %xmm0 # sched: [6:0.50]
   2553 ; SKYLAKE-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [4:0.50]
   2554 ; SKYLAKE-SSE-NEXT:    movaps %xmm0, (%rsi) # sched: [1:1.00]
   2555 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2556 ;
   2557 ; SKYLAKE-LABEL: test_movaps:
   2558 ; SKYLAKE:       # %bb.0:
   2559 ; SKYLAKE-NEXT:    vmovaps (%rdi), %xmm0 # sched: [6:0.50]
   2560 ; SKYLAKE-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
   2561 ; SKYLAKE-NEXT:    vmovaps %xmm0, (%rsi) # sched: [1:1.00]
   2562 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2563 ;
   2564 ; SKX-SSE-LABEL: test_movaps:
   2565 ; SKX-SSE:       # %bb.0:
   2566 ; SKX-SSE-NEXT:    movaps (%rdi), %xmm0 # sched: [6:0.50]
   2567 ; SKX-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [4:0.50]
   2568 ; SKX-SSE-NEXT:    movaps %xmm0, (%rsi) # sched: [1:1.00]
   2569 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2570 ;
   2571 ; SKX-LABEL: test_movaps:
   2572 ; SKX:       # %bb.0:
   2573 ; SKX-NEXT:    vmovaps (%rdi), %xmm0 # sched: [6:0.50]
   2574 ; SKX-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
   2575 ; SKX-NEXT:    vmovaps %xmm0, (%rsi) # sched: [1:1.00]
   2576 ; SKX-NEXT:    retq # sched: [7:1.00]
   2577 ;
   2578 ; BTVER2-SSE-LABEL: test_movaps:
   2579 ; BTVER2-SSE:       # %bb.0:
   2580 ; BTVER2-SSE-NEXT:    movaps (%rdi), %xmm0 # sched: [5:1.00]
   2581 ; BTVER2-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
   2582 ; BTVER2-SSE-NEXT:    movaps %xmm0, (%rsi) # sched: [1:1.00]
   2583 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2584 ;
   2585 ; BTVER2-LABEL: test_movaps:
   2586 ; BTVER2:       # %bb.0:
   2587 ; BTVER2-NEXT:    vmovaps (%rdi), %xmm0 # sched: [5:1.00]
   2588 ; BTVER2-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   2589 ; BTVER2-NEXT:    vmovaps %xmm0, (%rsi) # sched: [1:1.00]
   2590 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2591 ;
   2592 ; ZNVER1-SSE-LABEL: test_movaps:
   2593 ; ZNVER1-SSE:       # %bb.0:
   2594 ; ZNVER1-SSE-NEXT:    movaps (%rdi), %xmm0 # sched: [8:0.50]
   2595 ; ZNVER1-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
   2596 ; ZNVER1-SSE-NEXT:    movaps %xmm0, (%rsi) # sched: [1:0.50]
   2597 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2598 ;
   2599 ; ZNVER1-LABEL: test_movaps:
   2600 ; ZNVER1:       # %bb.0:
   2601 ; ZNVER1-NEXT:    vmovaps (%rdi), %xmm0 # sched: [8:0.50]
   2602 ; ZNVER1-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   2603 ; ZNVER1-NEXT:    vmovaps %xmm0, (%rsi) # sched: [1:0.50]
   2604 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; NOTE(review): the fadd presumably exists to keep the load and store
        ; as separate float-domain (v)movaps instructions - confirm the intent.
   2605   %1 = load <4 x float>, <4 x float> *%a0, align 16
   2606   %2 = fadd <4 x float> %1, %1
   2607   store <4 x float> %2, <4 x float> *%a1, align 16
   2608   ret void
   2609 }
   2610 
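   2611 ; TODO (v)movhlps - the AVX check prefixes for test_movhlps below match vunpckhpd, so vmovhlps itself is not yet covered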
   2612 
   2613 define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) {
        ; Scheduling test for the movhlps shuffle pattern. GENERIC, ATOM, SLM and
        ; the *-SSE prefixes match movhlps; the remaining prefixes match
        ; vunpckhpd for the same shuffle.
        ; The check lines are autogenerated (see the NOTE at the top of the file).
   2614 ; GENERIC-LABEL: test_movhlps:
   2615 ; GENERIC:       # %bb.0:
   2616 ; GENERIC-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
   2617 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2618 ;
   2619 ; ATOM-LABEL: test_movhlps:
   2620 ; ATOM:       # %bb.0:
   2621 ; ATOM-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
   2622 ; ATOM-NEXT:    nop # sched: [1:0.50]
   2623 ; ATOM-NEXT:    nop # sched: [1:0.50]
   2624 ; ATOM-NEXT:    nop # sched: [1:0.50]
   2625 ; ATOM-NEXT:    nop # sched: [1:0.50]
   2626 ; ATOM-NEXT:    nop # sched: [1:0.50]
   2627 ; ATOM-NEXT:    nop # sched: [1:0.50]
   2628 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2629 ;
   2630 ; SLM-LABEL: test_movhlps:
   2631 ; SLM:       # %bb.0:
   2632 ; SLM-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
   2633 ; SLM-NEXT:    retq # sched: [4:1.00]
   2634 ;
   2635 ; SANDY-SSE-LABEL: test_movhlps:
   2636 ; SANDY-SSE:       # %bb.0:
   2637 ; SANDY-SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
   2638 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2639 ;
   2640 ; SANDY-LABEL: test_movhlps:
   2641 ; SANDY:       # %bb.0:
   2642 ; SANDY-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
   2643 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2644 ;
   2645 ; HASWELL-SSE-LABEL: test_movhlps:
   2646 ; HASWELL-SSE:       # %bb.0:
   2647 ; HASWELL-SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
   2648 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2649 ;
   2650 ; HASWELL-LABEL: test_movhlps:
   2651 ; HASWELL:       # %bb.0:
   2652 ; HASWELL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
   2653 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2654 ;
   2655 ; BROADWELL-SSE-LABEL: test_movhlps:
   2656 ; BROADWELL-SSE:       # %bb.0:
   2657 ; BROADWELL-SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
   2658 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2659 ;
   2660 ; BROADWELL-LABEL: test_movhlps:
   2661 ; BROADWELL:       # %bb.0:
   2662 ; BROADWELL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
   2663 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2664 ;
   2665 ; SKYLAKE-SSE-LABEL: test_movhlps:
   2666 ; SKYLAKE-SSE:       # %bb.0:
   2667 ; SKYLAKE-SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
   2668 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2669 ;
   2670 ; SKYLAKE-LABEL: test_movhlps:
   2671 ; SKYLAKE:       # %bb.0:
   2672 ; SKYLAKE-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
   2673 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2674 ;
   2675 ; SKX-SSE-LABEL: test_movhlps:
   2676 ; SKX-SSE:       # %bb.0:
   2677 ; SKX-SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
   2678 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2679 ;
   2680 ; SKX-LABEL: test_movhlps:
   2681 ; SKX:       # %bb.0:
   2682 ; SKX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
   2683 ; SKX-NEXT:    retq # sched: [7:1.00]
   2684 ;
   2685 ; BTVER2-SSE-LABEL: test_movhlps:
   2686 ; BTVER2-SSE:       # %bb.0:
   2687 ; BTVER2-SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
   2688 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2689 ;
   2690 ; BTVER2-LABEL: test_movhlps:
   2691 ; BTVER2:       # %bb.0:
   2692 ; BTVER2-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
   2693 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2694 ;
   2695 ; ZNVER1-SSE-LABEL: test_movhlps:
   2696 ; ZNVER1-SSE:       # %bb.0:
   2697 ; ZNVER1-SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
   2698 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2699 ;
   2700 ; ZNVER1-LABEL: test_movhlps:
   2701 ; ZNVER1:       # %bb.0:
   2702 ; ZNVER1-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
   2703 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Mask <6, 7, 2, 3>: indices 4-7 select from %a1, so the result is
        ; %a1[2], %a1[3], %a0[2], %a0[3] - the high half of %a1 followed by the
        ; high half of %a0 (shown above as xmm1[1],xmm0[1] in 64-bit lanes).
   2704   %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
   2705   ret <4 x float> %1
   2706 }
   2707 
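   2708 ; TODO: (v)movhps - presumably still open because the assertions below match (v)movhpd for the load; confirm before removing.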
   2709 
   2710 define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
        ; Scheduling test for a 64-bit high-half load/store. Loads <2 x float> from
        ; %a2, places it in lanes 2-3 of %a1 (shuffle mask <0,1,4,5>), adds %a0, and
        ; stores lanes 2-3 of the sum back through the same pointer.
        ; The assertions below are autogenerated (see the NOTE at the top of the
        ; file); regenerate them with the script instead of editing them by hand.
        ; They currently match (v)movhpd for the load. The store is matched as
        ; movhlps + movlps on the runs that emit legacy SSE encodings and as vpextrq
        ; on the runs that emit VEX encodings.
   2711 ; GENERIC-LABEL: test_movhps:
   2712 ; GENERIC:       # %bb.0:
   2713 ; GENERIC-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   2714 ; GENERIC-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   2715 ; GENERIC-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
   2716 ; GENERIC-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
   2717 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2718 ;
   2719 ; ATOM-LABEL: test_movhps:
   2720 ; ATOM:       # %bb.0:
   2721 ; ATOM-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
   2722 ; ATOM-NEXT:    addps %xmm0, %xmm1 # sched: [5:5.00]
   2723 ; ATOM-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
   2724 ; ATOM-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
   2725 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2726 ;
   2727 ; SLM-LABEL: test_movhps:
   2728 ; SLM:       # %bb.0:
   2729 ; SLM-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
   2730 ; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   2731 ; SLM-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
   2732 ; SLM-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
   2733 ; SLM-NEXT:    retq # sched: [4:1.00]
   2734 ;
   2735 ; SANDY-SSE-LABEL: test_movhps:
   2736 ; SANDY-SSE:       # %bb.0:
   2737 ; SANDY-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   2738 ; SANDY-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   2739 ; SANDY-SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
   2740 ; SANDY-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
   2741 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2742 ;
   2743 ; SANDY-LABEL: test_movhps:
   2744 ; SANDY:       # %bb.0:
   2745 ; SANDY-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
   2746 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2747 ; SANDY-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
   2748 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2749 ;
   2750 ; HASWELL-SSE-LABEL: test_movhps:
   2751 ; HASWELL-SSE:       # %bb.0:
   2752 ; HASWELL-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   2753 ; HASWELL-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   2754 ; HASWELL-SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
   2755 ; HASWELL-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
   2756 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2757 ;
   2758 ; HASWELL-LABEL: test_movhps:
   2759 ; HASWELL:       # %bb.0:
   2760 ; HASWELL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   2761 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2762 ; HASWELL-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
   2763 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2764 ;
   2765 ; BROADWELL-SSE-LABEL: test_movhps:
   2766 ; BROADWELL-SSE:       # %bb.0:
   2767 ; BROADWELL-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   2768 ; BROADWELL-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   2769 ; BROADWELL-SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
   2770 ; BROADWELL-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
   2771 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2772 ;
   2773 ; BROADWELL-LABEL: test_movhps:
   2774 ; BROADWELL:       # %bb.0:
   2775 ; BROADWELL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   2776 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2777 ; BROADWELL-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
   2778 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2779 ;
   2780 ; SKYLAKE-SSE-LABEL: test_movhps:
   2781 ; SKYLAKE-SSE:       # %bb.0:
   2782 ; SKYLAKE-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   2783 ; SKYLAKE-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [4:0.50]
   2784 ; SKYLAKE-SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
   2785 ; SKYLAKE-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
   2786 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2787 ;
   2788 ; SKYLAKE-LABEL: test_movhps:
   2789 ; SKYLAKE:       # %bb.0:
   2790 ; SKYLAKE-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   2791 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2792 ; SKYLAKE-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
   2793 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2794 ;
   2795 ; SKX-SSE-LABEL: test_movhps:
   2796 ; SKX-SSE:       # %bb.0:
   2797 ; SKX-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   2798 ; SKX-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [4:0.50]
   2799 ; SKX-SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
   2800 ; SKX-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
   2801 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2802 ;
   2803 ; SKX-LABEL: test_movhps:
   2804 ; SKX:       # %bb.0:
   2805 ; SKX-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   2806 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   2807 ; SKX-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
   2808 ; SKX-NEXT:    retq # sched: [7:1.00]
   2809 ;
   2810 ; BTVER2-SSE-LABEL: test_movhps:
   2811 ; BTVER2-SSE:       # %bb.0:
   2812 ; BTVER2-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   2813 ; BTVER2-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   2814 ; BTVER2-SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50]
   2815 ; BTVER2-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [2:1.00]
   2816 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2817 ;
   2818 ; BTVER2-LABEL: test_movhps:
   2819 ; BTVER2:       # %bb.0:
   2820 ; BTVER2-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
   2821 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2822 ; BTVER2-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [3:1.00]
   2823 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2824 ;
   2825 ; ZNVER1-SSE-LABEL: test_movhps:
   2826 ; ZNVER1-SSE:       # %bb.0:
   2827 ; ZNVER1-SSE-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
   2828 ; ZNVER1-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   2829 ; ZNVER1-SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50]
   2830 ; ZNVER1-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:0.50]
   2831 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2832 ;
   2833 ; ZNVER1-LABEL: test_movhps:
   2834 ; ZNVER1:       # %bb.0:
   2835 ; ZNVER1-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
   2836 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2837 ; ZNVER1-NEXT:    vpextrq $1, %xmm0, (%rdi) # sched: [5:3.00]
   2838 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   2839   %1 = bitcast x86_mmx* %a2 to <2 x float>*
   2840   %2 = load <2 x float>, <2 x float> *%1, align 8
   2841   %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2842   %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   2843   %5 = fadd <4 x float> %a0, %4
   2844   %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 2, i32 3>
   2845   store <2 x float> %6, <2 x float>* %1
   2846   ret void
   2847 }
   2848 
   2849 ; TODO (v)movlhps
   2850 
   2851 define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
        ; Scheduling test for (v)movlhps. The shuffle (mask <0,1,4,5>) keeps lanes 0-1
        ; of %a0 and appends lanes 0-1 of %a1; the shuffled vector is then added to
        ; %a1 and returned.
        ; NOTE(review): the fadd presumably keeps the shuffle in the float domain so
        ; that movlhps is selected - confirm.
        ; The assertions below are autogenerated (see the NOTE at the top of the
        ; file); regenerate them with the script instead of editing them by hand.
   2852 ; GENERIC-LABEL: test_movlhps:
   2853 ; GENERIC:       # %bb.0:
   2854 ; GENERIC-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   2855 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   2856 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2857 ;
   2858 ; ATOM-LABEL: test_movlhps:
   2859 ; ATOM:       # %bb.0:
   2860 ; ATOM-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   2861 ; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
   2862 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2863 ;
   2864 ; SLM-LABEL: test_movlhps:
   2865 ; SLM:       # %bb.0:
   2866 ; SLM-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   2867 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   2868 ; SLM-NEXT:    retq # sched: [4:1.00]
   2869 ;
   2870 ; SANDY-SSE-LABEL: test_movlhps:
   2871 ; SANDY-SSE:       # %bb.0:
   2872 ; SANDY-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   2873 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   2874 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2875 ;
   2876 ; SANDY-LABEL: test_movlhps:
   2877 ; SANDY:       # %bb.0:
   2878 ; SANDY-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   2879 ; SANDY-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   2880 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2881 ;
   2882 ; HASWELL-SSE-LABEL: test_movlhps:
   2883 ; HASWELL-SSE:       # %bb.0:
   2884 ; HASWELL-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   2885 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   2886 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2887 ;
   2888 ; HASWELL-LABEL: test_movlhps:
   2889 ; HASWELL:       # %bb.0:
   2890 ; HASWELL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   2891 ; HASWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   2892 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2893 ;
   2894 ; BROADWELL-SSE-LABEL: test_movlhps:
   2895 ; BROADWELL-SSE:       # %bb.0:
   2896 ; BROADWELL-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   2897 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   2898 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   2899 ;
   2900 ; BROADWELL-LABEL: test_movlhps:
   2901 ; BROADWELL:       # %bb.0:
   2902 ; BROADWELL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   2903 ; BROADWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   2904 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2905 ;
   2906 ; SKYLAKE-SSE-LABEL: test_movlhps:
   2907 ; SKYLAKE-SSE:       # %bb.0:
   2908 ; SKYLAKE-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   2909 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   2910 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   2911 ;
   2912 ; SKYLAKE-LABEL: test_movlhps:
   2913 ; SKYLAKE:       # %bb.0:
   2914 ; SKYLAKE-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   2915 ; SKYLAKE-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
   2916 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2917 ;
   2918 ; SKX-SSE-LABEL: test_movlhps:
   2919 ; SKX-SSE:       # %bb.0:
   2920 ; SKX-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   2921 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   2922 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   2923 ;
   2924 ; SKX-LABEL: test_movlhps:
   2925 ; SKX:       # %bb.0:
   2926 ; SKX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
   2927 ; SKX-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
   2928 ; SKX-NEXT:    retq # sched: [7:1.00]
   2929 ;
   2930 ; BTVER2-SSE-LABEL: test_movlhps:
   2931 ; BTVER2-SSE:       # %bb.0:
   2932 ; BTVER2-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
   2933 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   2934 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   2935 ;
   2936 ; BTVER2-LABEL: test_movlhps:
   2937 ; BTVER2:       # %bb.0:
   2938 ; BTVER2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
   2939 ; BTVER2-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   2940 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2941 ;
   2942 ; ZNVER1-SSE-LABEL: test_movlhps:
   2943 ; ZNVER1-SSE:       # %bb.0:
   2944 ; ZNVER1-SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
   2945 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   2946 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   2947 ;
   2948 ; ZNVER1-LABEL: test_movlhps:
   2949 ; ZNVER1:       # %bb.0:
   2950 ; ZNVER1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
   2951 ; ZNVER1-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
   2952 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   2953   %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   2954   %2 = fadd <4 x float> %a1, %1
   2955   ret <4 x float> %2
   2956 }
   2957 
   2958 define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
        ; Scheduling test for a 64-bit low-half load/store. Loads <2 x float> from
        ; %a2, places it in lanes 0-1 of %a1 (shuffle mask <4,5,2,3>), adds %a0, and
        ; stores lanes 0-1 of the sum back through the same pointer.
        ; The assertions below are autogenerated (see the NOTE at the top of the
        ; file); regenerate them with the script instead of editing them by hand.
        ; They currently match (v)movlpd for the load and (v)movlps for the store.
   2959 ; GENERIC-LABEL: test_movlps:
   2960 ; GENERIC:       # %bb.0:
   2961 ; GENERIC-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
   2962 ; GENERIC-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   2963 ; GENERIC-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
   2964 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2965 ;
   2966 ; ATOM-LABEL: test_movlps:
   2967 ; ATOM:       # %bb.0:
   2968 ; ATOM-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
   2969 ; ATOM-NEXT:    addps %xmm0, %xmm1 # sched: [5:5.00]
   2970 ; ATOM-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
   2971 ; ATOM-NEXT:    retq # sched: [79:39.50]
   2972 ;
   2973 ; SLM-LABEL: test_movlps:
   2974 ; SLM:       # %bb.0:
   2975 ; SLM-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
   2976 ; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   2977 ; SLM-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
   2978 ; SLM-NEXT:    retq # sched: [4:1.00]
   2979 ;
   2980 ; SANDY-SSE-LABEL: test_movlps:
   2981 ; SANDY-SSE:       # %bb.0:
   2982 ; SANDY-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
   2983 ; SANDY-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   2984 ; SANDY-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
   2985 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   2986 ;
   2987 ; SANDY-LABEL: test_movlps:
   2988 ; SANDY:       # %bb.0:
   2989 ; SANDY-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
   2990 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   2991 ; SANDY-NEXT:    vmovlps %xmm0, (%rdi) # sched: [1:1.00]
   2992 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2993 ;
   2994 ; HASWELL-SSE-LABEL: test_movlps:
   2995 ; HASWELL-SSE:       # %bb.0:
   2996 ; HASWELL-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   2997 ; HASWELL-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   2998 ; HASWELL-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
   2999 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3000 ;
   3001 ; HASWELL-LABEL: test_movlps:
   3002 ; HASWELL:       # %bb.0:
   3003 ; HASWELL-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   3004 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3005 ; HASWELL-NEXT:    vmovlps %xmm0, (%rdi) # sched: [1:1.00]
   3006 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3007 ;
   3008 ; BROADWELL-SSE-LABEL: test_movlps:
   3009 ; BROADWELL-SSE:       # %bb.0:
   3010 ; BROADWELL-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   3011 ; BROADWELL-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   3012 ; BROADWELL-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
   3013 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3014 ;
   3015 ; BROADWELL-LABEL: test_movlps:
   3016 ; BROADWELL:       # %bb.0:
   3017 ; BROADWELL-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   3018 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3019 ; BROADWELL-NEXT:    vmovlps %xmm0, (%rdi) # sched: [1:1.00]
   3020 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3021 ;
   3022 ; SKYLAKE-SSE-LABEL: test_movlps:
   3023 ; SKYLAKE-SSE:       # %bb.0:
   3024 ; SKYLAKE-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   3025 ; SKYLAKE-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [4:0.50]
   3026 ; SKYLAKE-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
   3027 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3028 ;
   3029 ; SKYLAKE-LABEL: test_movlps:
   3030 ; SKYLAKE:       # %bb.0:
   3031 ; SKYLAKE-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   3032 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   3033 ; SKYLAKE-NEXT:    vmovlps %xmm0, (%rdi) # sched: [1:1.00]
   3034 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3035 ;
   3036 ; SKX-SSE-LABEL: test_movlps:
   3037 ; SKX-SSE:       # %bb.0:
   3038 ; SKX-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   3039 ; SKX-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [4:0.50]
   3040 ; SKX-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:1.00]
   3041 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3042 ;
   3043 ; SKX-LABEL: test_movlps:
   3044 ; SKX:       # %bb.0:
   3045 ; SKX-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   3046 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   3047 ; SKX-NEXT:    vmovlps %xmm0, (%rdi) # sched: [1:1.00]
   3048 ; SKX-NEXT:    retq # sched: [7:1.00]
   3049 ;
   3050 ; BTVER2-SSE-LABEL: test_movlps:
   3051 ; BTVER2-SSE:       # %bb.0:
   3052 ; BTVER2-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   3053 ; BTVER2-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   3054 ; BTVER2-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [2:1.00]
   3055 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3056 ;
   3057 ; BTVER2-LABEL: test_movlps:
   3058 ; BTVER2:       # %bb.0:
   3059 ; BTVER2-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
   3060 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3061 ; BTVER2-NEXT:    vmovlps %xmm0, (%rdi) # sched: [2:1.00]
   3062 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3063 ;
   3064 ; ZNVER1-SSE-LABEL: test_movlps:
   3065 ; ZNVER1-SSE:       # %bb.0:
   3066 ; ZNVER1-SSE-NEXT:    movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
   3067 ; ZNVER1-SSE-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   3068 ; ZNVER1-SSE-NEXT:    movlps %xmm1, (%rdi) # sched: [1:0.50]
   3069 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3070 ;
   3071 ; ZNVER1-LABEL: test_movlps:
   3072 ; ZNVER1:       # %bb.0:
   3073 ; ZNVER1-NEXT:    vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
   3074 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3075 ; ZNVER1-NEXT:    vmovlps %xmm0, (%rdi) # sched: [1:0.50]
   3076 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3077   %1 = bitcast x86_mmx* %a2 to <2 x float>*
   3078   %2 = load <2 x float>, <2 x float> *%1, align 8
   3079   %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   3080   %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
   3081   %5 = fadd <4 x float> %a0, %4
   3082   %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 0, i32 1>
   3083   store <2 x float> %6, <2 x float>* %1
   3084   ret void
   3085 }
   3087 define i32 @test_movmskps(<4 x float> %a0) {
        ; Scheduling test for (v)movmskps. Calls the llvm.x86.sse.movmsk.ps intrinsic
        ; (declared right after this function) on %a0 and returns its i32 result.
        ; The assertions below are autogenerated (see the NOTE at the top of the
        ; file); regenerate them with the script instead of editing them by hand.
        ; Each run expects a single (v)movmskps into %eax followed by retq; the Atom
        ; run additionally expects two nops before the retq.
   3088 ; GENERIC-LABEL: test_movmskps:
   3089 ; GENERIC:       # %bb.0:
   3090 ; GENERIC-NEXT:    movmskps %xmm0, %eax # sched: [2:1.00]
   3091 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3092 ;
   3093 ; ATOM-LABEL: test_movmskps:
   3094 ; ATOM:       # %bb.0:
   3095 ; ATOM-NEXT:    movmskps %xmm0, %eax # sched: [3:3.00]
   3096 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3097 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3098 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3099 ;
   3100 ; SLM-LABEL: test_movmskps:
   3101 ; SLM:       # %bb.0:
   3102 ; SLM-NEXT:    movmskps %xmm0, %eax # sched: [4:1.00]
   3103 ; SLM-NEXT:    retq # sched: [4:1.00]
   3104 ;
   3105 ; SANDY-SSE-LABEL: test_movmskps:
   3106 ; SANDY-SSE:       # %bb.0:
   3107 ; SANDY-SSE-NEXT:    movmskps %xmm0, %eax # sched: [2:1.00]
   3108 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3109 ;
   3110 ; SANDY-LABEL: test_movmskps:
   3111 ; SANDY:       # %bb.0:
   3112 ; SANDY-NEXT:    vmovmskps %xmm0, %eax # sched: [2:1.00]
   3113 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3114 ;
   3115 ; HASWELL-SSE-LABEL: test_movmskps:
   3116 ; HASWELL-SSE:       # %bb.0:
   3117 ; HASWELL-SSE-NEXT:    movmskps %xmm0, %eax # sched: [3:1.00]
   3118 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3119 ;
   3120 ; HASWELL-LABEL: test_movmskps:
   3121 ; HASWELL:       # %bb.0:
   3122 ; HASWELL-NEXT:    vmovmskps %xmm0, %eax # sched: [3:1.00]
   3123 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3124 ;
   3125 ; BROADWELL-SSE-LABEL: test_movmskps:
   3126 ; BROADWELL-SSE:       # %bb.0:
   3127 ; BROADWELL-SSE-NEXT:    movmskps %xmm0, %eax # sched: [3:1.00]
   3128 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3129 ;
   3130 ; BROADWELL-LABEL: test_movmskps:
   3131 ; BROADWELL:       # %bb.0:
   3132 ; BROADWELL-NEXT:    vmovmskps %xmm0, %eax # sched: [3:1.00]
   3133 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3134 ;
   3135 ; SKYLAKE-SSE-LABEL: test_movmskps:
   3136 ; SKYLAKE-SSE:       # %bb.0:
   3137 ; SKYLAKE-SSE-NEXT:    movmskps %xmm0, %eax # sched: [2:1.00]
   3138 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3139 ;
   3140 ; SKYLAKE-LABEL: test_movmskps:
   3141 ; SKYLAKE:       # %bb.0:
   3142 ; SKYLAKE-NEXT:    vmovmskps %xmm0, %eax # sched: [2:1.00]
   3143 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3144 ;
   3145 ; SKX-SSE-LABEL: test_movmskps:
   3146 ; SKX-SSE:       # %bb.0:
   3147 ; SKX-SSE-NEXT:    movmskps %xmm0, %eax # sched: [2:1.00]
   3148 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3149 ;
   3150 ; SKX-LABEL: test_movmskps:
   3151 ; SKX:       # %bb.0:
   3152 ; SKX-NEXT:    vmovmskps %xmm0, %eax # sched: [2:1.00]
   3153 ; SKX-NEXT:    retq # sched: [7:1.00]
   3154 ;
   3155 ; BTVER2-SSE-LABEL: test_movmskps:
   3156 ; BTVER2-SSE:       # %bb.0:
   3157 ; BTVER2-SSE-NEXT:    movmskps %xmm0, %eax # sched: [3:1.00]
   3158 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3159 ;
   3160 ; BTVER2-LABEL: test_movmskps:
   3161 ; BTVER2:       # %bb.0:
   3162 ; BTVER2-NEXT:    vmovmskps %xmm0, %eax # sched: [3:1.00]
   3163 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3164 ;
   3165 ; ZNVER1-SSE-LABEL: test_movmskps:
   3166 ; ZNVER1-SSE:       # %bb.0:
   3167 ; ZNVER1-SSE-NEXT:    movmskps %xmm0, %eax # sched: [1:1.00]
   3168 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3169 ;
   3170 ; ZNVER1-LABEL: test_movmskps:
   3171 ; ZNVER1:       # %bb.0:
   3172 ; ZNVER1-NEXT:    vmovmskps %xmm0, %eax # sched: [1:1.00]
   3173 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3174   %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
   3175   ret i32 %1
   3176 }
   3177 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
   3178 
   3179 define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) {
        ; Scheduling test for (v)movntps. Stores %a0 through %a1 with align 16 and
        ; the !nontemporal !0 metadata attached to the store.
        ; The assertions below are autogenerated (see the NOTE at the top of the
        ; file); regenerate them with the script instead of editing them by hand.
        ; Each run expects a single (v)movntps to (%rdi) followed by retq; the Atom
        ; run additionally expects six nops before the retq.
   3180 ; GENERIC-LABEL: test_movntps:
   3181 ; GENERIC:       # %bb.0:
   3182 ; GENERIC-NEXT:    movntps %xmm0, (%rdi) # sched: [1:1.00]
   3183 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3184 ;
   3185 ; ATOM-LABEL: test_movntps:
   3186 ; ATOM:       # %bb.0:
   3187 ; ATOM-NEXT:    movntps %xmm0, (%rdi) # sched: [1:1.00]
   3188 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3189 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3190 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3191 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3192 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3193 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3194 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3195 ;
   3196 ; SLM-LABEL: test_movntps:
   3197 ; SLM:       # %bb.0:
   3198 ; SLM-NEXT:    movntps %xmm0, (%rdi) # sched: [1:1.00]
   3199 ; SLM-NEXT:    retq # sched: [4:1.00]
   3200 ;
   3201 ; SANDY-SSE-LABEL: test_movntps:
   3202 ; SANDY-SSE:       # %bb.0:
   3203 ; SANDY-SSE-NEXT:    movntps %xmm0, (%rdi) # sched: [1:1.00]
   3204 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3205 ;
   3206 ; SANDY-LABEL: test_movntps:
   3207 ; SANDY:       # %bb.0:
   3208 ; SANDY-NEXT:    vmovntps %xmm0, (%rdi) # sched: [1:1.00]
   3209 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3210 ;
   3211 ; HASWELL-SSE-LABEL: test_movntps:
   3212 ; HASWELL-SSE:       # %bb.0:
   3213 ; HASWELL-SSE-NEXT:    movntps %xmm0, (%rdi) # sched: [1:1.00]
   3214 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3215 ;
   3216 ; HASWELL-LABEL: test_movntps:
   3217 ; HASWELL:       # %bb.0:
   3218 ; HASWELL-NEXT:    vmovntps %xmm0, (%rdi) # sched: [1:1.00]
   3219 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3220 ;
   3221 ; BROADWELL-SSE-LABEL: test_movntps:
   3222 ; BROADWELL-SSE:       # %bb.0:
   3223 ; BROADWELL-SSE-NEXT:    movntps %xmm0, (%rdi) # sched: [1:1.00]
   3224 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3225 ;
   3226 ; BROADWELL-LABEL: test_movntps:
   3227 ; BROADWELL:       # %bb.0:
   3228 ; BROADWELL-NEXT:    vmovntps %xmm0, (%rdi) # sched: [1:1.00]
   3229 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3230 ;
   3231 ; SKYLAKE-SSE-LABEL: test_movntps:
   3232 ; SKYLAKE-SSE:       # %bb.0:
   3233 ; SKYLAKE-SSE-NEXT:    movntps %xmm0, (%rdi) # sched: [1:1.00]
   3234 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3235 ;
   3236 ; SKYLAKE-LABEL: test_movntps:
   3237 ; SKYLAKE:       # %bb.0:
   3238 ; SKYLAKE-NEXT:    vmovntps %xmm0, (%rdi) # sched: [1:1.00]
   3239 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3240 ;
   3241 ; SKX-SSE-LABEL: test_movntps:
   3242 ; SKX-SSE:       # %bb.0:
   3243 ; SKX-SSE-NEXT:    movntps %xmm0, (%rdi) # sched: [1:1.00]
   3244 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3245 ;
   3246 ; SKX-LABEL: test_movntps:
   3247 ; SKX:       # %bb.0:
   3248 ; SKX-NEXT:    vmovntps %xmm0, (%rdi) # sched: [1:1.00]
   3249 ; SKX-NEXT:    retq # sched: [7:1.00]
   3250 ;
   3251 ; BTVER2-SSE-LABEL: test_movntps:
   3252 ; BTVER2-SSE:       # %bb.0:
   3253 ; BTVER2-SSE-NEXT:    movntps %xmm0, (%rdi) # sched: [3:1.00]
   3254 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3255 ;
   3256 ; BTVER2-LABEL: test_movntps:
   3257 ; BTVER2:       # %bb.0:
   3258 ; BTVER2-NEXT:    vmovntps %xmm0, (%rdi) # sched: [3:1.00]
   3259 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3260 ;
   3261 ; ZNVER1-SSE-LABEL: test_movntps:
   3262 ; ZNVER1-SSE:       # %bb.0:
   3263 ; ZNVER1-SSE-NEXT:    movntps %xmm0, (%rdi) # sched: [1:0.50]
   3264 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3265 ;
   3266 ; ZNVER1-LABEL: test_movntps:
   3267 ; ZNVER1:       # %bb.0:
   3268 ; ZNVER1-NEXT:    vmovntps %xmm0, (%rdi) # sched: [1:0.50]
   3269 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3270   store <4 x float> %a0, <4 x float> *%a1, align 16, !nontemporal !0
   3271   ret void
   3272 }
   3273 
   3274 define void @test_movss_mem(float* %a0, float* %a1) {
   3275 ; GENERIC-LABEL: test_movss_mem:
   3276 ; GENERIC:       # %bb.0:
   3277 ; GENERIC-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
   3278 ; GENERIC-NEXT:    addss %xmm0, %xmm0 # sched: [3:1.00]
   3279 ; GENERIC-NEXT:    movss %xmm0, (%rsi) # sched: [1:1.00]
   3280 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3281 ;
   3282 ; ATOM-LABEL: test_movss_mem:
   3283 ; ATOM:       # %bb.0:
   3284 ; ATOM-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:1.00]
   3285 ; ATOM-NEXT:    addss %xmm0, %xmm0 # sched: [5:5.00]
   3286 ; ATOM-NEXT:    movss %xmm0, (%rsi) # sched: [1:1.00]
   3287 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3288 ;
   3289 ; SLM-LABEL: test_movss_mem:
   3290 ; SLM:       # %bb.0:
   3291 ; SLM-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00]
   3292 ; SLM-NEXT:    addss %xmm0, %xmm0 # sched: [3:1.00]
   3293 ; SLM-NEXT:    movss %xmm0, (%rsi) # sched: [1:1.00]
   3294 ; SLM-NEXT:    retq # sched: [4:1.00]
   3295 ;
   3296 ; SANDY-SSE-LABEL: test_movss_mem:
   3297 ; SANDY-SSE:       # %bb.0:
   3298 ; SANDY-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
   3299 ; SANDY-SSE-NEXT:    addss %xmm0, %xmm0 # sched: [3:1.00]
   3300 ; SANDY-SSE-NEXT:    movss %xmm0, (%rsi) # sched: [1:1.00]
   3301 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3302 ;
   3303 ; SANDY-LABEL: test_movss_mem:
   3304 ; SANDY:       # %bb.0:
   3305 ; SANDY-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
   3306 ; SANDY-NEXT:    vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   3307 ; SANDY-NEXT:    vmovss %xmm0, (%rsi) # sched: [1:1.00]
   3308 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3309 ;
   3310 ; HASWELL-SSE-LABEL: test_movss_mem:
   3311 ; HASWELL-SSE:       # %bb.0:
   3312 ; HASWELL-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   3313 ; HASWELL-SSE-NEXT:    addss %xmm0, %xmm0 # sched: [3:1.00]
   3314 ; HASWELL-SSE-NEXT:    movss %xmm0, (%rsi) # sched: [1:1.00]
   3315 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3316 ;
   3317 ; HASWELL-LABEL: test_movss_mem:
   3318 ; HASWELL:       # %bb.0:
   3319 ; HASWELL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   3320 ; HASWELL-NEXT:    vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   3321 ; HASWELL-NEXT:    vmovss %xmm0, (%rsi) # sched: [1:1.00]
   3322 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3323 ;
   3324 ; BROADWELL-SSE-LABEL: test_movss_mem:
   3325 ; BROADWELL-SSE:       # %bb.0:
   3326 ; BROADWELL-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   3327 ; BROADWELL-SSE-NEXT:    addss %xmm0, %xmm0 # sched: [3:1.00]
   3328 ; BROADWELL-SSE-NEXT:    movss %xmm0, (%rsi) # sched: [1:1.00]
   3329 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3330 ;
   3331 ; BROADWELL-LABEL: test_movss_mem:
   3332 ; BROADWELL:       # %bb.0:
   3333 ; BROADWELL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   3334 ; BROADWELL-NEXT:    vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   3335 ; BROADWELL-NEXT:    vmovss %xmm0, (%rsi) # sched: [1:1.00]
   3336 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3337 ;
   3338 ; SKYLAKE-SSE-LABEL: test_movss_mem:
   3339 ; SKYLAKE-SSE:       # %bb.0:
   3340 ; SKYLAKE-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   3341 ; SKYLAKE-SSE-NEXT:    addss %xmm0, %xmm0 # sched: [4:0.50]
   3342 ; SKYLAKE-SSE-NEXT:    movss %xmm0, (%rsi) # sched: [1:1.00]
   3343 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3344 ;
   3345 ; SKYLAKE-LABEL: test_movss_mem:
   3346 ; SKYLAKE:       # %bb.0:
   3347 ; SKYLAKE-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   3348 ; SKYLAKE-NEXT:    vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
   3349 ; SKYLAKE-NEXT:    vmovss %xmm0, (%rsi) # sched: [1:1.00]
   3350 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3351 ;
   3352 ; SKX-SSE-LABEL: test_movss_mem:
   3353 ; SKX-SSE:       # %bb.0:
   3354 ; SKX-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   3355 ; SKX-SSE-NEXT:    addss %xmm0, %xmm0 # sched: [4:0.50]
   3356 ; SKX-SSE-NEXT:    movss %xmm0, (%rsi) # sched: [1:1.00]
   3357 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3358 ;
   3359 ; SKX-LABEL: test_movss_mem:
   3360 ; SKX:       # %bb.0:
   3361 ; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
   3362 ; SKX-NEXT:    vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
   3363 ; SKX-NEXT:    vmovss %xmm0, (%rsi) # sched: [1:1.00]
   3364 ; SKX-NEXT:    retq # sched: [7:1.00]
   3365 ;
   3366 ; BTVER2-SSE-LABEL: test_movss_mem:
   3367 ; BTVER2-SSE:       # %bb.0:
   3368 ; BTVER2-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
   3369 ; BTVER2-SSE-NEXT:    addss %xmm0, %xmm0 # sched: [3:1.00]
   3370 ; BTVER2-SSE-NEXT:    movss %xmm0, (%rsi) # sched: [2:1.00]
   3371 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3372 ;
   3373 ; BTVER2-LABEL: test_movss_mem:
   3374 ; BTVER2:       # %bb.0:
   3375 ; BTVER2-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
   3376 ; BTVER2-NEXT:    vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   3377 ; BTVER2-NEXT:    vmovss %xmm0, (%rsi) # sched: [2:1.00]
   3378 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3379 ;
   3380 ; ZNVER1-SSE-LABEL: test_movss_mem:
   3381 ; ZNVER1-SSE:       # %bb.0:
   3382 ; ZNVER1-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
   3383 ; ZNVER1-SSE-NEXT:    addss %xmm0, %xmm0 # sched: [3:1.00]
   3384 ; ZNVER1-SSE-NEXT:    movss %xmm0, (%rsi) # sched: [1:0.50]
   3385 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3386 ;
   3387 ; ZNVER1-LABEL: test_movss_mem:
   3388 ; ZNVER1:       # %bb.0:
   3389 ; ZNVER1-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
   3390 ; ZNVER1-NEXT:    vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   3391 ; ZNVER1-NEXT:    vmovss %xmm0, (%rsi) # sched: [1:0.50]
   3392 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3393   %1 = load float, float* %a0, align 1
   3394   %2 = fadd float %1, %1
   3395   store float %2, float *%a1, align 1
   3396   ret void
   3397 }
   3398 
   3399 define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) {
        ; Register-to-register low-lane move: returns %a0 with lane 0 replaced by
        ; lane 0 of %a1 (see the shufflevector at the bottom of the function).
        ; The assertions below expect movss for the generic, Atom, SLM and
        ; SSE-only prefixes, and vblendps for the remaining prefixes; both forms
        ; are printed as xmm0 = xmm1[0],xmm0[1,2,3]. The Atom expectations also
        ; contain six nop instructions ahead of retq.
        ; NOTE(review): the "sched: [x:y]" suffixes are produced by the
        ; -print-schedule flag in the RUN lines; presumably latency and
        ; reciprocal throughput -- confirm before relying on that reading.
   3400 ; GENERIC-LABEL: test_movss_reg:
   3401 ; GENERIC:       # %bb.0:
   3402 ; GENERIC-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
   3403 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3404 ;
   3405 ; ATOM-LABEL: test_movss_reg:
   3406 ; ATOM:       # %bb.0:
   3407 ; ATOM-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
   3408 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3409 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3410 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3411 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3412 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3413 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3414 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3415 ;
   3416 ; SLM-LABEL: test_movss_reg:
   3417 ; SLM:       # %bb.0:
   3418 ; SLM-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
   3419 ; SLM-NEXT:    retq # sched: [4:1.00]
   3420 ;
   3421 ; SANDY-SSE-LABEL: test_movss_reg:
   3422 ; SANDY-SSE:       # %bb.0:
   3423 ; SANDY-SSE-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
   3424 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3425 ;
   3426 ; SANDY-LABEL: test_movss_reg:
   3427 ; SANDY:       # %bb.0:
   3428 ; SANDY-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
   3429 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3430 ;
   3431 ; HASWELL-SSE-LABEL: test_movss_reg:
   3432 ; HASWELL-SSE:       # %bb.0:
   3433 ; HASWELL-SSE-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
   3434 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3435 ;
   3436 ; HASWELL-LABEL: test_movss_reg:
   3437 ; HASWELL:       # %bb.0:
   3438 ; HASWELL-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
   3439 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3440 ;
   3441 ; BROADWELL-SSE-LABEL: test_movss_reg:
   3442 ; BROADWELL-SSE:       # %bb.0:
   3443 ; BROADWELL-SSE-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
   3444 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3445 ;
   3446 ; BROADWELL-LABEL: test_movss_reg:
   3447 ; BROADWELL:       # %bb.0:
   3448 ; BROADWELL-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
   3449 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3450 ;
   3451 ; SKYLAKE-SSE-LABEL: test_movss_reg:
   3452 ; SKYLAKE-SSE:       # %bb.0:
   3453 ; SKYLAKE-SSE-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
   3454 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3455 ;
   3456 ; SKYLAKE-LABEL: test_movss_reg:
   3457 ; SKYLAKE:       # %bb.0:
   3458 ; SKYLAKE-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
   3459 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3460 ;
   3461 ; SKX-SSE-LABEL: test_movss_reg:
   3462 ; SKX-SSE:       # %bb.0:
   3463 ; SKX-SSE-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
   3464 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3465 ;
   3466 ; SKX-LABEL: test_movss_reg:
   3467 ; SKX:       # %bb.0:
   3468 ; SKX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
   3469 ; SKX-NEXT:    retq # sched: [7:1.00]
   3470 ;
   3471 ; BTVER2-SSE-LABEL: test_movss_reg:
   3472 ; BTVER2-SSE:       # %bb.0:
   3473 ; BTVER2-SSE-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
   3474 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3475 ;
   3476 ; BTVER2-LABEL: test_movss_reg:
   3477 ; BTVER2:       # %bb.0:
   3478 ; BTVER2-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
   3479 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3480 ;
   3481 ; ZNVER1-SSE-LABEL: test_movss_reg:
   3482 ; ZNVER1-SSE:       # %bb.0:
   3483 ; ZNVER1-SSE-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
   3484 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3485 ;
   3486 ; ZNVER1-LABEL: test_movss_reg:
   3487 ; ZNVER1:       # %bb.0:
   3488 ; ZNVER1-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
   3489 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3490   %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; mask index 4 selects %a1[0]; indices 1,2,3 keep %a0[1..3]
   3491   ret <4 x float> %1
   3492 }
   3493 
   3494 define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) {
        ; Unaligned vector load/store: loads a <4 x float> from %a0 with align 1,
        ; adds it to itself, and stores the sum to %a1 with align 1.
        ; Every prefix below expects a movups load from (%rdi), an addps, and a
        ; movups store to (%rsi); the AVX-enabled prefixes expect the v-prefixed
        ; spellings of the same three instructions.
        ; NOTE(review): the "sched: [x:y]" suffixes are produced by the
        ; -print-schedule flag in the RUN lines; presumably latency and
        ; reciprocal throughput -- confirm before relying on that reading.
   3495 ; GENERIC-LABEL: test_movups:
   3496 ; GENERIC:       # %bb.0:
   3497 ; GENERIC-NEXT:    movups (%rdi), %xmm0 # sched: [6:0.50]
   3498 ; GENERIC-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
   3499 ; GENERIC-NEXT:    movups %xmm0, (%rsi) # sched: [1:1.00]
   3500 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3501 ;
   3502 ; ATOM-LABEL: test_movups:
   3503 ; ATOM:       # %bb.0:
   3504 ; ATOM-NEXT:    movups (%rdi), %xmm0 # sched: [3:1.50]
   3505 ; ATOM-NEXT:    addps %xmm0, %xmm0 # sched: [5:5.00]
   3506 ; ATOM-NEXT:    movups %xmm0, (%rsi) # sched: [2:1.00]
   3507 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3508 ;
   3509 ; SLM-LABEL: test_movups:
   3510 ; SLM:       # %bb.0:
   3511 ; SLM-NEXT:    movups (%rdi), %xmm0 # sched: [3:1.00]
   3512 ; SLM-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
   3513 ; SLM-NEXT:    movups %xmm0, (%rsi) # sched: [1:1.00]
   3514 ; SLM-NEXT:    retq # sched: [4:1.00]
   3515 ;
   3516 ; SANDY-SSE-LABEL: test_movups:
   3517 ; SANDY-SSE:       # %bb.0:
   3518 ; SANDY-SSE-NEXT:    movups (%rdi), %xmm0 # sched: [6:0.50]
   3519 ; SANDY-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
   3520 ; SANDY-SSE-NEXT:    movups %xmm0, (%rsi) # sched: [1:1.00]
   3521 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3522 ;
   3523 ; SANDY-LABEL: test_movups:
   3524 ; SANDY:       # %bb.0:
   3525 ; SANDY-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
   3526 ; SANDY-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   3527 ; SANDY-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:1.00]
   3528 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3529 ;
   3530 ; HASWELL-SSE-LABEL: test_movups:
   3531 ; HASWELL-SSE:       # %bb.0:
   3532 ; HASWELL-SSE-NEXT:    movups (%rdi), %xmm0 # sched: [6:0.50]
   3533 ; HASWELL-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
   3534 ; HASWELL-SSE-NEXT:    movups %xmm0, (%rsi) # sched: [1:1.00]
   3535 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3536 ;
   3537 ; HASWELL-LABEL: test_movups:
   3538 ; HASWELL:       # %bb.0:
   3539 ; HASWELL-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
   3540 ; HASWELL-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   3541 ; HASWELL-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:1.00]
   3542 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3543 ;
   3544 ; BROADWELL-SSE-LABEL: test_movups:
   3545 ; BROADWELL-SSE:       # %bb.0:
   3546 ; BROADWELL-SSE-NEXT:    movups (%rdi), %xmm0 # sched: [5:0.50]
   3547 ; BROADWELL-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
   3548 ; BROADWELL-SSE-NEXT:    movups %xmm0, (%rsi) # sched: [1:1.00]
   3549 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3550 ;
   3551 ; BROADWELL-LABEL: test_movups:
   3552 ; BROADWELL:       # %bb.0:
   3553 ; BROADWELL-NEXT:    vmovups (%rdi), %xmm0 # sched: [5:0.50]
   3554 ; BROADWELL-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   3555 ; BROADWELL-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:1.00]
   3556 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3557 ;
   3558 ; SKYLAKE-SSE-LABEL: test_movups:
   3559 ; SKYLAKE-SSE:       # %bb.0:
   3560 ; SKYLAKE-SSE-NEXT:    movups (%rdi), %xmm0 # sched: [6:0.50]
   3561 ; SKYLAKE-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [4:0.50]
   3562 ; SKYLAKE-SSE-NEXT:    movups %xmm0, (%rsi) # sched: [1:1.00]
   3563 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3564 ;
   3565 ; SKYLAKE-LABEL: test_movups:
   3566 ; SKYLAKE:       # %bb.0:
   3567 ; SKYLAKE-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
   3568 ; SKYLAKE-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
   3569 ; SKYLAKE-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:1.00]
   3570 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3571 ;
   3572 ; SKX-SSE-LABEL: test_movups:
   3573 ; SKX-SSE:       # %bb.0:
   3574 ; SKX-SSE-NEXT:    movups (%rdi), %xmm0 # sched: [6:0.50]
   3575 ; SKX-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [4:0.50]
   3576 ; SKX-SSE-NEXT:    movups %xmm0, (%rsi) # sched: [1:1.00]
   3577 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3578 ;
   3579 ; SKX-LABEL: test_movups:
   3580 ; SKX:       # %bb.0:
   3581 ; SKX-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
   3582 ; SKX-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
   3583 ; SKX-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:1.00]
   3584 ; SKX-NEXT:    retq # sched: [7:1.00]
   3585 ;
   3586 ; BTVER2-SSE-LABEL: test_movups:
   3587 ; BTVER2-SSE:       # %bb.0:
   3588 ; BTVER2-SSE-NEXT:    movups (%rdi), %xmm0 # sched: [5:1.00]
   3589 ; BTVER2-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
   3590 ; BTVER2-SSE-NEXT:    movups %xmm0, (%rsi) # sched: [1:1.00]
   3591 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3592 ;
   3593 ; BTVER2-LABEL: test_movups:
   3594 ; BTVER2:       # %bb.0:
   3595 ; BTVER2-NEXT:    vmovups (%rdi), %xmm0 # sched: [5:1.00]
   3596 ; BTVER2-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   3597 ; BTVER2-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:1.00]
   3598 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3599 ;
   3600 ; ZNVER1-SSE-LABEL: test_movups:
   3601 ; ZNVER1-SSE:       # %bb.0:
   3602 ; ZNVER1-SSE-NEXT:    movups (%rdi), %xmm0 # sched: [8:0.50]
   3603 ; ZNVER1-SSE-NEXT:    addps %xmm0, %xmm0 # sched: [3:1.00]
   3604 ; ZNVER1-SSE-NEXT:    movups %xmm0, (%rsi) # sched: [1:0.50]
   3605 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3606 ;
   3607 ; ZNVER1-LABEL: test_movups:
   3608 ; ZNVER1:       # %bb.0:
   3609 ; ZNVER1-NEXT:    vmovups (%rdi), %xmm0 # sched: [8:0.50]
   3610 ; ZNVER1-NEXT:    vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
   3611 ; ZNVER1-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:0.50]
   3612 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3613   %1 = load <4 x float>, <4 x float> *%a0, align 1 ; align 1: only byte alignment is stated for the source
   3614   %2 = fadd <4 x float> %1, %1 ; per-lane x + x on the loaded vector
   3615   store <4 x float> %2, <4 x float> *%a1, align 1 ; align 1 on the destination as well
   3616   ret void
   3617 }
   3618 
   3619 define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Packed single-precision multiply: computes (%a0 * %a1) * load(%a2) and
        ; returns the product.
        ; Every prefix below expects two multiplies: a register-register mulps
        ; (%xmm1 into %xmm0) followed by a mulps whose source operand is the
        ; memory at (%rdi), with no separate load instruction. The AVX-enabled
        ; prefixes expect the three-operand vmulps spelling.
        ; NOTE(review): the "sched: [x:y]" suffixes are produced by the
        ; -print-schedule flag in the RUN lines; presumably latency and
        ; reciprocal throughput -- confirm before relying on that reading.
   3620 ; GENERIC-LABEL: test_mulps:
   3621 ; GENERIC:       # %bb.0:
   3622 ; GENERIC-NEXT:    mulps %xmm1, %xmm0 # sched: [5:1.00]
   3623 ; GENERIC-NEXT:    mulps (%rdi), %xmm0 # sched: [11:1.00]
   3624 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3625 ;
   3626 ; ATOM-LABEL: test_mulps:
   3627 ; ATOM:       # %bb.0:
   3628 ; ATOM-NEXT:    mulps %xmm1, %xmm0 # sched: [5:5.00]
   3629 ; ATOM-NEXT:    mulps (%rdi), %xmm0 # sched: [5:5.00]
   3630 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3631 ;
   3632 ; SLM-LABEL: test_mulps:
   3633 ; SLM:       # %bb.0:
   3634 ; SLM-NEXT:    mulps %xmm1, %xmm0 # sched: [5:2.00]
   3635 ; SLM-NEXT:    mulps (%rdi), %xmm0 # sched: [8:2.00]
   3636 ; SLM-NEXT:    retq # sched: [4:1.00]
   3637 ;
   3638 ; SANDY-SSE-LABEL: test_mulps:
   3639 ; SANDY-SSE:       # %bb.0:
   3640 ; SANDY-SSE-NEXT:    mulps %xmm1, %xmm0 # sched: [5:1.00]
   3641 ; SANDY-SSE-NEXT:    mulps (%rdi), %xmm0 # sched: [11:1.00]
   3642 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3643 ;
   3644 ; SANDY-LABEL: test_mulps:
   3645 ; SANDY:       # %bb.0:
   3646 ; SANDY-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   3647 ; SANDY-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   3648 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3649 ;
   3650 ; HASWELL-SSE-LABEL: test_mulps:
   3651 ; HASWELL-SSE:       # %bb.0:
   3652 ; HASWELL-SSE-NEXT:    mulps %xmm1, %xmm0 # sched: [5:0.50]
   3653 ; HASWELL-SSE-NEXT:    mulps (%rdi), %xmm0 # sched: [11:0.50]
   3654 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3655 ;
   3656 ; HASWELL-LABEL: test_mulps:
   3657 ; HASWELL:       # %bb.0:
   3658 ; HASWELL-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
   3659 ; HASWELL-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
   3660 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3661 ;
   3662 ; BROADWELL-SSE-LABEL: test_mulps:
   3663 ; BROADWELL-SSE:       # %bb.0:
   3664 ; BROADWELL-SSE-NEXT:    mulps %xmm1, %xmm0 # sched: [3:0.50]
   3665 ; BROADWELL-SSE-NEXT:    mulps (%rdi), %xmm0 # sched: [8:0.50]
   3666 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3667 ;
   3668 ; BROADWELL-LABEL: test_mulps:
   3669 ; BROADWELL:       # %bb.0:
   3670 ; BROADWELL-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
   3671 ; BROADWELL-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   3672 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3673 ;
   3674 ; SKYLAKE-SSE-LABEL: test_mulps:
   3675 ; SKYLAKE-SSE:       # %bb.0:
   3676 ; SKYLAKE-SSE-NEXT:    mulps %xmm1, %xmm0 # sched: [4:0.50]
   3677 ; SKYLAKE-SSE-NEXT:    mulps (%rdi), %xmm0 # sched: [10:0.50]
   3678 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3679 ;
   3680 ; SKYLAKE-LABEL: test_mulps:
   3681 ; SKYLAKE:       # %bb.0:
   3682 ; SKYLAKE-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   3683 ; SKYLAKE-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   3684 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3685 ;
   3686 ; SKX-SSE-LABEL: test_mulps:
   3687 ; SKX-SSE:       # %bb.0:
   3688 ; SKX-SSE-NEXT:    mulps %xmm1, %xmm0 # sched: [4:0.50]
   3689 ; SKX-SSE-NEXT:    mulps (%rdi), %xmm0 # sched: [10:0.50]
   3690 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3691 ;
   3692 ; SKX-LABEL: test_mulps:
   3693 ; SKX:       # %bb.0:
   3694 ; SKX-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   3695 ; SKX-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   3696 ; SKX-NEXT:    retq # sched: [7:1.00]
   3697 ;
   3698 ; BTVER2-SSE-LABEL: test_mulps:
   3699 ; BTVER2-SSE:       # %bb.0:
   3700 ; BTVER2-SSE-NEXT:    mulps %xmm1, %xmm0 # sched: [2:1.00]
   3701 ; BTVER2-SSE-NEXT:    mulps (%rdi), %xmm0 # sched: [7:1.00]
   3702 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3703 ;
   3704 ; BTVER2-LABEL: test_mulps:
   3705 ; BTVER2:       # %bb.0:
   3706 ; BTVER2-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   3707 ; BTVER2-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3708 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3709 ;
   3710 ; ZNVER1-SSE-LABEL: test_mulps:
   3711 ; ZNVER1-SSE:       # %bb.0:
   3712 ; ZNVER1-SSE-NEXT:    mulps %xmm1, %xmm0 # sched: [3:0.50]
   3713 ; ZNVER1-SSE-NEXT:    mulps (%rdi), %xmm0 # sched: [10:0.50]
   3714 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3715 ;
   3716 ; ZNVER1-LABEL: test_mulps:
   3717 ; ZNVER1:       # %bb.0:
   3718 ; ZNVER1-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
   3719 ; ZNVER1-NEXT:    vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   3720 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3721   %1 = fmul <4 x float> %a0, %a1 ; register-register multiply
   3722   %2 = load <4 x float>, <4 x float> *%a2, align 16 ; 16-byte-aligned vector load
   3723   %3 = fmul <4 x float> %1, %2 ; multiply by the loaded vector (memory-operand form in the expected asm)
   3724   ret <4 x float> %3
   3725 }
   3726 
   3727 define float @test_mulss(float %a0, float %a1, float *%a2) {
        ; Scalar single-precision multiply: computes (%a0 * %a1) * load(%a2) and
        ; returns the product.
        ; Every prefix below expects two multiplies: a register-register mulss
        ; (%xmm1 into %xmm0) followed by a mulss whose source operand is the
        ; memory at (%rdi), with no separate load instruction. The AVX-enabled
        ; prefixes expect the three-operand vmulss spelling.
        ; NOTE(review): the "sched: [x:y]" suffixes are produced by the
        ; -print-schedule flag in the RUN lines; presumably latency and
        ; reciprocal throughput -- confirm before relying on that reading.
   3728 ; GENERIC-LABEL: test_mulss:
   3729 ; GENERIC:       # %bb.0:
   3730 ; GENERIC-NEXT:    mulss %xmm1, %xmm0 # sched: [5:1.00]
   3731 ; GENERIC-NEXT:    mulss (%rdi), %xmm0 # sched: [11:1.00]
   3732 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3733 ;
   3734 ; ATOM-LABEL: test_mulss:
   3735 ; ATOM:       # %bb.0:
   3736 ; ATOM-NEXT:    mulss %xmm1, %xmm0 # sched: [4:4.00]
   3737 ; ATOM-NEXT:    mulss (%rdi), %xmm0 # sched: [4:4.00]
   3738 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3739 ;
   3740 ; SLM-LABEL: test_mulss:
   3741 ; SLM:       # %bb.0:
   3742 ; SLM-NEXT:    mulss %xmm1, %xmm0 # sched: [5:2.00]
   3743 ; SLM-NEXT:    mulss (%rdi), %xmm0 # sched: [8:2.00]
   3744 ; SLM-NEXT:    retq # sched: [4:1.00]
   3745 ;
   3746 ; SANDY-SSE-LABEL: test_mulss:
   3747 ; SANDY-SSE:       # %bb.0:
   3748 ; SANDY-SSE-NEXT:    mulss %xmm1, %xmm0 # sched: [5:1.00]
   3749 ; SANDY-SSE-NEXT:    mulss (%rdi), %xmm0 # sched: [11:1.00]
   3750 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3751 ;
   3752 ; SANDY-LABEL: test_mulss:
   3753 ; SANDY:       # %bb.0:
   3754 ; SANDY-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
   3755 ; SANDY-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
   3756 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3757 ;
   3758 ; HASWELL-SSE-LABEL: test_mulss:
   3759 ; HASWELL-SSE:       # %bb.0:
   3760 ; HASWELL-SSE-NEXT:    mulss %xmm1, %xmm0 # sched: [5:0.50]
   3761 ; HASWELL-SSE-NEXT:    mulss (%rdi), %xmm0 # sched: [10:0.50]
   3762 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3763 ;
   3764 ; HASWELL-LABEL: test_mulss:
   3765 ; HASWELL:       # %bb.0:
   3766 ; HASWELL-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
   3767 ; HASWELL-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   3768 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3769 ;
   3770 ; BROADWELL-SSE-LABEL: test_mulss:
   3771 ; BROADWELL-SSE:       # %bb.0:
   3772 ; BROADWELL-SSE-NEXT:    mulss %xmm1, %xmm0 # sched: [3:0.50]
   3773 ; BROADWELL-SSE-NEXT:    mulss (%rdi), %xmm0 # sched: [8:0.50]
   3774 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3775 ;
   3776 ; BROADWELL-LABEL: test_mulss:
   3777 ; BROADWELL:       # %bb.0:
   3778 ; BROADWELL-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
   3779 ; BROADWELL-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   3780 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3781 ;
   3782 ; SKYLAKE-SSE-LABEL: test_mulss:
   3783 ; SKYLAKE-SSE:       # %bb.0:
   3784 ; SKYLAKE-SSE-NEXT:    mulss %xmm1, %xmm0 # sched: [4:0.50]
   3785 ; SKYLAKE-SSE-NEXT:    mulss (%rdi), %xmm0 # sched: [9:0.50]
   3786 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3787 ;
   3788 ; SKYLAKE-LABEL: test_mulss:
   3789 ; SKYLAKE:       # %bb.0:
   3790 ; SKYLAKE-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   3791 ; SKYLAKE-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   3792 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3793 ;
   3794 ; SKX-SSE-LABEL: test_mulss:
   3795 ; SKX-SSE:       # %bb.0:
   3796 ; SKX-SSE-NEXT:    mulss %xmm1, %xmm0 # sched: [4:0.50]
   3797 ; SKX-SSE-NEXT:    mulss (%rdi), %xmm0 # sched: [9:0.50]
   3798 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3799 ;
   3800 ; SKX-LABEL: test_mulss:
   3801 ; SKX:       # %bb.0:
   3802 ; SKX-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   3803 ; SKX-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   3804 ; SKX-NEXT:    retq # sched: [7:1.00]
   3805 ;
   3806 ; BTVER2-SSE-LABEL: test_mulss:
   3807 ; BTVER2-SSE:       # %bb.0:
   3808 ; BTVER2-SSE-NEXT:    mulss %xmm1, %xmm0 # sched: [2:1.00]
   3809 ; BTVER2-SSE-NEXT:    mulss (%rdi), %xmm0 # sched: [7:1.00]
   3810 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3811 ;
   3812 ; BTVER2-LABEL: test_mulss:
   3813 ; BTVER2:       # %bb.0:
   3814 ; BTVER2-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
   3815 ; BTVER2-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3816 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3817 ;
   3818 ; ZNVER1-SSE-LABEL: test_mulss:
   3819 ; ZNVER1-SSE:       # %bb.0:
   3820 ; ZNVER1-SSE-NEXT:    mulss %xmm1, %xmm0 # sched: [3:0.50]
   3821 ; ZNVER1-SSE-NEXT:    mulss (%rdi), %xmm0 # sched: [10:0.50]
   3822 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3823 ;
   3824 ; ZNVER1-LABEL: test_mulss:
   3825 ; ZNVER1:       # %bb.0:
   3826 ; ZNVER1-NEXT:    vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
   3827 ; ZNVER1-NEXT:    vmulss (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   3828 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3829   %1 = fmul float %a0, %a1 ; register-register multiply
   3830   %2 = load float, float *%a2, align 4 ; 4-byte-aligned scalar load
   3831   %3 = fmul float %1, %2 ; multiply by the loaded scalar (memory-operand form in the expected asm)
   3832   ret float %3
   3833 }
   3834 
   3835 define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Packed bitwise OR on float vectors: returns %a0 | %a1 | load(%a2).
        ; The IR has no bitwise OR on floating-point types here, so each operand
        ; is bitcast to <4 x i32>, OR-ed, and the result is bitcast back.
        ; Every prefix below expects a register-register orps (%xmm1 into %xmm0)
        ; followed by an orps whose source operand is the memory at (%rdi); the
        ; AVX-enabled prefixes expect the three-operand vorps spelling. The Atom
        ; expectations also contain four nop instructions ahead of retq.
        ; NOTE(review): the "sched: [x:y]" suffixes are produced by the
        ; -print-schedule flag in the RUN lines; presumably latency and
        ; reciprocal throughput -- confirm before relying on that reading.
   3836 ; GENERIC-LABEL: test_orps:
   3837 ; GENERIC:       # %bb.0:
   3838 ; GENERIC-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
   3839 ; GENERIC-NEXT:    orps (%rdi), %xmm0 # sched: [7:1.00]
   3840 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3841 ;
   3842 ; ATOM-LABEL: test_orps:
   3843 ; ATOM:       # %bb.0:
   3844 ; ATOM-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.50]
   3845 ; ATOM-NEXT:    orps (%rdi), %xmm0 # sched: [1:1.00]
   3846 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3847 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3848 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3849 ; ATOM-NEXT:    nop # sched: [1:0.50]
   3850 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3851 ;
   3852 ; SLM-LABEL: test_orps:
   3853 ; SLM:       # %bb.0:
   3854 ; SLM-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.50]
   3855 ; SLM-NEXT:    orps (%rdi), %xmm0 # sched: [4:1.00]
   3856 ; SLM-NEXT:    retq # sched: [4:1.00]
   3857 ;
   3858 ; SANDY-SSE-LABEL: test_orps:
   3859 ; SANDY-SSE:       # %bb.0:
   3860 ; SANDY-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
   3861 ; SANDY-SSE-NEXT:    orps (%rdi), %xmm0 # sched: [7:1.00]
   3862 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3863 ;
   3864 ; SANDY-LABEL: test_orps:
   3865 ; SANDY:       # %bb.0:
   3866 ; SANDY-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   3867 ; SANDY-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3868 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3869 ;
   3870 ; HASWELL-SSE-LABEL: test_orps:
   3871 ; HASWELL-SSE:       # %bb.0:
   3872 ; HASWELL-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
   3873 ; HASWELL-SSE-NEXT:    orps (%rdi), %xmm0 # sched: [7:1.00]
   3874 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3875 ;
   3876 ; HASWELL-LABEL: test_orps:
   3877 ; HASWELL:       # %bb.0:
   3878 ; HASWELL-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   3879 ; HASWELL-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3880 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3881 ;
   3882 ; BROADWELL-SSE-LABEL: test_orps:
   3883 ; BROADWELL-SSE:       # %bb.0:
   3884 ; BROADWELL-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
   3885 ; BROADWELL-SSE-NEXT:    orps (%rdi), %xmm0 # sched: [6:1.00]
   3886 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   3887 ;
   3888 ; BROADWELL-LABEL: test_orps:
   3889 ; BROADWELL:       # %bb.0:
   3890 ; BROADWELL-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   3891 ; BROADWELL-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   3892 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3893 ;
   3894 ; SKYLAKE-SSE-LABEL: test_orps:
   3895 ; SKYLAKE-SSE:       # %bb.0:
   3896 ; SKYLAKE-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.33]
   3897 ; SKYLAKE-SSE-NEXT:    orps (%rdi), %xmm0 # sched: [7:0.50]
   3898 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   3899 ;
   3900 ; SKYLAKE-LABEL: test_orps:
   3901 ; SKYLAKE:       # %bb.0:
   3902 ; SKYLAKE-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   3903 ; SKYLAKE-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   3904 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3905 ;
   3906 ; SKX-SSE-LABEL: test_orps:
   3907 ; SKX-SSE:       # %bb.0:
   3908 ; SKX-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.33]
   3909 ; SKX-SSE-NEXT:    orps (%rdi), %xmm0 # sched: [7:0.50]
   3910 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   3911 ;
   3912 ; SKX-LABEL: test_orps:
   3913 ; SKX:       # %bb.0:
   3914 ; SKX-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   3915 ; SKX-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   3916 ; SKX-NEXT:    retq # sched: [7:1.00]
   3917 ;
   3918 ; BTVER2-SSE-LABEL: test_orps:
   3919 ; BTVER2-SSE:       # %bb.0:
   3920 ; BTVER2-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.50]
   3921 ; BTVER2-SSE-NEXT:    orps (%rdi), %xmm0 # sched: [6:1.00]
   3922 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   3923 ;
   3924 ; BTVER2-LABEL: test_orps:
   3925 ; BTVER2:       # %bb.0:
   3926 ; BTVER2-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3927 ; BTVER2-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   3928 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3929 ;
   3930 ; ZNVER1-SSE-LABEL: test_orps:
   3931 ; ZNVER1-SSE:       # %bb.0:
   3932 ; ZNVER1-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.25]
   3933 ; ZNVER1-SSE-NEXT:    orps (%rdi), %xmm0 # sched: [8:0.50]
   3934 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   3935 ;
   3936 ; ZNVER1-LABEL: test_orps:
   3937 ; ZNVER1:       # %bb.0:
   3938 ; ZNVER1-NEXT:    vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   3939 ; ZNVER1-NEXT:    vorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   3940 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3941   %1 = bitcast <4 x float> %a0 to <4 x i32> ; reinterpret first operand as integer lanes
   3942   %2 = bitcast <4 x float> %a1 to <4 x i32> ; reinterpret second operand as integer lanes
   3943   %3 = or <4 x i32> %1, %2 ; register-register OR
   3944   %4 = load <4 x float>, <4 x float> *%a2, align 16 ; 16-byte-aligned vector load
   3945   %5 = bitcast <4 x float> %4 to <4 x i32> ; reinterpret the loaded vector as integer lanes
   3946   %6 = or <4 x i32> %3, %5 ; OR with the loaded vector (memory-operand form in the expected asm)
   3947   %7 = bitcast <4 x i32> %6 to <4 x float> ; back to the float vector return type
   3948   ret <4 x float> %7
   3949 }
   3950 
   3951 define void @test_prefetch(i8* %a0) optsize {
   3952 ; GENERIC-LABEL: test_prefetch:
   3953 ; GENERIC:       # %bb.0:
   3954 ; GENERIC-NEXT:    #APP
   3955 ; GENERIC-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
   3956 ; GENERIC-NEXT:    prefetcht0 (%rdi) # sched: [5:0.50]
   3957 ; GENERIC-NEXT:    prefetcht1 (%rdi) # sched: [5:0.50]
   3958 ; GENERIC-NEXT:    prefetcht2 (%rdi) # sched: [5:0.50]
   3959 ; GENERIC-NEXT:    #NO_APP
   3960 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3961 ;
   3962 ; ATOM-LABEL: test_prefetch:
   3963 ; ATOM:       # %bb.0:
   3964 ; ATOM-NEXT:    #APP
   3965 ; ATOM-NEXT:    prefetchnta (%rdi) # sched: [1:1.00]
   3966 ; ATOM-NEXT:    prefetcht0 (%rdi) # sched: [1:1.00]
   3967 ; ATOM-NEXT:    prefetcht1 (%rdi) # sched: [1:1.00]
   3968 ; ATOM-NEXT:    prefetcht2 (%rdi) # sched: [1:1.00]
   3969 ; ATOM-NEXT:    #NO_APP
   3970 ; ATOM-NEXT:    retq # sched: [79:39.50]
   3971 ;
   3972 ; SLM-LABEL: test_prefetch:
   3973 ; SLM:       # %bb.0:
   3974 ; SLM-NEXT:    #APP
   3975 ; SLM-NEXT:    prefetchnta (%rdi) # sched: [3:1.00]
   3976 ; SLM-NEXT:    prefetcht0 (%rdi) # sched: [3:1.00]
   3977 ; SLM-NEXT:    prefetcht1 (%rdi) # sched: [3:1.00]
   3978 ; SLM-NEXT:    prefetcht2 (%rdi) # sched: [3:1.00]
   3979 ; SLM-NEXT:    #NO_APP
   3980 ; SLM-NEXT:    retq # sched: [4:1.00]
   3981 ;
   3982 ; SANDY-SSE-LABEL: test_prefetch:
   3983 ; SANDY-SSE:       # %bb.0:
   3984 ; SANDY-SSE-NEXT:    #APP
   3985 ; SANDY-SSE-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
   3986 ; SANDY-SSE-NEXT:    prefetcht0 (%rdi) # sched: [5:0.50]
   3987 ; SANDY-SSE-NEXT:    prefetcht1 (%rdi) # sched: [5:0.50]
   3988 ; SANDY-SSE-NEXT:    prefetcht2 (%rdi) # sched: [5:0.50]
   3989 ; SANDY-SSE-NEXT:    #NO_APP
   3990 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   3991 ;
   3992 ; SANDY-LABEL: test_prefetch:
   3993 ; SANDY:       # %bb.0:
   3994 ; SANDY-NEXT:    #APP
   3995 ; SANDY-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
   3996 ; SANDY-NEXT:    prefetcht0 (%rdi) # sched: [5:0.50]
   3997 ; SANDY-NEXT:    prefetcht1 (%rdi) # sched: [5:0.50]
   3998 ; SANDY-NEXT:    prefetcht2 (%rdi) # sched: [5:0.50]
   3999 ; SANDY-NEXT:    #NO_APP
   4000 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4001 ;
   4002 ; HASWELL-SSE-LABEL: test_prefetch:
   4003 ; HASWELL-SSE:       # %bb.0:
   4004 ; HASWELL-SSE-NEXT:    #APP
   4005 ; HASWELL-SSE-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
   4006 ; HASWELL-SSE-NEXT:    prefetcht0 (%rdi) # sched: [5:0.50]
   4007 ; HASWELL-SSE-NEXT:    prefetcht1 (%rdi) # sched: [5:0.50]
   4008 ; HASWELL-SSE-NEXT:    prefetcht2 (%rdi) # sched: [5:0.50]
   4009 ; HASWELL-SSE-NEXT:    #NO_APP
   4010 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4011 ;
   4012 ; HASWELL-LABEL: test_prefetch:
   4013 ; HASWELL:       # %bb.0:
   4014 ; HASWELL-NEXT:    #APP
   4015 ; HASWELL-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
   4016 ; HASWELL-NEXT:    prefetcht0 (%rdi) # sched: [5:0.50]
   4017 ; HASWELL-NEXT:    prefetcht1 (%rdi) # sched: [5:0.50]
   4018 ; HASWELL-NEXT:    prefetcht2 (%rdi) # sched: [5:0.50]
   4019 ; HASWELL-NEXT:    #NO_APP
   4020 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4021 ;
   4022 ; BROADWELL-SSE-LABEL: test_prefetch:
   4023 ; BROADWELL-SSE:       # %bb.0:
   4024 ; BROADWELL-SSE-NEXT:    #APP
   4025 ; BROADWELL-SSE-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
   4026 ; BROADWELL-SSE-NEXT:    prefetcht0 (%rdi) # sched: [5:0.50]
   4027 ; BROADWELL-SSE-NEXT:    prefetcht1 (%rdi) # sched: [5:0.50]
   4028 ; BROADWELL-SSE-NEXT:    prefetcht2 (%rdi) # sched: [5:0.50]
   4029 ; BROADWELL-SSE-NEXT:    #NO_APP
   4030 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4031 ;
   4032 ; BROADWELL-LABEL: test_prefetch:
   4033 ; BROADWELL:       # %bb.0:
   4034 ; BROADWELL-NEXT:    #APP
   4035 ; BROADWELL-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
   4036 ; BROADWELL-NEXT:    prefetcht0 (%rdi) # sched: [5:0.50]
   4037 ; BROADWELL-NEXT:    prefetcht1 (%rdi) # sched: [5:0.50]
   4038 ; BROADWELL-NEXT:    prefetcht2 (%rdi) # sched: [5:0.50]
   4039 ; BROADWELL-NEXT:    #NO_APP
   4040 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4041 ;
   4042 ; SKYLAKE-SSE-LABEL: test_prefetch:
   4043 ; SKYLAKE-SSE:       # %bb.0:
   4044 ; SKYLAKE-SSE-NEXT:    #APP
   4045 ; SKYLAKE-SSE-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
   4046 ; SKYLAKE-SSE-NEXT:    prefetcht0 (%rdi) # sched: [5:0.50]
   4047 ; SKYLAKE-SSE-NEXT:    prefetcht1 (%rdi) # sched: [5:0.50]
   4048 ; SKYLAKE-SSE-NEXT:    prefetcht2 (%rdi) # sched: [5:0.50]
   4049 ; SKYLAKE-SSE-NEXT:    #NO_APP
   4050 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4051 ;
   4052 ; SKYLAKE-LABEL: test_prefetch:
   4053 ; SKYLAKE:       # %bb.0:
   4054 ; SKYLAKE-NEXT:    #APP
   4055 ; SKYLAKE-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
   4056 ; SKYLAKE-NEXT:    prefetcht0 (%rdi) # sched: [5:0.50]
   4057 ; SKYLAKE-NEXT:    prefetcht1 (%rdi) # sched: [5:0.50]
   4058 ; SKYLAKE-NEXT:    prefetcht2 (%rdi) # sched: [5:0.50]
   4059 ; SKYLAKE-NEXT:    #NO_APP
   4060 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4061 ;
   4062 ; SKX-SSE-LABEL: test_prefetch:
   4063 ; SKX-SSE:       # %bb.0:
   4064 ; SKX-SSE-NEXT:    #APP
   4065 ; SKX-SSE-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
   4066 ; SKX-SSE-NEXT:    prefetcht0 (%rdi) # sched: [5:0.50]
   4067 ; SKX-SSE-NEXT:    prefetcht1 (%rdi) # sched: [5:0.50]
   4068 ; SKX-SSE-NEXT:    prefetcht2 (%rdi) # sched: [5:0.50]
   4069 ; SKX-SSE-NEXT:    #NO_APP
   4070 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4071 ;
   4072 ; SKX-LABEL: test_prefetch:
   4073 ; SKX:       # %bb.0:
   4074 ; SKX-NEXT:    #APP
   4075 ; SKX-NEXT:    prefetchnta (%rdi) # sched: [5:0.50]
   4076 ; SKX-NEXT:    prefetcht0 (%rdi) # sched: [5:0.50]
   4077 ; SKX-NEXT:    prefetcht1 (%rdi) # sched: [5:0.50]
   4078 ; SKX-NEXT:    prefetcht2 (%rdi) # sched: [5:0.50]
   4079 ; SKX-NEXT:    #NO_APP
   4080 ; SKX-NEXT:    retq # sched: [7:1.00]
   4081 ;
   4082 ; BTVER2-SSE-LABEL: test_prefetch:
   4083 ; BTVER2-SSE:       # %bb.0:
   4084 ; BTVER2-SSE-NEXT:    #APP
   4085 ; BTVER2-SSE-NEXT:    prefetchnta (%rdi) # sched: [5:1.00]
   4086 ; BTVER2-SSE-NEXT:    prefetcht0 (%rdi) # sched: [5:1.00]
   4087 ; BTVER2-SSE-NEXT:    prefetcht1 (%rdi) # sched: [5:1.00]
   4088 ; BTVER2-SSE-NEXT:    prefetcht2 (%rdi) # sched: [5:1.00]
   4089 ; BTVER2-SSE-NEXT:    #NO_APP
   4090 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4091 ;
   4092 ; BTVER2-LABEL: test_prefetch:
   4093 ; BTVER2:       # %bb.0:
   4094 ; BTVER2-NEXT:    #APP
   4095 ; BTVER2-NEXT:    prefetchnta (%rdi) # sched: [5:1.00]
   4096 ; BTVER2-NEXT:    prefetcht0 (%rdi) # sched: [5:1.00]
   4097 ; BTVER2-NEXT:    prefetcht1 (%rdi) # sched: [5:1.00]
   4098 ; BTVER2-NEXT:    prefetcht2 (%rdi) # sched: [5:1.00]
   4099 ; BTVER2-NEXT:    #NO_APP
   4100 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4101 ;
   4102 ; ZNVER1-SSE-LABEL: test_prefetch:
   4103 ; ZNVER1-SSE:       # %bb.0:
   4104 ; ZNVER1-SSE-NEXT:    #APP
   4105 ; ZNVER1-SSE-NEXT:    prefetchnta (%rdi) # sched: [8:0.50]
   4106 ; ZNVER1-SSE-NEXT:    prefetcht0 (%rdi) # sched: [8:0.50]
   4107 ; ZNVER1-SSE-NEXT:    prefetcht1 (%rdi) # sched: [8:0.50]
   4108 ; ZNVER1-SSE-NEXT:    prefetcht2 (%rdi) # sched: [8:0.50]
   4109 ; ZNVER1-SSE-NEXT:    #NO_APP
   4110 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4111 ;
   4112 ; ZNVER1-LABEL: test_prefetch:
   4113 ; ZNVER1:       # %bb.0:
   4114 ; ZNVER1-NEXT:    #APP
   4115 ; ZNVER1-NEXT:    prefetchnta (%rdi) # sched: [8:0.50]
   4116 ; ZNVER1-NEXT:    prefetcht0 (%rdi) # sched: [8:0.50]
   4117 ; ZNVER1-NEXT:    prefetcht1 (%rdi) # sched: [8:0.50]
   4118 ; ZNVER1-NEXT:    prefetcht2 (%rdi) # sched: [8:0.50]
   4119 ; ZNVER1-NEXT:    #NO_APP
   4120 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4121   call void asm sideeffect "prefetchnta $0 \0A\09 prefetcht0 $0 \0A\09 prefetcht1 $0 \0A\09 prefetcht2 $0", "*m"(i8 *%a0)
   4122   ret void
   4123 }
   4124 
   4125 define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) {
   4126 ; GENERIC-LABEL: test_rcpps:
   4127 ; GENERIC:       # %bb.0:
   4128 ; GENERIC-NEXT:    rcpps %xmm0, %xmm1 # sched: [5:1.00]
   4129 ; GENERIC-NEXT:    rcpps (%rdi), %xmm0 # sched: [11:1.00]
   4130 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4131 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4132 ;
   4133 ; ATOM-LABEL: test_rcpps:
   4134 ; ATOM:       # %bb.0:
   4135 ; ATOM-NEXT:    rcpps (%rdi), %xmm1 # sched: [10:5.00]
   4136 ; ATOM-NEXT:    rcpps %xmm0, %xmm0 # sched: [9:4.50]
   4137 ; ATOM-NEXT:    addps %xmm0, %xmm1 # sched: [5:5.00]
   4138 ; ATOM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
   4139 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4140 ;
   4141 ; SLM-LABEL: test_rcpps:
   4142 ; SLM:       # %bb.0:
   4143 ; SLM-NEXT:    rcpps (%rdi), %xmm1 # sched: [8:1.00]
   4144 ; SLM-NEXT:    rcpps %xmm0, %xmm0 # sched: [5:1.00]
   4145 ; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   4146 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
   4147 ; SLM-NEXT:    retq # sched: [4:1.00]
   4148 ;
   4149 ; SANDY-SSE-LABEL: test_rcpps:
   4150 ; SANDY-SSE:       # %bb.0:
   4151 ; SANDY-SSE-NEXT:    rcpps %xmm0, %xmm1 # sched: [5:1.00]
   4152 ; SANDY-SSE-NEXT:    rcpps (%rdi), %xmm0 # sched: [11:1.00]
   4153 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4154 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4155 ;
   4156 ; SANDY-LABEL: test_rcpps:
   4157 ; SANDY:       # %bb.0:
   4158 ; SANDY-NEXT:    vrcpps %xmm0, %xmm0 # sched: [5:1.00]
   4159 ; SANDY-NEXT:    vrcpps (%rdi), %xmm1 # sched: [11:1.00]
   4160 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4161 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4162 ;
   4163 ; HASWELL-SSE-LABEL: test_rcpps:
   4164 ; HASWELL-SSE:       # %bb.0:
   4165 ; HASWELL-SSE-NEXT:    rcpps %xmm0, %xmm1 # sched: [5:1.00]
   4166 ; HASWELL-SSE-NEXT:    rcpps (%rdi), %xmm0 # sched: [11:1.00]
   4167 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4168 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4169 ;
   4170 ; HASWELL-LABEL: test_rcpps:
   4171 ; HASWELL:       # %bb.0:
   4172 ; HASWELL-NEXT:    vrcpps %xmm0, %xmm0 # sched: [5:1.00]
   4173 ; HASWELL-NEXT:    vrcpps (%rdi), %xmm1 # sched: [11:1.00]
   4174 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4175 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4176 ;
   4177 ; BROADWELL-SSE-LABEL: test_rcpps:
   4178 ; BROADWELL-SSE:       # %bb.0:
   4179 ; BROADWELL-SSE-NEXT:    rcpps %xmm0, %xmm1 # sched: [5:1.00]
   4180 ; BROADWELL-SSE-NEXT:    rcpps (%rdi), %xmm0 # sched: [10:1.00]
   4181 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4182 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4183 ;
   4184 ; BROADWELL-LABEL: test_rcpps:
   4185 ; BROADWELL:       # %bb.0:
   4186 ; BROADWELL-NEXT:    vrcpps %xmm0, %xmm0 # sched: [5:1.00]
   4187 ; BROADWELL-NEXT:    vrcpps (%rdi), %xmm1 # sched: [10:1.00]
   4188 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4189 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4190 ;
   4191 ; SKYLAKE-SSE-LABEL: test_rcpps:
   4192 ; SKYLAKE-SSE:       # %bb.0:
   4193 ; SKYLAKE-SSE-NEXT:    rcpps %xmm0, %xmm1 # sched: [4:1.00]
   4194 ; SKYLAKE-SSE-NEXT:    rcpps (%rdi), %xmm0 # sched: [10:1.00]
   4195 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   4196 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4197 ;
   4198 ; SKYLAKE-LABEL: test_rcpps:
   4199 ; SKYLAKE:       # %bb.0:
   4200 ; SKYLAKE-NEXT:    vrcpps %xmm0, %xmm0 # sched: [4:1.00]
   4201 ; SKYLAKE-NEXT:    vrcpps (%rdi), %xmm1 # sched: [10:1.00]
   4202 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4203 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4204 ;
   4205 ; SKX-SSE-LABEL: test_rcpps:
   4206 ; SKX-SSE:       # %bb.0:
   4207 ; SKX-SSE-NEXT:    rcpps %xmm0, %xmm1 # sched: [4:1.00]
   4208 ; SKX-SSE-NEXT:    rcpps (%rdi), %xmm0 # sched: [10:1.00]
   4209 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   4210 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4211 ;
   4212 ; SKX-LABEL: test_rcpps:
   4213 ; SKX:       # %bb.0:
   4214 ; SKX-NEXT:    vrcpps %xmm0, %xmm0 # sched: [4:1.00]
   4215 ; SKX-NEXT:    vrcpps (%rdi), %xmm1 # sched: [10:1.00]
   4216 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4217 ; SKX-NEXT:    retq # sched: [7:1.00]
   4218 ;
   4219 ; BTVER2-SSE-LABEL: test_rcpps:
   4220 ; BTVER2-SSE:       # %bb.0:
   4221 ; BTVER2-SSE-NEXT:    rcpps %xmm0, %xmm1 # sched: [2:1.00]
   4222 ; BTVER2-SSE-NEXT:    rcpps (%rdi), %xmm0 # sched: [7:1.00]
   4223 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4224 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4225 ;
   4226 ; BTVER2-LABEL: test_rcpps:
   4227 ; BTVER2:       # %bb.0:
   4228 ; BTVER2-NEXT:    vrcpps (%rdi), %xmm1 # sched: [7:1.00]
   4229 ; BTVER2-NEXT:    vrcpps %xmm0, %xmm0 # sched: [2:1.00]
   4230 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4231 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4232 ;
   4233 ; ZNVER1-SSE-LABEL: test_rcpps:
   4234 ; ZNVER1-SSE:       # %bb.0:
   4235 ; ZNVER1-SSE-NEXT:    rcpps %xmm0, %xmm1 # sched: [5:0.50]
   4236 ; ZNVER1-SSE-NEXT:    rcpps (%rdi), %xmm0 # sched: [12:0.50]
   4237 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4238 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4239 ;
   4240 ; ZNVER1-LABEL: test_rcpps:
   4241 ; ZNVER1:       # %bb.0:
   4242 ; ZNVER1-NEXT:    vrcpps (%rdi), %xmm1 # sched: [12:0.50]
   4243 ; ZNVER1-NEXT:    vrcpps %xmm0, %xmm0 # sched: [5:0.50]
   4244 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4245 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Scheduling test for the rcp.ps intrinsic. It is applied once to the
          ; register argument %a0 and once to a vector loaded from %a1 (align 16);
          ; the two results are added so that both calls feed the returned value.
          ; In the assertions above the load is folded into the memory-operand
          ; form of rcpps/vrcpps, so both the register and memory forms are pinned.
   4246   %1 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
   4247   %2 = load <4 x float>, <4 x float> *%a1, align 16
   4248   %3 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %2)
   4249   %4 = fadd <4 x float> %1, %3
   4250   ret <4 x float> %4
   4251 }
   4252 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
   4253 
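   4254 ; TODO - rcpss_m: add coverage for rcpss with a memory source operand; test_rcpss below only exercises the register form (its load is emitted as a separate movss/vmovss).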
   4255 
   4256 define <4 x float> @test_rcpss(float %a0, float *%a1) {
   4257 ; GENERIC-LABEL: test_rcpss:
   4258 ; GENERIC:       # %bb.0:
   4259 ; GENERIC-NEXT:    rcpss %xmm0, %xmm0 # sched: [5:1.00]
   4260 ; GENERIC-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
   4261 ; GENERIC-NEXT:    rcpss %xmm1, %xmm1 # sched: [5:1.00]
   4262 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4263 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4264 ;
   4265 ; ATOM-LABEL: test_rcpss:
   4266 ; ATOM:       # %bb.0:
   4267 ; ATOM-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
   4268 ; ATOM-NEXT:    rcpss %xmm0, %xmm0 # sched: [4:4.00]
   4269 ; ATOM-NEXT:    rcpss %xmm1, %xmm1 # sched: [4:4.00]
   4270 ; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
   4271 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4272 ;
   4273 ; SLM-LABEL: test_rcpss:
   4274 ; SLM:       # %bb.0:
   4275 ; SLM-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
   4276 ; SLM-NEXT:    rcpss %xmm0, %xmm0 # sched: [5:1.00]
   4277 ; SLM-NEXT:    rcpss %xmm1, %xmm1 # sched: [5:1.00]
   4278 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4279 ; SLM-NEXT:    retq # sched: [4:1.00]
   4280 ;
   4281 ; SANDY-SSE-LABEL: test_rcpss:
   4282 ; SANDY-SSE:       # %bb.0:
   4283 ; SANDY-SSE-NEXT:    rcpss %xmm0, %xmm0 # sched: [5:1.00]
   4284 ; SANDY-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
   4285 ; SANDY-SSE-NEXT:    rcpss %xmm1, %xmm1 # sched: [5:1.00]
   4286 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4287 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4288 ;
   4289 ; SANDY-LABEL: test_rcpss:
   4290 ; SANDY:       # %bb.0:
   4291 ; SANDY-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
   4292 ; SANDY-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
   4293 ; SANDY-NEXT:    vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
   4294 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4295 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4296 ;
   4297 ; HASWELL-SSE-LABEL: test_rcpss:
   4298 ; HASWELL-SSE:       # %bb.0:
   4299 ; HASWELL-SSE-NEXT:    rcpss %xmm0, %xmm0 # sched: [5:1.00]
   4300 ; HASWELL-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   4301 ; HASWELL-SSE-NEXT:    rcpss %xmm1, %xmm1 # sched: [5:1.00]
   4302 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4303 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4304 ;
   4305 ; HASWELL-LABEL: test_rcpss:
   4306 ; HASWELL:       # %bb.0:
   4307 ; HASWELL-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
   4308 ; HASWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   4309 ; HASWELL-NEXT:    vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
   4310 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4311 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4312 ;
   4313 ; BROADWELL-SSE-LABEL: test_rcpss:
   4314 ; BROADWELL-SSE:       # %bb.0:
   4315 ; BROADWELL-SSE-NEXT:    rcpss %xmm0, %xmm0 # sched: [5:1.00]
   4316 ; BROADWELL-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   4317 ; BROADWELL-SSE-NEXT:    rcpss %xmm1, %xmm1 # sched: [5:1.00]
   4318 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4319 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4320 ;
   4321 ; BROADWELL-LABEL: test_rcpss:
   4322 ; BROADWELL:       # %bb.0:
   4323 ; BROADWELL-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
   4324 ; BROADWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   4325 ; BROADWELL-NEXT:    vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
   4326 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4327 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4328 ;
   4329 ; SKYLAKE-SSE-LABEL: test_rcpss:
   4330 ; SKYLAKE-SSE:       # %bb.0:
   4331 ; SKYLAKE-SSE-NEXT:    rcpss %xmm0, %xmm0 # sched: [4:1.00]
   4332 ; SKYLAKE-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   4333 ; SKYLAKE-SSE-NEXT:    rcpss %xmm1, %xmm1 # sched: [4:1.00]
   4334 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   4335 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4336 ;
   4337 ; SKYLAKE-LABEL: test_rcpss:
   4338 ; SKYLAKE:       # %bb.0:
   4339 ; SKYLAKE-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
   4340 ; SKYLAKE-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   4341 ; SKYLAKE-NEXT:    vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
   4342 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4343 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4344 ;
   4345 ; SKX-SSE-LABEL: test_rcpss:
   4346 ; SKX-SSE:       # %bb.0:
   4347 ; SKX-SSE-NEXT:    rcpss %xmm0, %xmm0 # sched: [4:1.00]
   4348 ; SKX-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   4349 ; SKX-SSE-NEXT:    rcpss %xmm1, %xmm1 # sched: [4:1.00]
   4350 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   4351 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4352 ;
   4353 ; SKX-LABEL: test_rcpss:
   4354 ; SKX:       # %bb.0:
   4355 ; SKX-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
   4356 ; SKX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   4357 ; SKX-NEXT:    vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
   4358 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4359 ; SKX-NEXT:    retq # sched: [7:1.00]
   4360 ;
   4361 ; BTVER2-SSE-LABEL: test_rcpss:
   4362 ; BTVER2-SSE:       # %bb.0:
   4363 ; BTVER2-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
   4364 ; BTVER2-SSE-NEXT:    rcpss %xmm0, %xmm0 # sched: [2:1.00]
   4365 ; BTVER2-SSE-NEXT:    rcpss %xmm1, %xmm1 # sched: [2:1.00]
   4366 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4367 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4368 ;
   4369 ; BTVER2-LABEL: test_rcpss:
   4370 ; BTVER2:       # %bb.0:
   4371 ; BTVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
   4372 ; BTVER2-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
   4373 ; BTVER2-NEXT:    vrcpss %xmm1, %xmm1, %xmm1 # sched: [2:1.00]
   4374 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4375 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4376 ;
   4377 ; ZNVER1-SSE-LABEL: test_rcpss:
   4378 ; ZNVER1-SSE:       # %bb.0:
   4379 ; ZNVER1-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
   4380 ; ZNVER1-SSE-NEXT:    rcpss %xmm0, %xmm0 # sched: [5:0.50]
   4381 ; ZNVER1-SSE-NEXT:    rcpss %xmm1, %xmm1 # sched: [5:0.50]
   4382 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4383 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4384 ;
   4385 ; ZNVER1-LABEL: test_rcpss:
   4386 ; ZNVER1:       # %bb.0:
   4387 ; ZNVER1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
   4388 ; ZNVER1-NEXT:    vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:0.50]
   4389 ; ZNVER1-NEXT:    vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:0.50]
   4390 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4391 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Scheduling test for the rcp.ss intrinsic. Each scalar input (the
          ; argument %a0, and a float loaded from %a1 with align 4) is inserted into
          ; element 0 of an otherwise-undef <4 x float> before the call; the two
          ; results are added so that both calls feed the returned value.
          ; In the assertions above the load is emitted as a separate movss/vmovss
          ; and both rcpss/vrcpss instructions use register operands only.
   4392   %1 = insertelement <4 x float> undef, float %a0, i32 0
   4393   %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %1)
   4394   %3 = load float, float *%a1, align 4
   4395   %4 = insertelement <4 x float> undef, float %3, i32 0
   4396   %5 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
   4397   %6 = fadd <4 x float> %2, %5
   4398   ret <4 x float> %6
   4399 }
   4400 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
   4401 
   4402 define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) {
   4403 ; GENERIC-LABEL: test_rsqrtps:
   4404 ; GENERIC:       # %bb.0:
   4405 ; GENERIC-NEXT:    rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
   4406 ; GENERIC-NEXT:    rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
   4407 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4408 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4409 ;
   4410 ; ATOM-LABEL: test_rsqrtps:
   4411 ; ATOM:       # %bb.0:
   4412 ; ATOM-NEXT:    rsqrtps (%rdi), %xmm1 # sched: [10:5.00]
   4413 ; ATOM-NEXT:    rsqrtps %xmm0, %xmm0 # sched: [9:4.50]
   4414 ; ATOM-NEXT:    addps %xmm0, %xmm1 # sched: [5:5.00]
   4415 ; ATOM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
   4416 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4417 ;
   4418 ; SLM-LABEL: test_rsqrtps:
   4419 ; SLM:       # %bb.0:
   4420 ; SLM-NEXT:    rsqrtps (%rdi), %xmm1 # sched: [8:1.00]
   4421 ; SLM-NEXT:    rsqrtps %xmm0, %xmm0 # sched: [5:1.00]
   4422 ; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   4423 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
   4424 ; SLM-NEXT:    retq # sched: [4:1.00]
   4425 ;
   4426 ; SANDY-SSE-LABEL: test_rsqrtps:
   4427 ; SANDY-SSE:       # %bb.0:
   4428 ; SANDY-SSE-NEXT:    rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
   4429 ; SANDY-SSE-NEXT:    rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
   4430 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4431 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4432 ;
   4433 ; SANDY-LABEL: test_rsqrtps:
   4434 ; SANDY:       # %bb.0:
   4435 ; SANDY-NEXT:    vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
   4436 ; SANDY-NEXT:    vrsqrtps (%rdi), %xmm1 # sched: [11:1.00]
   4437 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4438 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4439 ;
   4440 ; HASWELL-SSE-LABEL: test_rsqrtps:
   4441 ; HASWELL-SSE:       # %bb.0:
   4442 ; HASWELL-SSE-NEXT:    rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
   4443 ; HASWELL-SSE-NEXT:    rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
   4444 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4445 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4446 ;
   4447 ; HASWELL-LABEL: test_rsqrtps:
   4448 ; HASWELL:       # %bb.0:
   4449 ; HASWELL-NEXT:    vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
   4450 ; HASWELL-NEXT:    vrsqrtps (%rdi), %xmm1 # sched: [11:1.00]
   4451 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4452 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4453 ;
   4454 ; BROADWELL-SSE-LABEL: test_rsqrtps:
   4455 ; BROADWELL-SSE:       # %bb.0:
   4456 ; BROADWELL-SSE-NEXT:    rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
   4457 ; BROADWELL-SSE-NEXT:    rsqrtps (%rdi), %xmm0 # sched: [10:1.00]
   4458 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4459 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4460 ;
   4461 ; BROADWELL-LABEL: test_rsqrtps:
   4462 ; BROADWELL:       # %bb.0:
   4463 ; BROADWELL-NEXT:    vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
   4464 ; BROADWELL-NEXT:    vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
   4465 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4466 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4467 ;
   4468 ; SKYLAKE-SSE-LABEL: test_rsqrtps:
   4469 ; SKYLAKE-SSE:       # %bb.0:
   4470 ; SKYLAKE-SSE-NEXT:    rsqrtps %xmm0, %xmm1 # sched: [4:1.00]
   4471 ; SKYLAKE-SSE-NEXT:    rsqrtps (%rdi), %xmm0 # sched: [10:1.00]
   4472 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   4473 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4474 ;
   4475 ; SKYLAKE-LABEL: test_rsqrtps:
   4476 ; SKYLAKE:       # %bb.0:
   4477 ; SKYLAKE-NEXT:    vrsqrtps %xmm0, %xmm0 # sched: [4:1.00]
   4478 ; SKYLAKE-NEXT:    vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
   4479 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4480 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4481 ;
   4482 ; SKX-SSE-LABEL: test_rsqrtps:
   4483 ; SKX-SSE:       # %bb.0:
   4484 ; SKX-SSE-NEXT:    rsqrtps %xmm0, %xmm1 # sched: [4:1.00]
   4485 ; SKX-SSE-NEXT:    rsqrtps (%rdi), %xmm0 # sched: [10:1.00]
   4486 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   4487 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4488 ;
   4489 ; SKX-LABEL: test_rsqrtps:
   4490 ; SKX:       # %bb.0:
   4491 ; SKX-NEXT:    vrsqrtps %xmm0, %xmm0 # sched: [4:1.00]
   4492 ; SKX-NEXT:    vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
   4493 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4494 ; SKX-NEXT:    retq # sched: [7:1.00]
   4495 ;
   4496 ; BTVER2-SSE-LABEL: test_rsqrtps:
   4497 ; BTVER2-SSE:       # %bb.0:
   4498 ; BTVER2-SSE-NEXT:    rsqrtps %xmm0, %xmm1 # sched: [2:1.00]
   4499 ; BTVER2-SSE-NEXT:    rsqrtps (%rdi), %xmm0 # sched: [7:1.00]
   4500 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4501 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4502 ;
   4503 ; BTVER2-LABEL: test_rsqrtps:
   4504 ; BTVER2:       # %bb.0:
   4505 ; BTVER2-NEXT:    vrsqrtps (%rdi), %xmm1 # sched: [7:1.00]
   4506 ; BTVER2-NEXT:    vrsqrtps %xmm0, %xmm0 # sched: [2:1.00]
   4507 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4508 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4509 ;
   4510 ; ZNVER1-SSE-LABEL: test_rsqrtps:
   4511 ; ZNVER1-SSE:       # %bb.0:
   4512 ; ZNVER1-SSE-NEXT:    rsqrtps %xmm0, %xmm1 # sched: [5:0.50]
   4513 ; ZNVER1-SSE-NEXT:    rsqrtps (%rdi), %xmm0 # sched: [12:0.50]
   4514 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4515 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4516 ;
   4517 ; ZNVER1-LABEL: test_rsqrtps:
   4518 ; ZNVER1:       # %bb.0:
   4519 ; ZNVER1-NEXT:    vrsqrtps (%rdi), %xmm1 # sched: [12:0.50]
   4520 ; ZNVER1-NEXT:    vrsqrtps %xmm0, %xmm0 # sched: [5:0.50]
   4521 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4522 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Scheduling test for the rsqrt.ps intrinsic. It is applied once to the
          ; register argument %a0 and once to a vector loaded from %a1 (align 16);
          ; the two results are added so that both calls feed the returned value.
          ; In the assertions above the load is folded into the memory-operand
          ; form of rsqrtps/vrsqrtps, so both register and memory forms are pinned.
   4523   %1 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
   4524   %2 = load <4 x float>, <4 x float> *%a1, align 16
   4525   %3 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %2)
   4526   %4 = fadd <4 x float> %1, %3
   4527   ret <4 x float> %4
   4528 }
   4529 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
   4530 
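   4531 ; TODO - rsqrtss_m: add coverage for rsqrtss with a memory source operand; test_rsqrtss below only exercises the register form (its load is emitted as a separate movss/vmovss).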
   4532 
   4533 define <4 x float> @test_rsqrtss(float %a0, float *%a1) {
   4534 ; GENERIC-LABEL: test_rsqrtss:
   4535 ; GENERIC:       # %bb.0:
   4536 ; GENERIC-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
   4537 ; GENERIC-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
   4538 ; GENERIC-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
   4539 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4540 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4541 ;
   4542 ; ATOM-LABEL: test_rsqrtss:
   4543 ; ATOM:       # %bb.0:
   4544 ; ATOM-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
   4545 ; ATOM-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [4:4.00]
   4546 ; ATOM-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [4:4.00]
   4547 ; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
   4548 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4549 ;
   4550 ; SLM-LABEL: test_rsqrtss:
   4551 ; SLM:       # %bb.0:
   4552 ; SLM-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
   4553 ; SLM-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
   4554 ; SLM-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
   4555 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4556 ; SLM-NEXT:    retq # sched: [4:1.00]
   4557 ;
   4558 ; SANDY-SSE-LABEL: test_rsqrtss:
   4559 ; SANDY-SSE:       # %bb.0:
   4560 ; SANDY-SSE-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
   4561 ; SANDY-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
   4562 ; SANDY-SSE-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
   4563 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4564 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4565 ;
   4566 ; SANDY-LABEL: test_rsqrtss:
   4567 ; SANDY:       # %bb.0:
   4568 ; SANDY-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
   4569 ; SANDY-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
   4570 ; SANDY-NEXT:    vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
   4571 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4572 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4573 ;
   4574 ; HASWELL-SSE-LABEL: test_rsqrtss:
   4575 ; HASWELL-SSE:       # %bb.0:
   4576 ; HASWELL-SSE-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
   4577 ; HASWELL-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   4578 ; HASWELL-SSE-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
   4579 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4580 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4581 ;
   4582 ; HASWELL-LABEL: test_rsqrtss:
   4583 ; HASWELL:       # %bb.0:
   4584 ; HASWELL-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
   4585 ; HASWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   4586 ; HASWELL-NEXT:    vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
   4587 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4588 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4589 ;
   4590 ; BROADWELL-SSE-LABEL: test_rsqrtss:
   4591 ; BROADWELL-SSE:       # %bb.0:
   4592 ; BROADWELL-SSE-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
   4593 ; BROADWELL-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   4594 ; BROADWELL-SSE-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
   4595 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4596 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4597 ;
   4598 ; BROADWELL-LABEL: test_rsqrtss:
   4599 ; BROADWELL:       # %bb.0:
   4600 ; BROADWELL-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
   4601 ; BROADWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   4602 ; BROADWELL-NEXT:    vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
   4603 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4604 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4605 ;
   4606 ; SKYLAKE-SSE-LABEL: test_rsqrtss:
   4607 ; SKYLAKE-SSE:       # %bb.0:
   4608 ; SKYLAKE-SSE-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [4:1.00]
   4609 ; SKYLAKE-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   4610 ; SKYLAKE-SSE-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [4:1.00]
   4611 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   4612 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4613 ;
   4614 ; SKYLAKE-LABEL: test_rsqrtss:
   4615 ; SKYLAKE:       # %bb.0:
   4616 ; SKYLAKE-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
   4617 ; SKYLAKE-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   4618 ; SKYLAKE-NEXT:    vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
   4619 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4620 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4621 ;
   4622 ; SKX-SSE-LABEL: test_rsqrtss:
   4623 ; SKX-SSE:       # %bb.0:
   4624 ; SKX-SSE-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [4:1.00]
   4625 ; SKX-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   4626 ; SKX-SSE-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [4:1.00]
   4627 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   4628 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4629 ;
   4630 ; SKX-LABEL: test_rsqrtss:
   4631 ; SKX:       # %bb.0:
   4632 ; SKX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
   4633 ; SKX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
   4634 ; SKX-NEXT:    vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
   4635 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4636 ; SKX-NEXT:    retq # sched: [7:1.00]
   4637 ;
   4638 ; BTVER2-SSE-LABEL: test_rsqrtss:
   4639 ; BTVER2-SSE:       # %bb.0:
   4640 ; BTVER2-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
   4641 ; BTVER2-SSE-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [2:1.00]
   4642 ; BTVER2-SSE-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [2:1.00]
   4643 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4644 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4645 ;
   4646 ; BTVER2-LABEL: test_rsqrtss:
   4647 ; BTVER2:       # %bb.0:
   4648 ; BTVER2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
   4649 ; BTVER2-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
   4650 ; BTVER2-NEXT:    vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [2:1.00]
   4651 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4652 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4653 ;
   4654 ; ZNVER1-SSE-LABEL: test_rsqrtss:
   4655 ; ZNVER1-SSE:       # %bb.0:
   4656 ; ZNVER1-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
   4657 ; ZNVER1-SSE-NEXT:    rsqrtss %xmm0, %xmm0 # sched: [5:0.50]
   4658 ; ZNVER1-SSE-NEXT:    rsqrtss %xmm1, %xmm1 # sched: [5:0.50]
   4659 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4660 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4661 ;
   4662 ; ZNVER1-LABEL: test_rsqrtss:
   4663 ; ZNVER1:       # %bb.0:
   4664 ; ZNVER1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
   4665 ; ZNVER1-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:0.50]
   4666 ; ZNVER1-NEXT:    vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:0.50]
   4667 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4668 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Scheduling test for the rsqrt.ss intrinsic. Each scalar input (the
          ; argument %a0, and a float loaded from %a1 with align 4) is inserted into
          ; element 0 of an otherwise-undef <4 x float> before the call; the two
          ; results are added so that both calls feed the returned value.
          ; In the assertions above the load is emitted as a separate movss/vmovss
          ; and both rsqrtss/vrsqrtss instructions use register operands only.
   4669   %1 = insertelement <4 x float> undef, float %a0, i32 0
   4670   %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %1)
   4671   %3 = load float, float *%a1, align 4
   4672   %4 = insertelement <4 x float> undef, float %3, i32 0
   4673   %5 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
   4674   %6 = fadd <4 x float> %2, %5
   4675   ret <4 x float> %6
   4676 }
   4677 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
   4678 
   4679 define void @test_sfence() {
   4680 ; GENERIC-LABEL: test_sfence:
   4681 ; GENERIC:       # %bb.0:
   4682 ; GENERIC-NEXT:    sfence # sched: [1:1.00]
   4683 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4684 ;
   4685 ; ATOM-LABEL: test_sfence:
   4686 ; ATOM:       # %bb.0:
   4687 ; ATOM-NEXT:    sfence # sched: [1:1.00]
   4688 ; ATOM-NEXT:    nop # sched: [1:0.50]
   4689 ; ATOM-NEXT:    nop # sched: [1:0.50]
   4690 ; ATOM-NEXT:    nop # sched: [1:0.50]
   4691 ; ATOM-NEXT:    nop # sched: [1:0.50]
   4692 ; ATOM-NEXT:    nop # sched: [1:0.50]
   4693 ; ATOM-NEXT:    nop # sched: [1:0.50]
   4694 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4695 ;
   4696 ; SLM-LABEL: test_sfence:
   4697 ; SLM:       # %bb.0:
   4698 ; SLM-NEXT:    sfence # sched: [1:1.00]
   4699 ; SLM-NEXT:    retq # sched: [4:1.00]
   4700 ;
   4701 ; SANDY-SSE-LABEL: test_sfence:
   4702 ; SANDY-SSE:       # %bb.0:
   4703 ; SANDY-SSE-NEXT:    sfence # sched: [1:1.00]
   4704 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4705 ;
   4706 ; SANDY-LABEL: test_sfence:
   4707 ; SANDY:       # %bb.0:
   4708 ; SANDY-NEXT:    sfence # sched: [1:1.00]
   4709 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4710 ;
   4711 ; HASWELL-SSE-LABEL: test_sfence:
   4712 ; HASWELL-SSE:       # %bb.0:
   4713 ; HASWELL-SSE-NEXT:    sfence # sched: [2:0.33]
   4714 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4715 ;
   4716 ; HASWELL-LABEL: test_sfence:
   4717 ; HASWELL:       # %bb.0:
   4718 ; HASWELL-NEXT:    sfence # sched: [2:0.33]
   4719 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4720 ;
   4721 ; BROADWELL-SSE-LABEL: test_sfence:
   4722 ; BROADWELL-SSE:       # %bb.0:
   4723 ; BROADWELL-SSE-NEXT:    sfence # sched: [2:0.33]
   4724 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4725 ;
   4726 ; BROADWELL-LABEL: test_sfence:
   4727 ; BROADWELL:       # %bb.0:
   4728 ; BROADWELL-NEXT:    sfence # sched: [2:0.33]
   4729 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4730 ;
   4731 ; SKYLAKE-SSE-LABEL: test_sfence:
   4732 ; SKYLAKE-SSE:       # %bb.0:
   4733 ; SKYLAKE-SSE-NEXT:    sfence # sched: [2:0.33]
   4734 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4735 ;
   4736 ; SKYLAKE-LABEL: test_sfence:
   4737 ; SKYLAKE:       # %bb.0:
   4738 ; SKYLAKE-NEXT:    sfence # sched: [2:0.33]
   4739 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4740 ;
   4741 ; SKX-SSE-LABEL: test_sfence:
   4742 ; SKX-SSE:       # %bb.0:
   4743 ; SKX-SSE-NEXT:    sfence # sched: [2:0.33]
   4744 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4745 ;
   4746 ; SKX-LABEL: test_sfence:
   4747 ; SKX:       # %bb.0:
   4748 ; SKX-NEXT:    sfence # sched: [2:0.33]
   4749 ; SKX-NEXT:    retq # sched: [7:1.00]
   4750 ;
   4751 ; BTVER2-SSE-LABEL: test_sfence:
   4752 ; BTVER2-SSE:       # %bb.0:
   4753 ; BTVER2-SSE-NEXT:    sfence # sched: [1:1.00]
   4754 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4755 ;
   4756 ; BTVER2-LABEL: test_sfence:
   4757 ; BTVER2:       # %bb.0:
   4758 ; BTVER2-NEXT:    sfence # sched: [1:1.00]
   4759 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4760 ;
   4761 ; ZNVER1-SSE-LABEL: test_sfence:
   4762 ; ZNVER1-SSE:       # %bb.0:
   4763 ; ZNVER1-SSE-NEXT:    sfence # sched: [1:0.50]
   4764 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4765 ;
   4766 ; ZNVER1-LABEL: test_sfence:
   4767 ; ZNVER1:       # %bb.0:
   4768 ; ZNVER1-NEXT:    sfence # sched: [1:0.50]
   4769 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4770   call void @llvm.x86.sse.sfence() ; only operation in the body; every assertion group above expects one sfence then retq (the Atom group also expects six nops in between)
   4771   ret void
   4772 }
   4773 declare void @llvm.x86.sse.sfence() nounwind readnone ; NOTE(review) readnone on a store-fence intrinsic looks suspicious - confirm whether the declared attribute matters here or is overridden for intrinsics
   4774 
   4775 define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) nounwind {
   4776 ; GENERIC-LABEL: test_shufps:
   4777 ; GENERIC:       # %bb.0:
   4778 ; GENERIC-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
   4779 ; GENERIC-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
   4780 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4781 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4782 ;
   4783 ; ATOM-LABEL: test_shufps:
   4784 ; ATOM:       # %bb.0:
   4785 ; ATOM-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
   4786 ; ATOM-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [1:1.00]
   4787 ; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
   4788 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4789 ;
   4790 ; SLM-LABEL: test_shufps:
   4791 ; SLM:       # %bb.0:
   4792 ; SLM-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
   4793 ; SLM-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [4:1.00]
   4794 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4795 ; SLM-NEXT:    retq # sched: [4:1.00]
   4796 ;
   4797 ; SANDY-SSE-LABEL: test_shufps:
   4798 ; SANDY-SSE:       # %bb.0:
   4799 ; SANDY-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
   4800 ; SANDY-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
   4801 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4802 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4803 ;
   4804 ; SANDY-LABEL: test_shufps:
   4805 ; SANDY:       # %bb.0:
   4806 ; SANDY-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
   4807 ; SANDY-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
   4808 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4809 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4810 ;
   4811 ; HASWELL-SSE-LABEL: test_shufps:
   4812 ; HASWELL-SSE:       # %bb.0:
   4813 ; HASWELL-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
   4814 ; HASWELL-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
   4815 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4816 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4817 ;
   4818 ; HASWELL-LABEL: test_shufps:
   4819 ; HASWELL:       # %bb.0:
   4820 ; HASWELL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
   4821 ; HASWELL-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
   4822 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4823 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4824 ;
   4825 ; BROADWELL-SSE-LABEL: test_shufps:
   4826 ; BROADWELL-SSE:       # %bb.0:
   4827 ; BROADWELL-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
   4828 ; BROADWELL-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00]
   4829 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4830 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4831 ;
   4832 ; BROADWELL-LABEL: test_shufps:
   4833 ; BROADWELL:       # %bb.0:
   4834 ; BROADWELL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
   4835 ; BROADWELL-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00]
   4836 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4837 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4838 ;
   4839 ; SKYLAKE-SSE-LABEL: test_shufps:
   4840 ; SKYLAKE-SSE:       # %bb.0:
   4841 ; SKYLAKE-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
   4842 ; SKYLAKE-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
   4843 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   4844 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4845 ;
   4846 ; SKYLAKE-LABEL: test_shufps:
   4847 ; SKYLAKE:       # %bb.0:
   4848 ; SKYLAKE-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
   4849 ; SKYLAKE-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
   4850 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4851 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4852 ;
   4853 ; SKX-SSE-LABEL: test_shufps:
   4854 ; SKX-SSE:       # %bb.0:
   4855 ; SKX-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
   4856 ; SKX-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
   4857 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   4858 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4859 ;
   4860 ; SKX-LABEL: test_shufps:
   4861 ; SKX:       # %bb.0:
   4862 ; SKX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
   4863 ; SKX-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
   4864 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4865 ; SKX-NEXT:    retq # sched: [7:1.00]
   4866 ;
   4867 ; BTVER2-SSE-LABEL: test_shufps:
   4868 ; BTVER2-SSE:       # %bb.0:
   4869 ; BTVER2-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
   4870 ; BTVER2-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00]
   4871 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4872 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   4873 ;
   4874 ; BTVER2-LABEL: test_shufps:
   4875 ; BTVER2:       # %bb.0:
   4876 ; BTVER2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
   4877 ; BTVER2-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00]
   4878 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4879 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4880 ;
   4881 ; ZNVER1-SSE-LABEL: test_shufps:
   4882 ; ZNVER1-SSE:       # %bb.0:
   4883 ; ZNVER1-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
   4884 ; ZNVER1-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [8:0.50]
   4885 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4886 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   4887 ;
   4888 ; ZNVER1-LABEL: test_shufps:
   4889 ; ZNVER1:       # %bb.0:
   4890 ; ZNVER1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
   4891 ; ZNVER1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [8:0.50]
   4892 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4893 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4894   %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4> ; register-register shuffle; result lanes are a0[0], a0[0], a1[0], a1[0]
   4895   %2 = load <4 x float>, <4 x float> *%a2, align 16 ; 16-byte aligned vector load; appears as the mem operand of the second shuffle in the assertions above
   4896   %3 = shufflevector <4 x float> %a1, <4 x float> %2, <4 x i32> <i32 0, i32 3, i32 4, i32 4> ; register-memory shuffle; result lanes are a1[0], a1[3], mem[0], mem[0]
   4897   %4 = fadd <4 x float> %1, %3 ; sums both shuffle results so each one feeds the return value
   4898   ret <4 x float> %4
   4899 }
   4900 
   4901 define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) {
   4902 ; GENERIC-LABEL: test_sqrtps:
   4903 ; GENERIC:       # %bb.0:
   4904 ; GENERIC-NEXT:    sqrtps %xmm0, %xmm1 # sched: [14:14.00]
   4905 ; GENERIC-NEXT:    sqrtps (%rdi), %xmm0 # sched: [20:14.00]
   4906 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4907 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4908 ;
   4909 ; ATOM-LABEL: test_sqrtps:
   4910 ; ATOM:       # %bb.0:
   4911 ; ATOM-NEXT:    sqrtps %xmm0, %xmm1 # sched: [70:35.00]
   4912 ; ATOM-NEXT:    sqrtps (%rdi), %xmm0 # sched: [70:35.00]
   4913 ; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
   4914 ; ATOM-NEXT:    retq # sched: [79:39.50]
   4915 ;
   4916 ; SLM-LABEL: test_sqrtps:
   4917 ; SLM:       # %bb.0:
   4918 ; SLM-NEXT:    sqrtps (%rdi), %xmm1 # sched: [44:40.00]
   4919 ; SLM-NEXT:    sqrtps %xmm0, %xmm0 # sched: [41:40.00]
   4920 ; SLM-NEXT:    addps %xmm0, %xmm1 # sched: [3:1.00]
   4921 ; SLM-NEXT:    movaps %xmm1, %xmm0 # sched: [1:0.50]
   4922 ; SLM-NEXT:    retq # sched: [4:1.00]
   4923 ;
   4924 ; SANDY-SSE-LABEL: test_sqrtps:
   4925 ; SANDY-SSE:       # %bb.0:
   4926 ; SANDY-SSE-NEXT:    sqrtps %xmm0, %xmm1 # sched: [14:14.00]
   4927 ; SANDY-SSE-NEXT:    sqrtps (%rdi), %xmm0 # sched: [20:14.00]
   4928 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4929 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   4930 ;
   4931 ; SANDY-LABEL: test_sqrtps:
   4932 ; SANDY:       # %bb.0:
   4933 ; SANDY-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [14:14.00]
   4934 ; SANDY-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [20:14.00]
   4935 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4936 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4937 ;
   4938 ; HASWELL-SSE-LABEL: test_sqrtps:
   4939 ; HASWELL-SSE:       # %bb.0:
   4940 ; HASWELL-SSE-NEXT:    sqrtps %xmm0, %xmm1 # sched: [11:7.00]
   4941 ; HASWELL-SSE-NEXT:    sqrtps (%rdi), %xmm0 # sched: [17:7.00]
   4942 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4943 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4944 ;
   4945 ; HASWELL-LABEL: test_sqrtps:
   4946 ; HASWELL:       # %bb.0:
   4947 ; HASWELL-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [11:7.00]
   4948 ; HASWELL-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [17:7.00]
   4949 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4950 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4951 ;
   4952 ; BROADWELL-SSE-LABEL: test_sqrtps:
   4953 ; BROADWELL-SSE:       # %bb.0:
   4954 ; BROADWELL-SSE-NEXT:    sqrtps %xmm0, %xmm1 # sched: [11:7.00]
   4955 ; BROADWELL-SSE-NEXT:    sqrtps (%rdi), %xmm0 # sched: [16:7.00]
   4956 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4957 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   4958 ;
   4959 ; BROADWELL-LABEL: test_sqrtps:
   4960 ; BROADWELL:       # %bb.0:
   4961 ; BROADWELL-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [11:7.00]
   4962 ; BROADWELL-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [16:7.00]
   4963 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   4964 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4965 ;
   4966 ; SKYLAKE-SSE-LABEL: test_sqrtps:
   4967 ; SKYLAKE-SSE:       # %bb.0:
   4968 ; SKYLAKE-SSE-NEXT:    sqrtps %xmm0, %xmm1 # sched: [12:3.00]
   4969 ; SKYLAKE-SSE-NEXT:    sqrtps (%rdi), %xmm0 # sched: [18:3.00]
   4970 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   4971 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   4972 ;
   4973 ; SKYLAKE-LABEL: test_sqrtps:
   4974 ; SKYLAKE:       # %bb.0:
   4975 ; SKYLAKE-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [12:3.00]
   4976 ; SKYLAKE-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [18:3.00]
   4977 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4978 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4979 ;
   4980 ; SKX-SSE-LABEL: test_sqrtps:
   4981 ; SKX-SSE:       # %bb.0:
   4982 ; SKX-SSE-NEXT:    sqrtps %xmm0, %xmm1 # sched: [12:3.00]
   4983 ; SKX-SSE-NEXT:    sqrtps (%rdi), %xmm0 # sched: [18:3.00]
   4984 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   4985 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   4986 ;
   4987 ; SKX-LABEL: test_sqrtps:
   4988 ; SKX:       # %bb.0:
   4989 ; SKX-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [12:3.00]
   4990 ; SKX-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [18:3.00]
   4991 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   4992 ; SKX-NEXT:    retq # sched: [7:1.00]
   4993 ;
   4994 ; BTVER2-SSE-LABEL: test_sqrtps:
   4995 ; BTVER2-SSE:       # %bb.0:
   4996 ; BTVER2-SSE-NEXT:    sqrtps %xmm0, %xmm1 # sched: [21:21.00]
   4997 ; BTVER2-SSE-NEXT:    sqrtps (%rdi), %xmm0 # sched: [26:21.00]
   4998 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   4999 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5000 ;
   5001 ; BTVER2-LABEL: test_sqrtps:
   5002 ; BTVER2:       # %bb.0:
   5003 ; BTVER2-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [26:21.00]
   5004 ; BTVER2-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [21:21.00]
   5005 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5006 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5007 ;
   5008 ; ZNVER1-SSE-LABEL: test_sqrtps:
   5009 ; ZNVER1-SSE:       # %bb.0:
   5010 ; ZNVER1-SSE-NEXT:    sqrtps %xmm0, %xmm1 # sched: [20:20.00]
   5011 ; ZNVER1-SSE-NEXT:    sqrtps (%rdi), %xmm0 # sched: [27:20.00]
   5012 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5013 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5014 ;
   5015 ; ZNVER1-LABEL: test_sqrtps:
   5016 ; ZNVER1:       # %bb.0:
   5017 ; ZNVER1-NEXT:    vsqrtps (%rdi), %xmm1 # sched: [27:20.00]
   5018 ; ZNVER1-NEXT:    vsqrtps %xmm0, %xmm0 # sched: [20:20.00]
   5019 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5020 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5021   %1 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; register-operand form
   5022   %2 = load <4 x float>, <4 x float> *%a1, align 16 ; 16-byte aligned vector load; the assertions above show it folded into the second (v)sqrtps as a (%rdi) operand
   5023   %3 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %2) ; memory-operand form; some CPU groups above emit it before the register form
   5024   %4 = fadd <4 x float> %1, %3 ; sums both results so each square root feeds the return value
   5025   ret <4 x float> %4
   5026 }
   5027 declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone ; intrinsic called twice by test_sqrtps; takes and returns a <4 x float>
   5028 
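   5029 ; TODO - sqrtss_m (presumably the memory-operand form of sqrtss; test_sqrtss below only produces the register form, with the load kept as a separate movaps)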
   5030 
   5031 define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) {
   5032 ; GENERIC-LABEL: test_sqrtss:
   5033 ; GENERIC:       # %bb.0:
   5034 ; GENERIC-NEXT:    sqrtss %xmm0, %xmm0 # sched: [14:14.00]
   5035 ; GENERIC-NEXT:    movaps (%rdi), %xmm1 # sched: [6:0.50]
   5036 ; GENERIC-NEXT:    sqrtss %xmm1, %xmm1 # sched: [14:14.00]
   5037 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5038 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5039 ;
   5040 ; ATOM-LABEL: test_sqrtss:
   5041 ; ATOM:       # %bb.0:
   5042 ; ATOM-NEXT:    movaps (%rdi), %xmm1 # sched: [1:1.00]
   5043 ; ATOM-NEXT:    sqrtss %xmm0, %xmm0 # sched: [34:17.00]
   5044 ; ATOM-NEXT:    sqrtss %xmm1, %xmm1 # sched: [34:17.00]
   5045 ; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
   5046 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5047 ;
   5048 ; SLM-LABEL: test_sqrtss:
   5049 ; SLM:       # %bb.0:
   5050 ; SLM-NEXT:    movaps (%rdi), %xmm1 # sched: [3:1.00]
   5051 ; SLM-NEXT:    sqrtss %xmm0, %xmm0 # sched: [20:20.00]
   5052 ; SLM-NEXT:    sqrtss %xmm1, %xmm1 # sched: [20:20.00]
   5053 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5054 ; SLM-NEXT:    retq # sched: [4:1.00]
   5055 ;
   5056 ; SANDY-SSE-LABEL: test_sqrtss:
   5057 ; SANDY-SSE:       # %bb.0:
   5058 ; SANDY-SSE-NEXT:    sqrtss %xmm0, %xmm0 # sched: [14:14.00]
   5059 ; SANDY-SSE-NEXT:    movaps (%rdi), %xmm1 # sched: [6:0.50]
   5060 ; SANDY-SSE-NEXT:    sqrtss %xmm1, %xmm1 # sched: [14:14.00]
   5061 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5062 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5063 ;
   5064 ; SANDY-LABEL: test_sqrtss:
   5065 ; SANDY:       # %bb.0:
   5066 ; SANDY-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00]
   5067 ; SANDY-NEXT:    vmovaps (%rdi), %xmm1 # sched: [6:0.50]
   5068 ; SANDY-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:14.00]
   5069 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5070 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5071 ;
   5072 ; HASWELL-SSE-LABEL: test_sqrtss:
   5073 ; HASWELL-SSE:       # %bb.0:
   5074 ; HASWELL-SSE-NEXT:    sqrtss %xmm0, %xmm0 # sched: [11:7.00]
   5075 ; HASWELL-SSE-NEXT:    movaps (%rdi), %xmm1 # sched: [6:0.50]
   5076 ; HASWELL-SSE-NEXT:    sqrtss %xmm1, %xmm1 # sched: [11:7.00]
   5077 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5078 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5079 ;
   5080 ; HASWELL-LABEL: test_sqrtss:
   5081 ; HASWELL:       # %bb.0:
   5082 ; HASWELL-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [11:7.00]
   5083 ; HASWELL-NEXT:    vmovaps (%rdi), %xmm1 # sched: [6:0.50]
   5084 ; HASWELL-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1 # sched: [11:7.00]
   5085 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5086 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5087 ;
   5088 ; BROADWELL-SSE-LABEL: test_sqrtss:
   5089 ; BROADWELL-SSE:       # %bb.0:
   5090 ; BROADWELL-SSE-NEXT:    sqrtss %xmm0, %xmm0 # sched: [11:4.00]
   5091 ; BROADWELL-SSE-NEXT:    movaps (%rdi), %xmm1 # sched: [5:0.50]
   5092 ; BROADWELL-SSE-NEXT:    sqrtss %xmm1, %xmm1 # sched: [11:4.00]
   5093 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5094 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5095 ;
   5096 ; BROADWELL-LABEL: test_sqrtss:
   5097 ; BROADWELL:       # %bb.0:
   5098 ; BROADWELL-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [11:4.00]
   5099 ; BROADWELL-NEXT:    vmovaps (%rdi), %xmm1 # sched: [5:0.50]
   5100 ; BROADWELL-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1 # sched: [11:4.00]
   5101 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5102 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5103 ;
   5104 ; SKYLAKE-SSE-LABEL: test_sqrtss:
   5105 ; SKYLAKE-SSE:       # %bb.0:
   5106 ; SKYLAKE-SSE-NEXT:    sqrtss %xmm0, %xmm0 # sched: [12:3.00]
   5107 ; SKYLAKE-SSE-NEXT:    movaps (%rdi), %xmm1 # sched: [6:0.50]
   5108 ; SKYLAKE-SSE-NEXT:    sqrtss %xmm1, %xmm1 # sched: [12:3.00]
   5109 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   5110 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5111 ;
   5112 ; SKYLAKE-LABEL: test_sqrtss:
   5113 ; SKYLAKE:       # %bb.0:
   5114 ; SKYLAKE-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00]
   5115 ; SKYLAKE-NEXT:    vmovaps (%rdi), %xmm1 # sched: [6:0.50]
   5116 ; SKYLAKE-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:3.00]
   5117 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   5118 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5119 ;
   5120 ; SKX-SSE-LABEL: test_sqrtss:
   5121 ; SKX-SSE:       # %bb.0:
   5122 ; SKX-SSE-NEXT:    sqrtss %xmm0, %xmm0 # sched: [12:3.00]
   5123 ; SKX-SSE-NEXT:    movaps (%rdi), %xmm1 # sched: [6:0.50]
   5124 ; SKX-SSE-NEXT:    sqrtss %xmm1, %xmm1 # sched: [12:3.00]
   5125 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   5126 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5127 ;
   5128 ; SKX-LABEL: test_sqrtss:
   5129 ; SKX:       # %bb.0:
   5130 ; SKX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00]
   5131 ; SKX-NEXT:    vmovaps (%rdi), %xmm1 # sched: [6:0.50]
   5132 ; SKX-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:3.00]
   5133 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   5134 ; SKX-NEXT:    retq # sched: [7:1.00]
   5135 ;
   5136 ; BTVER2-SSE-LABEL: test_sqrtss:
   5137 ; BTVER2-SSE:       # %bb.0:
   5138 ; BTVER2-SSE-NEXT:    movaps (%rdi), %xmm1 # sched: [5:1.00]
   5139 ; BTVER2-SSE-NEXT:    sqrtss %xmm0, %xmm0 # sched: [21:21.00]
   5140 ; BTVER2-SSE-NEXT:    sqrtss %xmm1, %xmm1 # sched: [21:21.00]
   5141 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5142 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5143 ;
   5144 ; BTVER2-LABEL: test_sqrtss:
   5145 ; BTVER2:       # %bb.0:
   5146 ; BTVER2-NEXT:    vmovaps (%rdi), %xmm1 # sched: [5:1.00]
   5147 ; BTVER2-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [21:21.00]
   5148 ; BTVER2-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1 # sched: [21:21.00]
   5149 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5150 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5151 ;
   5152 ; ZNVER1-SSE-LABEL: test_sqrtss:
   5153 ; ZNVER1-SSE:       # %bb.0:
   5154 ; ZNVER1-SSE-NEXT:    movaps (%rdi), %xmm1 # sched: [8:0.50]
   5155 ; ZNVER1-SSE-NEXT:    sqrtss %xmm0, %xmm0 # sched: [20:20.00]
   5156 ; ZNVER1-SSE-NEXT:    sqrtss %xmm1, %xmm1 # sched: [20:20.00]
   5157 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5158 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5159 ;
   5160 ; ZNVER1-LABEL: test_sqrtss:
   5161 ; ZNVER1:       # %bb.0:
   5162 ; ZNVER1-NEXT:    vmovaps (%rdi), %xmm1 # sched: [8:0.50]
   5163 ; ZNVER1-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0 # sched: [20:20.00]
   5164 ; ZNVER1-NEXT:    vsqrtss %xmm1, %xmm1, %xmm1 # sched: [20:20.00]
   5165 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5166 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5167   %1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; first sqrtss, applied to the register argument
   5168   %2 = load <4 x float>, <4 x float> *%a1, align 16 ; full 16-byte vector load; the assertions above show it as a separate (v)movaps, not folded into sqrtss
   5169   %3 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %2) ; second sqrtss, applied to the loaded vector (still register form in the output)
   5170   %4 = fadd <4 x float> %1, %3 ; sums both results so each sqrtss feeds the return value
   5171   ret <4 x float> %4
   5172 }
   5173 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone ; intrinsic called twice by test_sqrtss; takes and returns a <4 x float>
   5174 
   5175 define i32 @test_stmxcsr() {
   5176 ; GENERIC-LABEL: test_stmxcsr:
   5177 ; GENERIC:       # %bb.0:
   5178 ; GENERIC-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
   5179 ; GENERIC-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
   5180 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5181 ;
   5182 ; ATOM-LABEL: test_stmxcsr:
   5183 ; ATOM:       # %bb.0:
   5184 ; ATOM-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [15:7.50]
   5185 ; ATOM-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [1:1.00]
   5186 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5187 ;
   5188 ; SLM-LABEL: test_stmxcsr:
   5189 ; SLM:       # %bb.0:
   5190 ; SLM-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   5191 ; SLM-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00]
   5192 ; SLM-NEXT:    retq # sched: [4:1.00]
   5193 ;
   5194 ; SANDY-SSE-LABEL: test_stmxcsr:
   5195 ; SANDY-SSE:       # %bb.0:
   5196 ; SANDY-SSE-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
   5197 ; SANDY-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
   5198 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5199 ;
   5200 ; SANDY-LABEL: test_stmxcsr:
   5201 ; SANDY:       # %bb.0:
   5202 ; SANDY-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
   5203 ; SANDY-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
   5204 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5205 ;
   5206 ; HASWELL-SSE-LABEL: test_stmxcsr:
   5207 ; HASWELL-SSE:       # %bb.0:
   5208 ; HASWELL-SSE-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
   5209 ; HASWELL-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
   5210 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5211 ;
   5212 ; HASWELL-LABEL: test_stmxcsr:
   5213 ; HASWELL:       # %bb.0:
   5214 ; HASWELL-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
   5215 ; HASWELL-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
   5216 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5217 ;
   5218 ; BROADWELL-SSE-LABEL: test_stmxcsr:
   5219 ; BROADWELL-SSE:       # %bb.0:
   5220 ; BROADWELL-SSE-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
   5221 ; BROADWELL-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
   5222 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5223 ;
   5224 ; BROADWELL-LABEL: test_stmxcsr:
   5225 ; BROADWELL:       # %bb.0:
   5226 ; BROADWELL-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
   5227 ; BROADWELL-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
   5228 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5229 ;
   5230 ; SKYLAKE-SSE-LABEL: test_stmxcsr:
   5231 ; SKYLAKE-SSE:       # %bb.0:
   5232 ; SKYLAKE-SSE-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
   5233 ; SKYLAKE-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
   5234 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5235 ;
   5236 ; SKYLAKE-LABEL: test_stmxcsr:
   5237 ; SKYLAKE:       # %bb.0:
   5238 ; SKYLAKE-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
   5239 ; SKYLAKE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
   5240 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5241 ;
   5242 ; SKX-SSE-LABEL: test_stmxcsr:
   5243 ; SKX-SSE:       # %bb.0:
   5244 ; SKX-SSE-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
   5245 ; SKX-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
   5246 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5247 ;
   5248 ; SKX-LABEL: test_stmxcsr:
   5249 ; SKX:       # %bb.0:
   5250 ; SKX-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
   5251 ; SKX-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
   5252 ; SKX-NEXT:    retq # sched: [7:1.00]
   5253 ;
   5254 ; BTVER2-SSE-LABEL: test_stmxcsr:
   5255 ; BTVER2-SSE:       # %bb.0:
   5256 ; BTVER2-SSE-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   5257 ; BTVER2-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00]
   5258 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5259 ;
   5260 ; BTVER2-LABEL: test_stmxcsr:
   5261 ; BTVER2:       # %bb.0:
   5262 ; BTVER2-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
   5263 ; BTVER2-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00]
   5264 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5265 ;
   5266 ; ZNVER1-SSE-LABEL: test_stmxcsr:
   5267 ; ZNVER1-SSE:       # %bb.0:
   5268 ; ZNVER1-SSE-NEXT:    stmxcsr -{{[0-9]+}}(%rsp) # sched: [100:0.25]
   5269 ; ZNVER1-SSE-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [8:0.50]
   5270 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5271 ;
   5272 ; ZNVER1-LABEL: test_stmxcsr:
   5273 ; ZNVER1:       # %bb.0:
   5274 ; ZNVER1-NEXT:    vstmxcsr -{{[0-9]+}}(%rsp) # sched: [100:0.25]
   5275 ; ZNVER1-NEXT:    movl -{{[0-9]+}}(%rsp), %eax # sched: [8:0.50]
   5276 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5277   %1 = alloca i32, align 4 ; 4-byte stack slot handed to the intrinsic
   5278   %2 = bitcast i32* %1 to i8* ; the intrinsic is declared with an i8* parameter
   5279   call void @llvm.x86.sse.stmxcsr(i8* %2) ; emitted as (v)stmxcsr with an %rsp-relative operand in the assertions above
   5280   %3 = load i32, i32* %1, align 4 ; reload of the same slot; matches the movl into %eax in the assertions above
   5281   ret i32 %3
   5282 }
   5283 declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone ; NOTE(review) readnone looks wrong for an intrinsic whose result is read back through its pointer argument (test_stmxcsr reloads the slot after the call) - confirm whether the declared attribute matters here
   5284 
   5285 define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
   5286 ; GENERIC-LABEL: test_subps:
   5287 ; GENERIC:       # %bb.0:
   5288 ; GENERIC-NEXT:    subps %xmm1, %xmm0 # sched: [3:1.00]
   5289 ; GENERIC-NEXT:    subps (%rdi), %xmm0 # sched: [9:1.00]
   5290 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5291 ;
   5292 ; ATOM-LABEL: test_subps:
   5293 ; ATOM:       # %bb.0:
   5294 ; ATOM-NEXT:    subps %xmm1, %xmm0 # sched: [5:5.00]
   5295 ; ATOM-NEXT:    subps (%rdi), %xmm0 # sched: [5:5.00]
   5296 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5297 ;
   5298 ; SLM-LABEL: test_subps:
   5299 ; SLM:       # %bb.0:
   5300 ; SLM-NEXT:    subps %xmm1, %xmm0 # sched: [3:1.00]
   5301 ; SLM-NEXT:    subps (%rdi), %xmm0 # sched: [6:1.00]
   5302 ; SLM-NEXT:    retq # sched: [4:1.00]
   5303 ;
   5304 ; SANDY-SSE-LABEL: test_subps:
   5305 ; SANDY-SSE:       # %bb.0:
   5306 ; SANDY-SSE-NEXT:    subps %xmm1, %xmm0 # sched: [3:1.00]
   5307 ; SANDY-SSE-NEXT:    subps (%rdi), %xmm0 # sched: [9:1.00]
   5308 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5309 ;
   5310 ; SANDY-LABEL: test_subps:
   5311 ; SANDY:       # %bb.0:
   5312 ; SANDY-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5313 ; SANDY-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   5314 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5315 ;
   5316 ; HASWELL-SSE-LABEL: test_subps:
   5317 ; HASWELL-SSE:       # %bb.0:
   5318 ; HASWELL-SSE-NEXT:    subps %xmm1, %xmm0 # sched: [3:1.00]
   5319 ; HASWELL-SSE-NEXT:    subps (%rdi), %xmm0 # sched: [9:1.00]
   5320 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5321 ;
   5322 ; HASWELL-LABEL: test_subps:
   5323 ; HASWELL:       # %bb.0:
   5324 ; HASWELL-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5325 ; HASWELL-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   5326 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5327 ;
   5328 ; BROADWELL-SSE-LABEL: test_subps:
   5329 ; BROADWELL-SSE:       # %bb.0:
   5330 ; BROADWELL-SSE-NEXT:    subps %xmm1, %xmm0 # sched: [3:1.00]
   5331 ; BROADWELL-SSE-NEXT:    subps (%rdi), %xmm0 # sched: [8:1.00]
   5332 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5333 ;
   5334 ; BROADWELL-LABEL: test_subps:
   5335 ; BROADWELL:       # %bb.0:
   5336 ; BROADWELL-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5337 ; BROADWELL-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   5338 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5339 ;
   5340 ; SKYLAKE-SSE-LABEL: test_subps:
   5341 ; SKYLAKE-SSE:       # %bb.0:
   5342 ; SKYLAKE-SSE-NEXT:    subps %xmm1, %xmm0 # sched: [4:0.50]
   5343 ; SKYLAKE-SSE-NEXT:    subps (%rdi), %xmm0 # sched: [10:0.50]
   5344 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5345 ;
   5346 ; SKYLAKE-LABEL: test_subps:
   5347 ; SKYLAKE:       # %bb.0:
   5348 ; SKYLAKE-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   5349 ; SKYLAKE-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   5350 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5351 ;
   5352 ; SKX-SSE-LABEL: test_subps:
   5353 ; SKX-SSE:       # %bb.0:
   5354 ; SKX-SSE-NEXT:    subps %xmm1, %xmm0 # sched: [4:0.50]
   5355 ; SKX-SSE-NEXT:    subps (%rdi), %xmm0 # sched: [10:0.50]
   5356 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5357 ;
   5358 ; SKX-LABEL: test_subps:
   5359 ; SKX:       # %bb.0:
   5360 ; SKX-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   5361 ; SKX-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
   5362 ; SKX-NEXT:    retq # sched: [7:1.00]
   5363 ;
   5364 ; BTVER2-SSE-LABEL: test_subps:
   5365 ; BTVER2-SSE:       # %bb.0:
   5366 ; BTVER2-SSE-NEXT:    subps %xmm1, %xmm0 # sched: [3:1.00]
   5367 ; BTVER2-SSE-NEXT:    subps (%rdi), %xmm0 # sched: [8:1.00]
   5368 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5369 ;
   5370 ; BTVER2-LABEL: test_subps:
   5371 ; BTVER2:       # %bb.0:
   5372 ; BTVER2-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5373 ; BTVER2-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   5374 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5375 ;
   5376 ; ZNVER1-SSE-LABEL: test_subps:
   5377 ; ZNVER1-SSE:       # %bb.0:
   5378 ; ZNVER1-SSE-NEXT:    subps %xmm1, %xmm0 # sched: [3:1.00]
   5379 ; ZNVER1-SSE-NEXT:    subps (%rdi), %xmm0 # sched: [10:1.00]
   5380 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5381 ;
   5382 ; ZNVER1-LABEL: test_subps:
   5383 ; ZNVER1:       # %bb.0:
   5384 ; ZNVER1-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5385 ; ZNVER1-NEXT:    vsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   5386 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5387   %1 = fsub <4 x float> %a0, %a1 ; register-register subtract
   5388   %2 = load <4 x float>, <4 x float> *%a2, align 16 ; 16-byte aligned vector load; the assertions above show it folded into the second (v)subps as a (%rdi) operand
   5389   %3 = fsub <4 x float> %1, %2 ; register-memory subtract, chained on the first result
   5390   ret <4 x float> %3
   5391 }
   5392 
   5393 define float @test_subss(float %a0, float %a1, float *%a2) {
        ; Scheduling-annotation test for scalar float subtraction.
        ; The IR at the bottom performs two fsub operations, one on the two
        ; register arguments and one whose right-hand side is loaded from %a2.
        ; The autogenerated FileCheck lines below pin, per CPU configuration of the
        ; RUN lines, the selected instruction (subss or vsubss) in its register form
        ; and its (%rdi) memory form, together with the "# sched" annotation that
        ; -print-schedule attaches to each instruction.
        ; NOTE(review): the bracketed pair is presumably latency and reciprocal
        ; throughput; confirm against the -print-schedule documentation.
   5394 ; GENERIC-LABEL: test_subss:
   5395 ; GENERIC:       # %bb.0:
   5396 ; GENERIC-NEXT:    subss %xmm1, %xmm0 # sched: [3:1.00]
   5397 ; GENERIC-NEXT:    subss (%rdi), %xmm0 # sched: [9:1.00]
   5398 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5399 ;
   5400 ; ATOM-LABEL: test_subss:
   5401 ; ATOM:       # %bb.0:
   5402 ; ATOM-NEXT:    subss %xmm1, %xmm0 # sched: [5:5.00]
   5403 ; ATOM-NEXT:    subss (%rdi), %xmm0 # sched: [5:5.00]
   5404 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5405 ;
   5406 ; SLM-LABEL: test_subss:
   5407 ; SLM:       # %bb.0:
   5408 ; SLM-NEXT:    subss %xmm1, %xmm0 # sched: [3:1.00]
   5409 ; SLM-NEXT:    subss (%rdi), %xmm0 # sched: [6:1.00]
   5410 ; SLM-NEXT:    retq # sched: [4:1.00]
   5411 ;
   5412 ; SANDY-SSE-LABEL: test_subss:
   5413 ; SANDY-SSE:       # %bb.0:
   5414 ; SANDY-SSE-NEXT:    subss %xmm1, %xmm0 # sched: [3:1.00]
   5415 ; SANDY-SSE-NEXT:    subss (%rdi), %xmm0 # sched: [9:1.00]
   5416 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5417 ;
   5418 ; SANDY-LABEL: test_subss:
   5419 ; SANDY:       # %bb.0:
   5420 ; SANDY-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5421 ; SANDY-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
   5422 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5423 ;
   5424 ; HASWELL-SSE-LABEL: test_subss:
   5425 ; HASWELL-SSE:       # %bb.0:
   5426 ; HASWELL-SSE-NEXT:    subss %xmm1, %xmm0 # sched: [3:1.00]
   5427 ; HASWELL-SSE-NEXT:    subss (%rdi), %xmm0 # sched: [8:1.00]
   5428 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5429 ;
   5430 ; HASWELL-LABEL: test_subss:
   5431 ; HASWELL:       # %bb.0:
   5432 ; HASWELL-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5433 ; HASWELL-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   5434 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5435 ;
   5436 ; BROADWELL-SSE-LABEL: test_subss:
   5437 ; BROADWELL-SSE:       # %bb.0:
   5438 ; BROADWELL-SSE-NEXT:    subss %xmm1, %xmm0 # sched: [3:1.00]
   5439 ; BROADWELL-SSE-NEXT:    subss (%rdi), %xmm0 # sched: [8:1.00]
   5440 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5441 ;
   5442 ; BROADWELL-LABEL: test_subss:
   5443 ; BROADWELL:       # %bb.0:
   5444 ; BROADWELL-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5445 ; BROADWELL-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   5446 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5447 ;
   5448 ; SKYLAKE-SSE-LABEL: test_subss:
   5449 ; SKYLAKE-SSE:       # %bb.0:
   5450 ; SKYLAKE-SSE-NEXT:    subss %xmm1, %xmm0 # sched: [4:0.50]
   5451 ; SKYLAKE-SSE-NEXT:    subss (%rdi), %xmm0 # sched: [9:0.50]
   5452 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5453 ;
   5454 ; SKYLAKE-LABEL: test_subss:
   5455 ; SKYLAKE:       # %bb.0:
   5456 ; SKYLAKE-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   5457 ; SKYLAKE-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   5458 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5459 ;
   5460 ; SKX-SSE-LABEL: test_subss:
   5461 ; SKX-SSE:       # %bb.0:
   5462 ; SKX-SSE-NEXT:    subss %xmm1, %xmm0 # sched: [4:0.50]
   5463 ; SKX-SSE-NEXT:    subss (%rdi), %xmm0 # sched: [9:0.50]
   5464 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5465 ;
   5466 ; SKX-LABEL: test_subss:
   5467 ; SKX:       # %bb.0:
   5468 ; SKX-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   5469 ; SKX-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
   5470 ; SKX-NEXT:    retq # sched: [7:1.00]
   5471 ;
   5472 ; BTVER2-SSE-LABEL: test_subss:
   5473 ; BTVER2-SSE:       # %bb.0:
   5474 ; BTVER2-SSE-NEXT:    subss %xmm1, %xmm0 # sched: [3:1.00]
   5475 ; BTVER2-SSE-NEXT:    subss (%rdi), %xmm0 # sched: [8:1.00]
   5476 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5477 ;
   5478 ; BTVER2-LABEL: test_subss:
   5479 ; BTVER2:       # %bb.0:
   5480 ; BTVER2-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5481 ; BTVER2-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
   5482 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5483 ;
   5484 ; ZNVER1-SSE-LABEL: test_subss:
   5485 ; ZNVER1-SSE:       # %bb.0:
   5486 ; ZNVER1-SSE-NEXT:    subss %xmm1, %xmm0 # sched: [3:1.00]
   5487 ; ZNVER1-SSE-NEXT:    subss (%rdi), %xmm0 # sched: [10:1.00]
   5488 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5489 ;
   5490 ; ZNVER1-LABEL: test_subss:
   5491 ; ZNVER1:       # %bb.0:
   5492 ; ZNVER1-NEXT:    vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5493 ; ZNVER1-NEXT:    vsubss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
   5494 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Register-register subtract of the two float arguments.
   5495   %1 = fsub float %a0, %a1
          ; The loaded float is only used as the right-hand side of the second fsub;
          ; the assertions above expect it as the (%rdi) memory operand.
   5496   %2 = load float, float *%a2, align 4
   5497   %3 = fsub float %1, %2
   5498   ret float %3
   5499 }
   5500 
   5501 define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Scheduling-annotation test for the ucomiss compare instruction.
        ; The IR at the bottom calls the llvm.x86.sse.ucomieq.ss intrinsic twice,
        ; once on the two register arguments and once against a vector loaded from
        ; %a2, then ORs the two i32 results.
        ; The autogenerated FileCheck lines below pin, per CPU configuration of the
        ; RUN lines, the register and (%rdi) memory forms of ucomiss or vucomiss,
        ; the setnp/sete/andb sequence that follows each compare, the final
        ; orb/movzbl, and the "# sched" annotation printed by -print-schedule for
        ; every instruction.
   5502 ; GENERIC-LABEL: test_ucomiss:
   5503 ; GENERIC:       # %bb.0:
   5504 ; GENERIC-NEXT:    ucomiss %xmm1, %xmm0 # sched: [2:1.00]
   5505 ; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
   5506 ; GENERIC-NEXT:    sete %cl # sched: [1:0.50]
   5507 ; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
   5508 ; GENERIC-NEXT:    ucomiss (%rdi), %xmm0 # sched: [8:1.00]
   5509 ; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
   5510 ; GENERIC-NEXT:    sete %dl # sched: [1:0.50]
   5511 ; GENERIC-NEXT:    andb %al, %dl # sched: [1:0.33]
   5512 ; GENERIC-NEXT:    orb %cl, %dl # sched: [1:0.33]
   5513 ; GENERIC-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
   5514 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5515 ;
   5516 ; ATOM-LABEL: test_ucomiss:
   5517 ; ATOM:       # %bb.0:
   5518 ; ATOM-NEXT:    ucomiss %xmm1, %xmm0 # sched: [9:4.50]
   5519 ; ATOM-NEXT:    setnp %al # sched: [1:0.50]
   5520 ; ATOM-NEXT:    sete %cl # sched: [1:0.50]
   5521 ; ATOM-NEXT:    andb %al, %cl # sched: [1:0.50]
   5522 ; ATOM-NEXT:    ucomiss (%rdi), %xmm0 # sched: [10:5.00]
   5523 ; ATOM-NEXT:    setnp %al # sched: [1:0.50]
   5524 ; ATOM-NEXT:    sete %dl # sched: [1:0.50]
   5525 ; ATOM-NEXT:    andb %al, %dl # sched: [1:0.50]
   5526 ; ATOM-NEXT:    orb %cl, %dl # sched: [1:0.50]
   5527 ; ATOM-NEXT:    movzbl %dl, %eax # sched: [1:1.00]
   5528 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5529 ;
   5530 ; SLM-LABEL: test_ucomiss:
   5531 ; SLM:       # %bb.0:
   5532 ; SLM-NEXT:    ucomiss %xmm1, %xmm0 # sched: [3:1.00]
   5533 ; SLM-NEXT:    setnp %al # sched: [1:0.50]
   5534 ; SLM-NEXT:    sete %cl # sched: [1:0.50]
   5535 ; SLM-NEXT:    andb %al, %cl # sched: [1:0.50]
   5536 ; SLM-NEXT:    ucomiss (%rdi), %xmm0 # sched: [6:1.00]
   5537 ; SLM-NEXT:    setnp %al # sched: [1:0.50]
   5538 ; SLM-NEXT:    sete %dl # sched: [1:0.50]
   5539 ; SLM-NEXT:    andb %al, %dl # sched: [1:0.50]
   5540 ; SLM-NEXT:    orb %cl, %dl # sched: [1:0.50]
   5541 ; SLM-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
   5542 ; SLM-NEXT:    retq # sched: [4:1.00]
   5543 ;
   5544 ; SANDY-SSE-LABEL: test_ucomiss:
   5545 ; SANDY-SSE:       # %bb.0:
   5546 ; SANDY-SSE-NEXT:    ucomiss %xmm1, %xmm0 # sched: [2:1.00]
   5547 ; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
   5548 ; SANDY-SSE-NEXT:    sete %cl # sched: [1:0.50]
   5549 ; SANDY-SSE-NEXT:    andb %al, %cl # sched: [1:0.33]
   5550 ; SANDY-SSE-NEXT:    ucomiss (%rdi), %xmm0 # sched: [8:1.00]
   5551 ; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
   5552 ; SANDY-SSE-NEXT:    sete %dl # sched: [1:0.50]
   5553 ; SANDY-SSE-NEXT:    andb %al, %dl # sched: [1:0.33]
   5554 ; SANDY-SSE-NEXT:    orb %cl, %dl # sched: [1:0.33]
   5555 ; SANDY-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
   5556 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5557 ;
   5558 ; SANDY-LABEL: test_ucomiss:
   5559 ; SANDY:       # %bb.0:
   5560 ; SANDY-NEXT:    vucomiss %xmm1, %xmm0 # sched: [2:1.00]
   5561 ; SANDY-NEXT:    setnp %al # sched: [1:0.50]
   5562 ; SANDY-NEXT:    sete %cl # sched: [1:0.50]
   5563 ; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
   5564 ; SANDY-NEXT:    vucomiss (%rdi), %xmm0 # sched: [8:1.00]
   5565 ; SANDY-NEXT:    setnp %al # sched: [1:0.50]
   5566 ; SANDY-NEXT:    sete %dl # sched: [1:0.50]
   5567 ; SANDY-NEXT:    andb %al, %dl # sched: [1:0.33]
   5568 ; SANDY-NEXT:    orb %cl, %dl # sched: [1:0.33]
   5569 ; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
   5570 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5571 ;
   5572 ; HASWELL-SSE-LABEL: test_ucomiss:
   5573 ; HASWELL-SSE:       # %bb.0:
   5574 ; HASWELL-SSE-NEXT:    ucomiss %xmm1, %xmm0 # sched: [3:1.00]
   5575 ; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
   5576 ; HASWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
   5577 ; HASWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
   5578 ; HASWELL-SSE-NEXT:    ucomiss (%rdi), %xmm0 # sched: [8:1.00]
   5579 ; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
   5580 ; HASWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
   5581 ; HASWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
   5582 ; HASWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
   5583 ; HASWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   5584 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5585 ;
   5586 ; HASWELL-LABEL: test_ucomiss:
   5587 ; HASWELL:       # %bb.0:
   5588 ; HASWELL-NEXT:    vucomiss %xmm1, %xmm0 # sched: [3:1.00]
   5589 ; HASWELL-NEXT:    setnp %al # sched: [1:0.50]
   5590 ; HASWELL-NEXT:    sete %cl # sched: [1:0.50]
   5591 ; HASWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
   5592 ; HASWELL-NEXT:    vucomiss (%rdi), %xmm0 # sched: [8:1.00]
   5593 ; HASWELL-NEXT:    setnp %al # sched: [1:0.50]
   5594 ; HASWELL-NEXT:    sete %dl # sched: [1:0.50]
   5595 ; HASWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
   5596 ; HASWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
   5597 ; HASWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   5598 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5599 ;
   5600 ; BROADWELL-SSE-LABEL: test_ucomiss:
   5601 ; BROADWELL-SSE:       # %bb.0:
   5602 ; BROADWELL-SSE-NEXT:    ucomiss %xmm1, %xmm0 # sched: [3:1.00]
   5603 ; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
   5604 ; BROADWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
   5605 ; BROADWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
   5606 ; BROADWELL-SSE-NEXT:    ucomiss (%rdi), %xmm0 # sched: [8:1.00]
   5607 ; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
   5608 ; BROADWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
   5609 ; BROADWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
   5610 ; BROADWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
   5611 ; BROADWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   5612 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5613 ;
   5614 ; BROADWELL-LABEL: test_ucomiss:
   5615 ; BROADWELL:       # %bb.0:
   5616 ; BROADWELL-NEXT:    vucomiss %xmm1, %xmm0 # sched: [3:1.00]
   5617 ; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
   5618 ; BROADWELL-NEXT:    sete %cl # sched: [1:0.50]
   5619 ; BROADWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
   5620 ; BROADWELL-NEXT:    vucomiss (%rdi), %xmm0 # sched: [8:1.00]
   5621 ; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
   5622 ; BROADWELL-NEXT:    sete %dl # sched: [1:0.50]
   5623 ; BROADWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
   5624 ; BROADWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
   5625 ; BROADWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   5626 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5627 ;
   5628 ; SKYLAKE-SSE-LABEL: test_ucomiss:
   5629 ; SKYLAKE-SSE:       # %bb.0:
   5630 ; SKYLAKE-SSE-NEXT:    ucomiss %xmm1, %xmm0 # sched: [2:1.00]
   5631 ; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
   5632 ; SKYLAKE-SSE-NEXT:    sete %cl # sched: [1:0.50]
   5633 ; SKYLAKE-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
   5634 ; SKYLAKE-SSE-NEXT:    ucomiss (%rdi), %xmm0 # sched: [7:1.00]
   5635 ; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
   5636 ; SKYLAKE-SSE-NEXT:    sete %dl # sched: [1:0.50]
   5637 ; SKYLAKE-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
   5638 ; SKYLAKE-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
   5639 ; SKYLAKE-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   5640 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5641 ;
   5642 ; SKYLAKE-LABEL: test_ucomiss:
   5643 ; SKYLAKE:       # %bb.0:
   5644 ; SKYLAKE-NEXT:    vucomiss %xmm1, %xmm0 # sched: [2:1.00]
   5645 ; SKYLAKE-NEXT:    setnp %al # sched: [1:0.50]
   5646 ; SKYLAKE-NEXT:    sete %cl # sched: [1:0.50]
   5647 ; SKYLAKE-NEXT:    andb %al, %cl # sched: [1:0.25]
   5648 ; SKYLAKE-NEXT:    vucomiss (%rdi), %xmm0 # sched: [7:1.00]
   5649 ; SKYLAKE-NEXT:    setnp %al # sched: [1:0.50]
   5650 ; SKYLAKE-NEXT:    sete %dl # sched: [1:0.50]
   5651 ; SKYLAKE-NEXT:    andb %al, %dl # sched: [1:0.25]
   5652 ; SKYLAKE-NEXT:    orb %cl, %dl # sched: [1:0.25]
   5653 ; SKYLAKE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   5654 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5655 ;
   5656 ; SKX-SSE-LABEL: test_ucomiss:
   5657 ; SKX-SSE:       # %bb.0:
   5658 ; SKX-SSE-NEXT:    ucomiss %xmm1, %xmm0 # sched: [2:1.00]
   5659 ; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
   5660 ; SKX-SSE-NEXT:    sete %cl # sched: [1:0.50]
   5661 ; SKX-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
   5662 ; SKX-SSE-NEXT:    ucomiss (%rdi), %xmm0 # sched: [7:1.00]
   5663 ; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
   5664 ; SKX-SSE-NEXT:    sete %dl # sched: [1:0.50]
   5665 ; SKX-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
   5666 ; SKX-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
   5667 ; SKX-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   5668 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5669 ;
   5670 ; SKX-LABEL: test_ucomiss:
   5671 ; SKX:       # %bb.0:
   5672 ; SKX-NEXT:    vucomiss %xmm1, %xmm0 # sched: [2:1.00]
   5673 ; SKX-NEXT:    setnp %al # sched: [1:0.50]
   5674 ; SKX-NEXT:    sete %cl # sched: [1:0.50]
   5675 ; SKX-NEXT:    andb %al, %cl # sched: [1:0.25]
   5676 ; SKX-NEXT:    vucomiss (%rdi), %xmm0 # sched: [7:1.00]
   5677 ; SKX-NEXT:    setnp %al # sched: [1:0.50]
   5678 ; SKX-NEXT:    sete %dl # sched: [1:0.50]
   5679 ; SKX-NEXT:    andb %al, %dl # sched: [1:0.25]
   5680 ; SKX-NEXT:    orb %cl, %dl # sched: [1:0.25]
   5681 ; SKX-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   5682 ; SKX-NEXT:    retq # sched: [7:1.00]
   5683 ;
   5684 ; BTVER2-SSE-LABEL: test_ucomiss:
   5685 ; BTVER2-SSE:       # %bb.0:
   5686 ; BTVER2-SSE-NEXT:    ucomiss %xmm1, %xmm0 # sched: [3:1.00]
   5687 ; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
   5688 ; BTVER2-SSE-NEXT:    sete %cl # sched: [1:0.50]
   5689 ; BTVER2-SSE-NEXT:    andb %al, %cl # sched: [1:0.50]
   5690 ; BTVER2-SSE-NEXT:    ucomiss (%rdi), %xmm0 # sched: [8:1.00]
   5691 ; BTVER2-SSE-NEXT:    setnp %al # sched: [1:0.50]
   5692 ; BTVER2-SSE-NEXT:    sete %dl # sched: [1:0.50]
   5693 ; BTVER2-SSE-NEXT:    andb %al, %dl # sched: [1:0.50]
   5694 ; BTVER2-SSE-NEXT:    orb %cl, %dl # sched: [1:0.50]
   5695 ; BTVER2-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
   5696 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5697 ;
   5698 ; BTVER2-LABEL: test_ucomiss:
   5699 ; BTVER2:       # %bb.0:
   5700 ; BTVER2-NEXT:    vucomiss %xmm1, %xmm0 # sched: [3:1.00]
   5701 ; BTVER2-NEXT:    setnp %al # sched: [1:0.50]
   5702 ; BTVER2-NEXT:    sete %cl # sched: [1:0.50]
   5703 ; BTVER2-NEXT:    andb %al, %cl # sched: [1:0.50]
   5704 ; BTVER2-NEXT:    vucomiss (%rdi), %xmm0 # sched: [8:1.00]
   5705 ; BTVER2-NEXT:    setnp %al # sched: [1:0.50]
   5706 ; BTVER2-NEXT:    sete %dl # sched: [1:0.50]
   5707 ; BTVER2-NEXT:    andb %al, %dl # sched: [1:0.50]
   5708 ; BTVER2-NEXT:    orb %cl, %dl # sched: [1:0.50]
   5709 ; BTVER2-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
   5710 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5711 ;
   5712 ; ZNVER1-SSE-LABEL: test_ucomiss:
   5713 ; ZNVER1-SSE:       # %bb.0:
   5714 ; ZNVER1-SSE-NEXT:    ucomiss %xmm1, %xmm0 # sched: [3:1.00]
   5715 ; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
   5716 ; ZNVER1-SSE-NEXT:    sete %cl # sched: [1:0.25]
   5717 ; ZNVER1-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
   5718 ; ZNVER1-SSE-NEXT:    ucomiss (%rdi), %xmm0 # sched: [10:1.00]
   5719 ; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
   5720 ; ZNVER1-SSE-NEXT:    sete %dl # sched: [1:0.25]
   5721 ; ZNVER1-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
   5722 ; ZNVER1-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
   5723 ; ZNVER1-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   5724 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5725 ;
   5726 ; ZNVER1-LABEL: test_ucomiss:
   5727 ; ZNVER1:       # %bb.0:
   5728 ; ZNVER1-NEXT:    vucomiss %xmm1, %xmm0 # sched: [3:1.00]
   5729 ; ZNVER1-NEXT:    setnp %al # sched: [1:0.25]
   5730 ; ZNVER1-NEXT:    sete %cl # sched: [1:0.25]
   5731 ; ZNVER1-NEXT:    andb %al, %cl # sched: [1:0.25]
   5732 ; ZNVER1-NEXT:    vucomiss (%rdi), %xmm0 # sched: [10:1.00]
   5733 ; ZNVER1-NEXT:    setnp %al # sched: [1:0.25]
   5734 ; ZNVER1-NEXT:    sete %dl # sched: [1:0.25]
   5735 ; ZNVER1-NEXT:    andb %al, %dl # sched: [1:0.25]
   5736 ; ZNVER1-NEXT:    orb %cl, %dl # sched: [1:0.25]
   5737 ; ZNVER1-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
   5738 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; First compare uses the two register arguments.
   5739   %1 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
          ; NOTE(review): this <4 x float> load is only 4-byte aligned, unlike the
          ; align 16 vector loads in the neighbouring unpck tests. Presumably that is
          ; deliberate because the expected memory form is a scalar compare; confirm.
   5740   %2 = load <4 x float>, <4 x float> *%a2, align 4
          ; Second compare takes the loaded vector as its right-hand operand.
   5741   %3 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %2)
          ; OR the two results so both compares feed the return value.
   5742   %4 = or i32 %1, %3
   5743   ret i32 %4
   5744 }
        ; Declaration of the compare intrinsic called by test_ucomiss above.
   5745 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
   5746 
   5747 define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Scheduling-annotation test for the unpckhps shuffle instruction.
        ; The IR at the bottom applies the shuffle mask <2, 6, 3, 7> twice, once to
        ; (%a0, %a1) and once to (%a1, vector loaded from %a2), and adds the two
        ; results with fadd.
        ; The autogenerated FileCheck lines below pin, per CPU configuration of the
        ; RUN lines, the register and memory forms of unpckhps or vunpckhps (matched
        ; through their printed shuffle comment), the following addps or vaddps, and
        ; the "sched" annotation printed by -print-schedule for each instruction.
   5748 ; GENERIC-LABEL: test_unpckhps:
   5749 ; GENERIC:       # %bb.0:
   5750 ; GENERIC-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   5751 ; GENERIC-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
   5752 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5753 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5754 ;
   5755 ; ATOM-LABEL: test_unpckhps:
   5756 ; ATOM:       # %bb.0:
   5757 ; ATOM-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   5758 ; ATOM-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00]
   5759 ; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
   5760 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5761 ;
   5762 ; SLM-LABEL: test_unpckhps:
   5763 ; SLM:       # %bb.0:
   5764 ; SLM-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   5765 ; SLM-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [4:1.00]
   5766 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5767 ; SLM-NEXT:    retq # sched: [4:1.00]
   5768 ;
   5769 ; SANDY-SSE-LABEL: test_unpckhps:
   5770 ; SANDY-SSE:       # %bb.0:
   5771 ; SANDY-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   5772 ; SANDY-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
   5773 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5774 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5775 ;
   5776 ; SANDY-LABEL: test_unpckhps:
   5777 ; SANDY:       # %bb.0:
   5778 ; SANDY-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   5779 ; SANDY-NEXT:    vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
   5780 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5781 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5782 ;
   5783 ; HASWELL-SSE-LABEL: test_unpckhps:
   5784 ; HASWELL-SSE:       # %bb.0:
   5785 ; HASWELL-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   5786 ; HASWELL-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
   5787 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5788 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5789 ;
   5790 ; HASWELL-LABEL: test_unpckhps:
   5791 ; HASWELL:       # %bb.0:
   5792 ; HASWELL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   5793 ; HASWELL-NEXT:    vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
   5794 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5795 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5796 ;
   5797 ; BROADWELL-SSE-LABEL: test_unpckhps:
   5798 ; BROADWELL-SSE:       # %bb.0:
   5799 ; BROADWELL-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   5800 ; BROADWELL-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
   5801 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5802 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5803 ;
   5804 ; BROADWELL-LABEL: test_unpckhps:
   5805 ; BROADWELL:       # %bb.0:
   5806 ; BROADWELL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   5807 ; BROADWELL-NEXT:    vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
   5808 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5809 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5810 ;
   5811 ; SKYLAKE-SSE-LABEL: test_unpckhps:
   5812 ; SKYLAKE-SSE:       # %bb.0:
   5813 ; SKYLAKE-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   5814 ; SKYLAKE-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
   5815 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   5816 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5817 ;
   5818 ; SKYLAKE-LABEL: test_unpckhps:
   5819 ; SKYLAKE:       # %bb.0:
   5820 ; SKYLAKE-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   5821 ; SKYLAKE-NEXT:    vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
   5822 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   5823 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5824 ;
   5825 ; SKX-SSE-LABEL: test_unpckhps:
   5826 ; SKX-SSE:       # %bb.0:
   5827 ; SKX-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   5828 ; SKX-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
   5829 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   5830 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5831 ;
   5832 ; SKX-LABEL: test_unpckhps:
   5833 ; SKX:       # %bb.0:
   5834 ; SKX-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
   5835 ; SKX-NEXT:    vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
   5836 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   5837 ; SKX-NEXT:    retq # sched: [7:1.00]
   5838 ;
   5839 ; BTVER2-SSE-LABEL: test_unpckhps:
   5840 ; BTVER2-SSE:       # %bb.0:
   5841 ; BTVER2-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
   5842 ; BTVER2-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
   5843 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5844 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5845 ;
   5846 ; BTVER2-LABEL: test_unpckhps:
   5847 ; BTVER2:       # %bb.0:
   5848 ; BTVER2-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
   5849 ; BTVER2-NEXT:    vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
   5850 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5851 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5852 ;
   5853 ; ZNVER1-SSE-LABEL: test_unpckhps:
   5854 ; ZNVER1-SSE:       # %bb.0:
   5855 ; ZNVER1-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
   5856 ; ZNVER1-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
   5857 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5858 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5859 ;
   5860 ; ZNVER1-LABEL: test_unpckhps:
   5861 ; ZNVER1:       # %bb.0:
   5862 ; ZNVER1-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
   5863 ; ZNVER1-NEXT:    vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
   5864 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5865 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Mask <2, 6, 3, 7> interleaves elements 2 and 3 of the two inputs.
   5866   %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
          ; The loaded vector is the second shuffle input, matching the mem[..]
          ; elements expected by the assertions above.
   5867   %2 = load <4 x float>, <4 x float> *%a2, align 16
   5868   %3 = shufflevector <4 x float> %a1, <4 x float> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
          ; Add the two shuffles so both contribute to the returned value.
   5869   %4 = fadd <4 x float> %1, %3
   5870   ret <4 x float> %4
   5871 }
   5872 
   5873 define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Scheduling-annotation test for the unpcklps shuffle instruction.
        ; The IR at the bottom applies the shuffle mask <0, 4, 1, 5> twice, once to
        ; (%a0, %a1) and once to (%a1, vector loaded from %a2), and adds the two
        ; results with fadd.
        ; The autogenerated FileCheck lines below pin, per CPU configuration of the
        ; RUN lines, the register and memory forms of unpcklps or vunpcklps (matched
        ; through their printed shuffle comment), the following addps or vaddps, and
        ; the "sched" annotation printed by -print-schedule for each instruction.
   5874 ; GENERIC-LABEL: test_unpcklps:
   5875 ; GENERIC:       # %bb.0:
   5876 ; GENERIC-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   5877 ; GENERIC-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
   5878 ; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5879 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5880 ;
   5881 ; ATOM-LABEL: test_unpcklps:
   5882 ; ATOM:       # %bb.0:
   5883 ; ATOM-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   5884 ; ATOM-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00]
   5885 ; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
   5886 ; ATOM-NEXT:    retq # sched: [79:39.50]
   5887 ;
   5888 ; SLM-LABEL: test_unpcklps:
   5889 ; SLM:       # %bb.0:
   5890 ; SLM-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   5891 ; SLM-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [4:1.00]
   5892 ; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5893 ; SLM-NEXT:    retq # sched: [4:1.00]
   5894 ;
   5895 ; SANDY-SSE-LABEL: test_unpcklps:
   5896 ; SANDY-SSE:       # %bb.0:
   5897 ; SANDY-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   5898 ; SANDY-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
   5899 ; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5900 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   5901 ;
   5902 ; SANDY-LABEL: test_unpcklps:
   5903 ; SANDY:       # %bb.0:
   5904 ; SANDY-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   5905 ; SANDY-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
   5906 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5907 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5908 ;
   5909 ; HASWELL-SSE-LABEL: test_unpcklps:
   5910 ; HASWELL-SSE:       # %bb.0:
   5911 ; HASWELL-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   5912 ; HASWELL-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
   5913 ; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5914 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5915 ;
   5916 ; HASWELL-LABEL: test_unpcklps:
   5917 ; HASWELL:       # %bb.0:
   5918 ; HASWELL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   5919 ; HASWELL-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
   5920 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5921 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5922 ;
   5923 ; BROADWELL-SSE-LABEL: test_unpcklps:
   5924 ; BROADWELL-SSE:       # %bb.0:
   5925 ; BROADWELL-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   5926 ; BROADWELL-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
   5927 ; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5928 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   5929 ;
   5930 ; BROADWELL-LABEL: test_unpcklps:
   5931 ; BROADWELL:       # %bb.0:
   5932 ; BROADWELL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   5933 ; BROADWELL-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
   5934 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5935 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5936 ;
   5937 ; SKYLAKE-SSE-LABEL: test_unpcklps:
   5938 ; SKYLAKE-SSE:       # %bb.0:
   5939 ; SKYLAKE-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   5940 ; SKYLAKE-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
   5941 ; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   5942 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   5943 ;
   5944 ; SKYLAKE-LABEL: test_unpcklps:
   5945 ; SKYLAKE:       # %bb.0:
   5946 ; SKYLAKE-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   5947 ; SKYLAKE-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
   5948 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   5949 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5950 ;
   5951 ; SKX-SSE-LABEL: test_unpcklps:
   5952 ; SKX-SSE:       # %bb.0:
   5953 ; SKX-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   5954 ; SKX-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
   5955 ; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
   5956 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   5957 ;
   5958 ; SKX-LABEL: test_unpcklps:
   5959 ; SKX:       # %bb.0:
   5960 ; SKX-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
   5961 ; SKX-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
   5962 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   5963 ; SKX-NEXT:    retq # sched: [7:1.00]
   5964 ;
   5965 ; BTVER2-SSE-LABEL: test_unpcklps:
   5966 ; BTVER2-SSE:       # %bb.0:
   5967 ; BTVER2-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
   5968 ; BTVER2-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
   5969 ; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5970 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   5971 ;
   5972 ; BTVER2-LABEL: test_unpcklps:
   5973 ; BTVER2:       # %bb.0:
   5974 ; BTVER2-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
   5975 ; BTVER2-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
   5976 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5977 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5978 ;
   5979 ; ZNVER1-SSE-LABEL: test_unpcklps:
   5980 ; ZNVER1-SSE:       # %bb.0:
   5981 ; ZNVER1-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
   5982 ; ZNVER1-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
   5983 ; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
   5984 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   5985 ;
   5986 ; ZNVER1-LABEL: test_unpcklps:
   5987 ; ZNVER1:       # %bb.0:
   5988 ; ZNVER1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
   5989 ; ZNVER1-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
   5990 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   5991 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Mask <0, 4, 1, 5> interleaves elements 0 and 1 of the two inputs.
   5992   %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
          ; The loaded vector is the second shuffle input, matching the mem[..]
          ; elements expected by the assertions above.
   5993   %2 = load <4 x float>, <4 x float> *%a2, align 16
   5994   %3 = shufflevector <4 x float> %a1, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
          ; Add the two shuffles so both contribute to the returned value.
   5995   %4 = fadd <4 x float> %1, %3
   5996   ret <4 x float> %4
   5997 }
   5998 
   5999 define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
   6000 ; GENERIC-LABEL: test_xorps:
   6001 ; GENERIC:       # %bb.0:
   6002 ; GENERIC-NEXT:    xorps %xmm1, %xmm0 # sched: [1:1.00]
   6003 ; GENERIC-NEXT:    xorps (%rdi), %xmm0 # sched: [7:1.00]
   6004 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6005 ;
   6006 ; ATOM-LABEL: test_xorps:
   6007 ; ATOM:       # %bb.0:
   6008 ; ATOM-NEXT:    xorps %xmm1, %xmm0 # sched: [1:0.50]
   6009 ; ATOM-NEXT:    xorps (%rdi), %xmm0 # sched: [1:1.00]
   6010 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6011 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6012 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6013 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6014 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6015 ;
   6016 ; SLM-LABEL: test_xorps:
   6017 ; SLM:       # %bb.0:
   6018 ; SLM-NEXT:    xorps %xmm1, %xmm0 # sched: [1:0.50]
   6019 ; SLM-NEXT:    xorps (%rdi), %xmm0 # sched: [4:1.00]
   6020 ; SLM-NEXT:    retq # sched: [4:1.00]
   6021 ;
   6022 ; SANDY-SSE-LABEL: test_xorps:
   6023 ; SANDY-SSE:       # %bb.0:
   6024 ; SANDY-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:1.00]
   6025 ; SANDY-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [7:1.00]
   6026 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   6027 ;
   6028 ; SANDY-LABEL: test_xorps:
   6029 ; SANDY:       # %bb.0:
   6030 ; SANDY-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6031 ; SANDY-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   6032 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6033 ;
   6034 ; HASWELL-SSE-LABEL: test_xorps:
   6035 ; HASWELL-SSE:       # %bb.0:
   6036 ; HASWELL-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:1.00]
   6037 ; HASWELL-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [7:1.00]
   6038 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6039 ;
   6040 ; HASWELL-LABEL: test_xorps:
   6041 ; HASWELL:       # %bb.0:
   6042 ; HASWELL-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6043 ; HASWELL-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   6044 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6045 ;
   6046 ; BROADWELL-SSE-LABEL: test_xorps:
   6047 ; BROADWELL-SSE:       # %bb.0:
   6048 ; BROADWELL-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:1.00]
   6049 ; BROADWELL-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [6:1.00]
   6050 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6051 ;
   6052 ; BROADWELL-LABEL: test_xorps:
   6053 ; BROADWELL:       # %bb.0:
   6054 ; BROADWELL-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   6055 ; BROADWELL-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   6056 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6057 ;
   6058 ; SKYLAKE-SSE-LABEL: test_xorps:
   6059 ; SKYLAKE-SSE:       # %bb.0:
   6060 ; SKYLAKE-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:0.33]
   6061 ; SKYLAKE-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [7:0.50]
   6062 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   6063 ;
   6064 ; SKYLAKE-LABEL: test_xorps:
   6065 ; SKYLAKE:       # %bb.0:
   6066 ; SKYLAKE-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   6067 ; SKYLAKE-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6068 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6069 ;
   6070 ; SKX-SSE-LABEL: test_xorps:
   6071 ; SKX-SSE:       # %bb.0:
   6072 ; SKX-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:0.33]
   6073 ; SKX-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [7:0.50]
   6074 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   6075 ;
   6076 ; SKX-LABEL: test_xorps:
   6077 ; SKX:       # %bb.0:
   6078 ; SKX-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
   6079 ; SKX-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
   6080 ; SKX-NEXT:    retq # sched: [7:1.00]
   6081 ;
   6082 ; BTVER2-SSE-LABEL: test_xorps:
   6083 ; BTVER2-SSE:       # %bb.0:
   6084 ; BTVER2-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:0.50]
   6085 ; BTVER2-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [6:1.00]
   6086 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   6087 ;
   6088 ; BTVER2-LABEL: test_xorps:
   6089 ; BTVER2:       # %bb.0:
   6090 ; BTVER2-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   6091 ; BTVER2-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   6092 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6093 ;
   6094 ; ZNVER1-SSE-LABEL: test_xorps:
   6095 ; ZNVER1-SSE:       # %bb.0:
   6096 ; ZNVER1-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:0.25]
   6097 ; ZNVER1-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [8:0.50]
   6098 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   6099 ;
   6100 ; ZNVER1-LABEL: test_xorps:
   6101 ; ZNVER1:       # %bb.0:
   6102 ; ZNVER1-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
   6103 ; ZNVER1-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   6104 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Exercises a packed-float XOR twice, once with both inputs passed as
          ; vector arguments and once with the second input loaded through %a2.
          ; The assertions above pin, for each check prefix, a register form and
          ; a (%rdi) memory form of xorps or vxorps, each with its "sched"
          ; annotation. They are autogenerated (see the note at the top of the
          ; file), so regenerate them with the script rather than editing by hand.
          ; The xor is written on <4 x i32> because the IR xor instruction takes
          ; integer operands, and the bitcasts only reinterpret the float lanes.
   6105   %1 = bitcast <4 x float> %a0 to <4 x i32>
   6106   %2 = bitcast <4 x float> %a1 to <4 x i32>
          ; XOR of the two vector arguments, no memory operand involved.
   6107   %3 = xor <4 x i32> %1, %2
          ; Second XOR operand is loaded from %a2 with 16-byte alignment.
   6108   %4 = load <4 x float>, <4 x float> *%a2, align 16
   6109   %5 = bitcast <4 x float> %4 to <4 x i32>
   6110   %6 = xor <4 x i32> %3, %5
          ; Reinterpret the integer result as floats for the return value.
   6111   %7 = bitcast <4 x i32> %6 to <4 x float>
   6112   ret <4 x float> %7
   6113 }
   6114 
   6115 ; 'WriteZero' and 'WriteNop' class instructions.
   6116 
   6117 define <4 x float> @test_fnop() nounwind {
   6118 ; GENERIC-LABEL: test_fnop:
   6119 ; GENERIC:       # %bb.0:
   6120 ; GENERIC-NEXT:    #APP
   6121 ; GENERIC-NEXT:    nop # sched: [1:0.25]
   6122 ; GENERIC-NEXT:    #NO_APP
   6123 ; GENERIC-NEXT:    xorps %xmm0, %xmm0 # sched: [1:1.00]
   6124 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   6125 ;
   6126 ; ATOM-LABEL: test_fnop:
   6127 ; ATOM:       # %bb.0:
   6128 ; ATOM-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.50]
   6129 ; ATOM-NEXT:    #APP
   6130 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6131 ; ATOM-NEXT:    #NO_APP
   6132 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6133 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6134 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6135 ; ATOM-NEXT:    nop # sched: [1:0.50]
   6136 ; ATOM-NEXT:    retq # sched: [79:39.50]
   6137 ;
   6138 ; SLM-LABEL: test_fnop:
   6139 ; SLM:       # %bb.0:
   6140 ; SLM-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.50]
   6141 ; SLM-NEXT:    #APP
   6142 ; SLM-NEXT:    nop # sched: [1:0.50]
   6143 ; SLM-NEXT:    #NO_APP
   6144 ; SLM-NEXT:    retq # sched: [4:1.00]
   6145 ;
   6146 ; SANDY-SSE-LABEL: test_fnop:
   6147 ; SANDY-SSE:       # %bb.0:
   6148 ; SANDY-SSE-NEXT:    #APP
   6149 ; SANDY-SSE-NEXT:    nop # sched: [1:0.25]
   6150 ; SANDY-SSE-NEXT:    #NO_APP
   6151 ; SANDY-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:1.00]
   6152 ; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
   6153 ;
   6154 ; SANDY-LABEL: test_fnop:
   6155 ; SANDY:       # %bb.0:
   6156 ; SANDY-NEXT:    #APP
   6157 ; SANDY-NEXT:    nop # sched: [1:0.25]
   6158 ; SANDY-NEXT:    #NO_APP
   6159 ; SANDY-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
   6160 ; SANDY-NEXT:    retq # sched: [1:1.00]
   6161 ;
   6162 ; HASWELL-SSE-LABEL: test_fnop:
   6163 ; HASWELL-SSE:       # %bb.0:
   6164 ; HASWELL-SSE-NEXT:    #APP
   6165 ; HASWELL-SSE-NEXT:    nop # sched: [1:0.25]
   6166 ; HASWELL-SSE-NEXT:    #NO_APP
   6167 ; HASWELL-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:1.00]
   6168 ; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6169 ;
   6170 ; HASWELL-LABEL: test_fnop:
   6171 ; HASWELL:       # %bb.0:
   6172 ; HASWELL-NEXT:    #APP
   6173 ; HASWELL-NEXT:    nop # sched: [1:0.25]
   6174 ; HASWELL-NEXT:    #NO_APP
   6175 ; HASWELL-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
   6176 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   6177 ;
   6178 ; BROADWELL-SSE-LABEL: test_fnop:
   6179 ; BROADWELL-SSE:       # %bb.0:
   6180 ; BROADWELL-SSE-NEXT:    #APP
   6181 ; BROADWELL-SSE-NEXT:    nop # sched: [1:0.25]
   6182 ; BROADWELL-SSE-NEXT:    #NO_APP
   6183 ; BROADWELL-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:1.00]
   6184 ; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
   6185 ;
   6186 ; BROADWELL-LABEL: test_fnop:
   6187 ; BROADWELL:       # %bb.0:
   6188 ; BROADWELL-NEXT:    #APP
   6189 ; BROADWELL-NEXT:    nop # sched: [1:0.25]
   6190 ; BROADWELL-NEXT:    #NO_APP
   6191 ; BROADWELL-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
   6192 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   6193 ;
   6194 ; SKYLAKE-SSE-LABEL: test_fnop:
   6195 ; SKYLAKE-SSE:       # %bb.0:
   6196 ; SKYLAKE-SSE-NEXT:    #APP
   6197 ; SKYLAKE-SSE-NEXT:    nop # sched: [1:0.17]
   6198 ; SKYLAKE-SSE-NEXT:    #NO_APP
   6199 ; SKYLAKE-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.33]
   6200 ; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
   6201 ;
   6202 ; SKYLAKE-LABEL: test_fnop:
   6203 ; SKYLAKE:       # %bb.0:
   6204 ; SKYLAKE-NEXT:    #APP
   6205 ; SKYLAKE-NEXT:    nop # sched: [1:0.17]
   6206 ; SKYLAKE-NEXT:    #NO_APP
   6207 ; SKYLAKE-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
   6208 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   6209 ;
   6210 ; SKX-SSE-LABEL: test_fnop:
   6211 ; SKX-SSE:       # %bb.0:
   6212 ; SKX-SSE-NEXT:    #APP
   6213 ; SKX-SSE-NEXT:    nop # sched: [1:0.17]
   6214 ; SKX-SSE-NEXT:    #NO_APP
   6215 ; SKX-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.33]
   6216 ; SKX-SSE-NEXT:    retq # sched: [7:1.00]
   6217 ;
   6218 ; SKX-LABEL: test_fnop:
   6219 ; SKX:       # %bb.0:
   6220 ; SKX-NEXT:    #APP
   6221 ; SKX-NEXT:    nop # sched: [1:0.17]
   6222 ; SKX-NEXT:    #NO_APP
   6223 ; SKX-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
   6224 ; SKX-NEXT:    retq # sched: [7:1.00]
   6225 ;
   6226 ; BTVER2-SSE-LABEL: test_fnop:
   6227 ; BTVER2-SSE:       # %bb.0:
   6228 ; BTVER2-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [0:0.50]
   6229 ; BTVER2-SSE-NEXT:    #APP
   6230 ; BTVER2-SSE-NEXT:    nop # sched: [1:0.50]
   6231 ; BTVER2-SSE-NEXT:    #NO_APP
   6232 ; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
   6233 ;
   6234 ; BTVER2-LABEL: test_fnop:
   6235 ; BTVER2:       # %bb.0:
   6236 ; BTVER2-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [0:0.50]
   6237 ; BTVER2-NEXT:    #APP
   6238 ; BTVER2-NEXT:    nop # sched: [1:0.50]
   6239 ; BTVER2-NEXT:    #NO_APP
   6240 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   6241 ;
   6242 ; ZNVER1-SSE-LABEL: test_fnop:
   6243 ; ZNVER1-SSE:       # %bb.0:
   6244 ; ZNVER1-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.25]
   6245 ; ZNVER1-SSE-NEXT:    #APP
   6246 ; ZNVER1-SSE-NEXT:    nop # sched: [1:0.25]
   6247 ; ZNVER1-SSE-NEXT:    #NO_APP
   6248 ; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
   6249 ;
   6250 ; ZNVER1-LABEL: test_fnop:
   6251 ; ZNVER1:       # %bb.0:
   6252 ; ZNVER1-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
   6253 ; ZNVER1-NEXT:    #APP
   6254 ; ZNVER1-NEXT:    nop # sched: [1:0.25]
   6255 ; ZNVER1-NEXT:    #NO_APP
   6256 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Emits an inline-asm nop marked sideeffect and returns an all-zero
          ; vector. The assertions above expect the nop between the #APP and
          ; #NO_APP markers plus an xorps or vxorps of xmm0 with itself, each
          ; carrying a "sched" annotation. The relative order of the two differs
          ; between prefixes (nop first for GENERIC, xorps first for ATOM, for
          ; example), so that order is part of what the assertions pin.
   6257   tail call void asm sideeffect "nop", ""() nounwind
          ; The zero return value is what the assertions show as the self-xor of xmm0.
   6258   ret <4 x float> zeroinitializer
   6259 }
   6260 
   6261 !0 = !{i32 1} ; NOTE(review) no use of !0 is visible near the end of the file, presumably an earlier test references it, confirm before removing
   6262