Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
      9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
     10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
     11 
     12 define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
        ; Adds %a0 and %a1, then adds a vector loaded from %a2. For every CPU
        ; prefix the assertions below expect a register-register vaddpd followed
        ; by a vaddpd whose source is (%rdi), each with its "# sched:" annotation.
     13 ; GENERIC-LABEL: test_addpd:
     14 ; GENERIC:       # %bb.0:
     15 ; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
     16 ; GENERIC-NEXT:    vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
     17 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     18 ;
     19 ; SANDY-LABEL: test_addpd:
     20 ; SANDY:       # %bb.0:
     21 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
     22 ; SANDY-NEXT:    vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
     23 ; SANDY-NEXT:    retq # sched: [1:1.00]
     24 ;
     25 ; HASWELL-LABEL: test_addpd:
     26 ; HASWELL:       # %bb.0:
     27 ; HASWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
     28 ; HASWELL-NEXT:    vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
     29 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     30 ;
     31 ; BROADWELL-LABEL: test_addpd:
     32 ; BROADWELL:       # %bb.0:
     33 ; BROADWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
     34 ; BROADWELL-NEXT:    vaddpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
     35 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     36 ;
     37 ; SKYLAKE-LABEL: test_addpd:
     38 ; SKYLAKE:       # %bb.0:
     39 ; SKYLAKE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
     40 ; SKYLAKE-NEXT:    vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
     41 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
     42 ;
     43 ; SKX-LABEL: test_addpd:
     44 ; SKX:       # %bb.0:
     45 ; SKX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
     46 ; SKX-NEXT:    vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
     47 ; SKX-NEXT:    retq # sched: [7:1.00]
     48 ;
     49 ; BTVER2-LABEL: test_addpd:
     50 ; BTVER2:       # %bb.0:
     51 ; BTVER2-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
     52 ; BTVER2-NEXT:    vaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
     53 ; BTVER2-NEXT:    retq # sched: [4:1.00]
     54 ;
     55 ; ZNVER1-LABEL: test_addpd:
     56 ; ZNVER1:       # %bb.0:
     57 ; ZNVER1-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
     58 ; ZNVER1-NEXT:    vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
     59 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
     60   %1 = fadd <4 x double> %a0, %a1 ; both operands arrive in registers
     61   %2 = load <4 x double>, <4 x double> *%a2, align 32 ; no separate load is expected; it shows up as the (%rdi) operand
     62   %3 = fadd <4 x double> %1, %2 ; second add consumes the loaded vector
     63   ret <4 x double> %3
     64 }
     65 
     66 define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
        ; Single-precision counterpart of the add test: adds %a0 and %a1, then
        ; adds a vector loaded from %a2. The assertions below expect a
        ; register-register vaddps followed by a vaddps sourcing (%rdi).
     67 ; GENERIC-LABEL: test_addps:
     68 ; GENERIC:       # %bb.0:
     69 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
     70 ; GENERIC-NEXT:    vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
     71 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     72 ;
     73 ; SANDY-LABEL: test_addps:
     74 ; SANDY:       # %bb.0:
     75 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
     76 ; SANDY-NEXT:    vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
     77 ; SANDY-NEXT:    retq # sched: [1:1.00]
     78 ;
     79 ; HASWELL-LABEL: test_addps:
     80 ; HASWELL:       # %bb.0:
     81 ; HASWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
     82 ; HASWELL-NEXT:    vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
     83 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     84 ;
     85 ; BROADWELL-LABEL: test_addps:
     86 ; BROADWELL:       # %bb.0:
     87 ; BROADWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
     88 ; BROADWELL-NEXT:    vaddps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
     89 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     90 ;
     91 ; SKYLAKE-LABEL: test_addps:
     92 ; SKYLAKE:       # %bb.0:
     93 ; SKYLAKE-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
     94 ; SKYLAKE-NEXT:    vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
     95 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
     96 ;
     97 ; SKX-LABEL: test_addps:
     98 ; SKX:       # %bb.0:
     99 ; SKX-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
    100 ; SKX-NEXT:    vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
    101 ; SKX-NEXT:    retq # sched: [7:1.00]
    102 ;
    103 ; BTVER2-LABEL: test_addps:
    104 ; BTVER2:       # %bb.0:
    105 ; BTVER2-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
    106 ; BTVER2-NEXT:    vaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
    107 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    108 ;
    109 ; ZNVER1-LABEL: test_addps:
    110 ; ZNVER1:       # %bb.0:
    111 ; ZNVER1-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
    112 ; ZNVER1-NEXT:    vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
    113 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    114   %1 = fadd <8 x float> %a0, %a1 ; both operands arrive in registers
    115   %2 = load <8 x float>, <8 x float> *%a2, align 32 ; no separate load is expected; it shows up as the (%rdi) operand
    116   %3 = fadd <8 x float> %1, %2 ; second add consumes the loaded vector
    117   ret <8 x float> %3
    118 }
    119 
    120 define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
        ; Calls the 256-bit addsub.pd intrinsic twice: once on %a0/%a1 and once
        ; on that result and a vector loaded from %a2. The assertions below
        ; expect a register-register vaddsubpd, then a vaddsubpd sourcing (%rdi).
    121 ; GENERIC-LABEL: test_addsubpd:
    122 ; GENERIC:       # %bb.0:
    123 ; GENERIC-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
    124 ; GENERIC-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
    125 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    126 ;
    127 ; SANDY-LABEL: test_addsubpd:
    128 ; SANDY:       # %bb.0:
    129 ; SANDY-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
    130 ; SANDY-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
    131 ; SANDY-NEXT:    retq # sched: [1:1.00]
    132 ;
    133 ; HASWELL-LABEL: test_addsubpd:
    134 ; HASWELL:       # %bb.0:
    135 ; HASWELL-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
    136 ; HASWELL-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
    137 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    138 ;
    139 ; BROADWELL-LABEL: test_addsubpd:
    140 ; BROADWELL:       # %bb.0:
    141 ; BROADWELL-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
    142 ; BROADWELL-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
    143 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    144 ;
    145 ; SKYLAKE-LABEL: test_addsubpd:
    146 ; SKYLAKE:       # %bb.0:
    147 ; SKYLAKE-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
    148 ; SKYLAKE-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
    149 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    150 ;
    151 ; SKX-LABEL: test_addsubpd:
    152 ; SKX:       # %bb.0:
    153 ; SKX-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
    154 ; SKX-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
    155 ; SKX-NEXT:    retq # sched: [7:1.00]
    156 ;
    157 ; BTVER2-LABEL: test_addsubpd:
    158 ; BTVER2:       # %bb.0:
    159 ; BTVER2-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
    160 ; BTVER2-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
    161 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    162 ;
    163 ; ZNVER1-LABEL: test_addsubpd:
    164 ; ZNVER1:       # %bb.0:
    165 ; ZNVER1-NEXT:    vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
    166 ; ZNVER1-NEXT:    vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
    167 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    168   %1 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; both operands arrive in registers
    169   %2 = load <4 x double>, <4 x double> *%a2, align 32 ; no separate load is expected; it shows up as the (%rdi) operand
    170   %3 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %1, <4 x double> %2) ; second call consumes the loaded vector
    171   ret <4 x double> %3
    172 }
        ; Declaration of the intrinsic called by test_addsubpd.
    173 declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
    174 
    175 define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
        ; Calls the 256-bit addsub.ps intrinsic twice: once on %a0/%a1 and once
        ; on that result and a vector loaded from %a2. The assertions below
        ; expect a register-register vaddsubps, then a vaddsubps sourcing (%rdi).
    176 ; GENERIC-LABEL: test_addsubps:
    177 ; GENERIC:       # %bb.0:
    178 ; GENERIC-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
    179 ; GENERIC-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
    180 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    181 ;
    182 ; SANDY-LABEL: test_addsubps:
    183 ; SANDY:       # %bb.0:
    184 ; SANDY-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
    185 ; SANDY-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
    186 ; SANDY-NEXT:    retq # sched: [1:1.00]
    187 ;
    188 ; HASWELL-LABEL: test_addsubps:
    189 ; HASWELL:       # %bb.0:
    190 ; HASWELL-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
    191 ; HASWELL-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
    192 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    193 ;
    194 ; BROADWELL-LABEL: test_addsubps:
    195 ; BROADWELL:       # %bb.0:
    196 ; BROADWELL-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
    197 ; BROADWELL-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
    198 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    199 ;
    200 ; SKYLAKE-LABEL: test_addsubps:
    201 ; SKYLAKE:       # %bb.0:
    202 ; SKYLAKE-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
    203 ; SKYLAKE-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
    204 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    205 ;
    206 ; SKX-LABEL: test_addsubps:
    207 ; SKX:       # %bb.0:
    208 ; SKX-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
    209 ; SKX-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
    210 ; SKX-NEXT:    retq # sched: [7:1.00]
    211 ;
    212 ; BTVER2-LABEL: test_addsubps:
    213 ; BTVER2:       # %bb.0:
    214 ; BTVER2-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
    215 ; BTVER2-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
    216 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    217 ;
    218 ; ZNVER1-LABEL: test_addsubps:
    219 ; ZNVER1:       # %bb.0:
    220 ; ZNVER1-NEXT:    vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
    221 ; ZNVER1-NEXT:    vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
    222 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    223   %1 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; both operands arrive in registers
    224   %2 = load <8 x float>, <8 x float> *%a2, align 32 ; no separate load is expected; it shows up as the (%rdi) operand
    225   %3 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %1, <8 x float> %2) ; second call consumes the loaded vector
    226   ret <8 x float> %3
    227 }
        ; Declaration of the intrinsic called by test_addsubps.
    228 declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
    229 
    230 define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
        ; Builds two and-not operations on <4 x i64> bitcasts: (~%a0) & %a1,
        ; then the complement of that result ANDed with a vector loaded from
        ; %a2, and finally adds %a1. The assertions below expect a
        ; register-register vandnpd, a vandnpd sourcing (%rdi), and a vaddpd.
    231 ; GENERIC-LABEL: test_andnotpd:
    232 ; GENERIC:       # %bb.0:
    233 ; GENERIC-NEXT:    vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    234 ; GENERIC-NEXT:    vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
    235 ; GENERIC-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    236 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    237 ;
    238 ; SANDY-LABEL: test_andnotpd:
    239 ; SANDY:       # %bb.0:
    240 ; SANDY-NEXT:    vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    241 ; SANDY-NEXT:    vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
    242 ; SANDY-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    243 ; SANDY-NEXT:    retq # sched: [1:1.00]
    244 ;
    245 ; HASWELL-LABEL: test_andnotpd:
    246 ; HASWELL:       # %bb.0:
    247 ; HASWELL-NEXT:    vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    248 ; HASWELL-NEXT:    vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
    249 ; HASWELL-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    250 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    251 ;
    252 ; BROADWELL-LABEL: test_andnotpd:
    253 ; BROADWELL:       # %bb.0:
    254 ; BROADWELL-NEXT:    vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    255 ; BROADWELL-NEXT:    vandnpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
    256 ; BROADWELL-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    257 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    258 ;
    259 ; SKYLAKE-LABEL: test_andnotpd:
    260 ; SKYLAKE:       # %bb.0:
    261 ; SKYLAKE-NEXT:    vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
    262 ; SKYLAKE-NEXT:    vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
    263 ; SKYLAKE-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
    264 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    265 ;
    266 ; SKX-LABEL: test_andnotpd:
    267 ; SKX:       # %bb.0:
    268 ; SKX-NEXT:    vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
    269 ; SKX-NEXT:    vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
    270 ; SKX-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
    271 ; SKX-NEXT:    retq # sched: [7:1.00]
    272 ;
    273 ; BTVER2-LABEL: test_andnotpd:
    274 ; BTVER2:       # %bb.0:
    275 ; BTVER2-NEXT:    vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    276 ; BTVER2-NEXT:    vandnpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
    277 ; BTVER2-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
    278 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    279 ;
    280 ; ZNVER1-LABEL: test_andnotpd:
    281 ; ZNVER1:       # %bb.0:
    282 ; ZNVER1-NEXT:    vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
    283 ; ZNVER1-NEXT:    vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
    284 ; ZNVER1-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    285 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    286   %1 = bitcast <4 x double> %a0 to <4 x i64>
    287   %2 = bitcast <4 x double> %a1 to <4 x i64>
    288   %3 = xor <4 x i64> %1, <i64 -1, i64 -1, i64 -1, i64 -1> ; bitwise complement of %a0
    289   %4 = and <4 x i64> %3, %2 ; (~%a0) & %a1
    290   %5 = load <4 x double>, <4 x double> *%a2, align 32 ; no separate load is expected; it shows up as the (%rdi) operand
    291   %6 = bitcast <4 x double> %5 to <4 x i64>
    292   %7 = xor <4 x i64> %4, <i64 -1, i64 -1, i64 -1, i64 -1> ; bitwise complement of the first result
    293   %8 = and <4 x i64> %6, %7 ; loaded value & ~(first result)
    294   %9 = bitcast <4 x i64> %8 to <4 x double>
    295   %10 = fadd <4 x double> %a1, %9 ; NOTE(review): presumably present to keep the logic ops in the FP domain - confirm
    296   ret <4 x double> %10
    297 }
    298 
    299 define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
        ; Builds two and-not operations on <4 x i64> bitcasts of the float
        ; vectors: (~%a0) & %a1, then the complement of that result ANDed with a
        ; vector loaded from %a2, and finally adds %a1. The assertions below
        ; expect a register-register vandnps, a vandnps sourcing (%rdi), and a
        ; vaddps.
    300 ; GENERIC-LABEL: test_andnotps:
    301 ; GENERIC:       # %bb.0:
    302 ; GENERIC-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    303 ; GENERIC-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
    304 ; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    305 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    306 ;
    307 ; SANDY-LABEL: test_andnotps:
    308 ; SANDY:       # %bb.0:
    309 ; SANDY-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    310 ; SANDY-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
    311 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    312 ; SANDY-NEXT:    retq # sched: [1:1.00]
    313 ;
    314 ; HASWELL-LABEL: test_andnotps:
    315 ; HASWELL:       # %bb.0:
    316 ; HASWELL-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    317 ; HASWELL-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
    318 ; HASWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    319 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    320 ;
    321 ; BROADWELL-LABEL: test_andnotps:
    322 ; BROADWELL:       # %bb.0:
    323 ; BROADWELL-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    324 ; BROADWELL-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
    325 ; BROADWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    326 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    327 ;
    328 ; SKYLAKE-LABEL: test_andnotps:
    329 ; SKYLAKE:       # %bb.0:
    330 ; SKYLAKE-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
    331 ; SKYLAKE-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
    332 ; SKYLAKE-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
    333 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    334 ;
    335 ; SKX-LABEL: test_andnotps:
    336 ; SKX:       # %bb.0:
    337 ; SKX-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
    338 ; SKX-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
    339 ; SKX-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
    340 ; SKX-NEXT:    retq # sched: [7:1.00]
    341 ;
    342 ; BTVER2-LABEL: test_andnotps:
    343 ; BTVER2:       # %bb.0:
    344 ; BTVER2-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    345 ; BTVER2-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
    346 ; BTVER2-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
    347 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    348 ;
    349 ; ZNVER1-LABEL: test_andnotps:
    350 ; ZNVER1:       # %bb.0:
    351 ; ZNVER1-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
    352 ; ZNVER1-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
    353 ; ZNVER1-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    354 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    355   %1 = bitcast <8 x float> %a0 to <4 x i64>
    356   %2 = bitcast <8 x float> %a1 to <4 x i64>
    357   %3 = xor <4 x i64> %1, <i64 -1, i64 -1, i64 -1, i64 -1> ; bitwise complement of %a0
    358   %4 = and <4 x i64> %3, %2 ; (~%a0) & %a1
    359   %5 = load <8 x float>, <8 x float> *%a2, align 32 ; no separate load is expected; it shows up as the (%rdi) operand
    360   %6 = bitcast <8 x float> %5 to <4 x i64>
    361   %7 = xor <4 x i64> %4, <i64 -1, i64 -1, i64 -1, i64 -1> ; bitwise complement of the first result
    362   %8 = and <4 x i64> %6, %7 ; loaded value & ~(first result)
    363   %9 = bitcast <4 x i64> %8 to <8 x float>
    364   %10 = fadd <8 x float> %a1, %9 ; NOTE(review): presumably present to keep the logic ops in the FP domain - confirm
    365   ret <8 x float> %10
    366 }
    367 
    368 define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
        ; ANDs %a0 with %a1 on <4 x i64> bitcasts, ANDs that with a vector
        ; loaded from %a2, then adds %a1. The assertions below expect a
        ; register-register vandpd, a vandpd sourcing (%rdi), and a vaddpd.
    369 ; GENERIC-LABEL: test_andpd:
    370 ; GENERIC:       # %bb.0:
    371 ; GENERIC-NEXT:    vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    372 ; GENERIC-NEXT:    vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
    373 ; GENERIC-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    374 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    375 ;
    376 ; SANDY-LABEL: test_andpd:
    377 ; SANDY:       # %bb.0:
    378 ; SANDY-NEXT:    vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    379 ; SANDY-NEXT:    vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
    380 ; SANDY-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    381 ; SANDY-NEXT:    retq # sched: [1:1.00]
    382 ;
    383 ; HASWELL-LABEL: test_andpd:
    384 ; HASWELL:       # %bb.0:
    385 ; HASWELL-NEXT:    vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    386 ; HASWELL-NEXT:    vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
    387 ; HASWELL-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    388 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    389 ;
    390 ; BROADWELL-LABEL: test_andpd:
    391 ; BROADWELL:       # %bb.0:
    392 ; BROADWELL-NEXT:    vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    393 ; BROADWELL-NEXT:    vandpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
    394 ; BROADWELL-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    395 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    396 ;
    397 ; SKYLAKE-LABEL: test_andpd:
    398 ; SKYLAKE:       # %bb.0:
    399 ; SKYLAKE-NEXT:    vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
    400 ; SKYLAKE-NEXT:    vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
    401 ; SKYLAKE-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
    402 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    403 ;
    404 ; SKX-LABEL: test_andpd:
    405 ; SKX:       # %bb.0:
    406 ; SKX-NEXT:    vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
    407 ; SKX-NEXT:    vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
    408 ; SKX-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
    409 ; SKX-NEXT:    retq # sched: [7:1.00]
    410 ;
    411 ; BTVER2-LABEL: test_andpd:
    412 ; BTVER2:       # %bb.0:
    413 ; BTVER2-NEXT:    vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    414 ; BTVER2-NEXT:    vandpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
    415 ; BTVER2-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
    416 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    417 ;
    418 ; ZNVER1-LABEL: test_andpd:
    419 ; ZNVER1:       # %bb.0:
    420 ; ZNVER1-NEXT:    vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
    421 ; ZNVER1-NEXT:    vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
    422 ; ZNVER1-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    423 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    424   %1 = bitcast <4 x double> %a0 to <4 x i64>
    425   %2 = bitcast <4 x double> %a1 to <4 x i64>
    426   %3 = and <4 x i64> %1, %2 ; %a0 & %a1, both in registers
    427   %4 = load <4 x double>, <4 x double> *%a2, align 32 ; no separate load is expected; it shows up as the (%rdi) operand
    428   %5 = bitcast <4 x double> %4 to <4 x i64>
    429   %6 = and <4 x i64> %3, %5 ; first result & loaded value
    430   %7 = bitcast <4 x i64> %6 to <4 x double>
    431   %8 = fadd <4 x double> %a1, %7 ; NOTE(review): presumably present to keep the logic ops in the FP domain - confirm
    432   ret <4 x double> %8
    433 }
    434 
    435 define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
        ; ANDs %a0 with %a1 on <4 x i64> bitcasts of the float vectors, ANDs
        ; that with a vector loaded from %a2, then adds %a1. The assertions
        ; below expect a register-register vandps, a vandps sourcing (%rdi),
        ; and a vaddps.
    436 ; GENERIC-LABEL: test_andps:
    437 ; GENERIC:       # %bb.0:
    438 ; GENERIC-NEXT:    vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    439 ; GENERIC-NEXT:    vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
    440 ; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    441 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    442 ;
    443 ; SANDY-LABEL: test_andps:
    444 ; SANDY:       # %bb.0:
    445 ; SANDY-NEXT:    vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    446 ; SANDY-NEXT:    vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
    447 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    448 ; SANDY-NEXT:    retq # sched: [1:1.00]
    449 ;
    450 ; HASWELL-LABEL: test_andps:
    451 ; HASWELL:       # %bb.0:
    452 ; HASWELL-NEXT:    vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    453 ; HASWELL-NEXT:    vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
    454 ; HASWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    455 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    456 ;
    457 ; BROADWELL-LABEL: test_andps:
    458 ; BROADWELL:       # %bb.0:
    459 ; BROADWELL-NEXT:    vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    460 ; BROADWELL-NEXT:    vandps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
    461 ; BROADWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    462 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    463 ;
    464 ; SKYLAKE-LABEL: test_andps:
    465 ; SKYLAKE:       # %bb.0:
    466 ; SKYLAKE-NEXT:    vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
    467 ; SKYLAKE-NEXT:    vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
    468 ; SKYLAKE-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
    469 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    470 ;
    471 ; SKX-LABEL: test_andps:
    472 ; SKX:       # %bb.0:
    473 ; SKX-NEXT:    vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
    474 ; SKX-NEXT:    vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
    475 ; SKX-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
    476 ; SKX-NEXT:    retq # sched: [7:1.00]
    477 ;
    478 ; BTVER2-LABEL: test_andps:
    479 ; BTVER2:       # %bb.0:
    480 ; BTVER2-NEXT:    vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
    481 ; BTVER2-NEXT:    vandps (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
    482 ; BTVER2-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
    483 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    484 ;
    485 ; ZNVER1-LABEL: test_andps:
    486 ; ZNVER1:       # %bb.0:
    487 ; ZNVER1-NEXT:    vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
    488 ; ZNVER1-NEXT:    vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
    489 ; ZNVER1-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    490 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    491   %1 = bitcast <8 x float> %a0 to <4 x i64>
    492   %2 = bitcast <8 x float> %a1 to <4 x i64>
    493   %3 = and <4 x i64> %1, %2 ; %a0 & %a1, both in registers
    494   %4 = load <8 x float>, <8 x float> *%a2, align 32 ; no separate load is expected; it shows up as the (%rdi) operand
    495   %5 = bitcast <8 x float> %4 to <4 x i64>
    496   %6 = and <4 x i64> %3, %5 ; first result & loaded value
    497   %7 = bitcast <4 x i64> %6 to <8 x float>
    498   %8 = fadd <8 x float> %a1, %7 ; NOTE(review): presumably present to keep the logic ops in the FP domain - confirm
    499   ret <8 x float> %8
    500 }
    501 
    502 define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
        ; Blends %a0 with %a1 (elements 1 and 2 taken from %a1), adds %a1 to the
        ; blend, then blends that sum with a vector loaded from %a2 using the
        ; same mask. The assertions below expect a register vblendpd, a vaddpd,
        ; and a vblendpd whose second source is printed as "mem".
    503 ; GENERIC-LABEL: test_blendpd:
    504 ; GENERIC:       # %bb.0:
    505 ; GENERIC-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
    506 ; GENERIC-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    507 ; GENERIC-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
    508 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    509 ;
    510 ; SANDY-LABEL: test_blendpd:
    511 ; SANDY:       # %bb.0:
    512 ; SANDY-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
    513 ; SANDY-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    514 ; SANDY-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
    515 ; SANDY-NEXT:    retq # sched: [1:1.00]
    516 ;
    517 ; HASWELL-LABEL: test_blendpd:
    518 ; HASWELL:       # %bb.0:
    519 ; HASWELL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
    520 ; HASWELL-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    521 ; HASWELL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
    522 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    523 ;
    524 ; BROADWELL-LABEL: test_blendpd:
    525 ; BROADWELL:       # %bb.0:
    526 ; BROADWELL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
    527 ; BROADWELL-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    528 ; BROADWELL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [7:0.50]
    529 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    530 ;
    531 ; SKYLAKE-LABEL: test_blendpd:
    532 ; SKYLAKE:       # %bb.0:
    533 ; SKYLAKE-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
    534 ; SKYLAKE-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
    535 ; SKYLAKE-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
    536 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    537 ;
    538 ; SKX-LABEL: test_blendpd:
    539 ; SKX:       # %bb.0:
    540 ; SKX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
    541 ; SKX-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
    542 ; SKX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
    543 ; SKX-NEXT:    retq # sched: [7:1.00]
    544 ;
    545 ; BTVER2-LABEL: test_blendpd:
    546 ; BTVER2:       # %bb.0:
    547 ; BTVER2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00]
    548 ; BTVER2-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
    549 ; BTVER2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [6:2.00]
    550 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    551 ;
    552 ; ZNVER1-LABEL: test_blendpd:
    553 ; ZNVER1:       # %bb.0:
    554 ; ZNVER1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
    555 ; ZNVER1-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    556 ; ZNVER1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50]
    557 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    558   %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3> ; indices 5 and 6 pick elements 1 and 2 of %a1
    559   %2 = load <4 x double>, <4 x double> *%a2, align 32 ; no separate load is expected; it shows up as the "mem" blend source
    560   %3 = fadd <4 x double> %a1, %1 ; sits between the two blends in the expected output
    561   %4 = shufflevector <4 x double> %3, <4 x double> %2, <4 x i32> <i32 0, i32 5, i32 6, i32 3> ; same mask, second source is the loaded vector
    562   ret <4 x double> %4
    563 }
    564 
    565 define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
        ; Blends %a0 with %a1 (elements 1 and 2 taken from %a1), separately
        ; blends %a1 with a vector loaded from %a2 (elements 2, 4, 5 and 6 taken
        ; from the load), then adds the two blends. The assertions below expect
        ; a register vblendps into ymm0, a vblendps with a "mem" source into
        ; ymm1, and a vaddps.
    566 ; GENERIC-LABEL: test_blendps:
    567 ; GENERIC:       # %bb.0:
    568 ; GENERIC-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
    569 ; GENERIC-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
    570 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
    571 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    572 ;
    573 ; SANDY-LABEL: test_blendps:
    574 ; SANDY:       # %bb.0:
    575 ; SANDY-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
    576 ; SANDY-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
    577 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
    578 ; SANDY-NEXT:    retq # sched: [1:1.00]
    579 ;
    580 ; HASWELL-LABEL: test_blendps:
    581 ; HASWELL:       # %bb.0:
    582 ; HASWELL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
    583 ; HASWELL-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
    584 ; HASWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
    585 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    586 ;
    587 ; BROADWELL-LABEL: test_blendps:
    588 ; BROADWELL:       # %bb.0:
    589 ; BROADWELL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
    590 ; BROADWELL-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [7:0.50]
    591 ; BROADWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
    592 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    593 ;
    594 ; SKYLAKE-LABEL: test_blendps:
    595 ; SKYLAKE:       # %bb.0:
    596 ; SKYLAKE-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
    597 ; SKYLAKE-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
    598 ; SKYLAKE-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
    599 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    600 ;
    601 ; SKX-LABEL: test_blendps:
    602 ; SKX:       # %bb.0:
    603 ; SKX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
    604 ; SKX-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
    605 ; SKX-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
    606 ; SKX-NEXT:    retq # sched: [7:1.00]
    607 ;
    608 ; BTVER2-LABEL: test_blendps:
    609 ; BTVER2:       # %bb.0:
    610 ; BTVER2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00]
    611 ; BTVER2-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [6:2.00]
    612 ; BTVER2-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
    613 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    614 ;
    615 ; ZNVER1-LABEL: test_blendps:
    616 ; ZNVER1:       # %bb.0:
    617 ; ZNVER1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
    618 ; ZNVER1-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
    619 ; ZNVER1-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
    620 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    621   %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7> ; indices 9 and 10 pick elements 1 and 2 of %a1
    622   %2 = load <8 x float>, <8 x float> *%a2, align 32 ; no separate load is expected; it shows up as the "mem" blend source
    623   %3 = shufflevector <8 x float> %a1, <8 x float> %2, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 13, i32 14, i32 7> ; indices 10, 12, 13, 14 pick elements 2, 4, 5, 6 of the load
    624   %4 = fadd <8 x float> %1, %3 ; combines the register blend and the memory blend
    625   ret <8 x float> %4
    626 }
    627 
    628 define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) {
        ; Scheduling test for the 256-bit vblendvpd instruction.
        ; The IR calls llvm.x86.avx.blendv.pd.256 twice with %a2 as the third operand:
        ; once on the register arguments %a0/%a1, and once on that result and a vector
        ; loaded from %a3. Each CPU prefix therefore expects a register-form and a
        ; memory-form vblendvpd, each followed by its "# sched" annotation.
        ; NOTE(review): the bracketed pair is presumably latency and reciprocal
        ; throughput as printed by -print-schedule; confirm against the llc sources.
    629 ; GENERIC-LABEL: test_blendvpd:
    630 ; GENERIC:       # %bb.0:
    631 ; GENERIC-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
    632 ; GENERIC-NEXT:    vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
    633 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    634 ;
    635 ; SANDY-LABEL: test_blendvpd:
    636 ; SANDY:       # %bb.0:
    637 ; SANDY-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
    638 ; SANDY-NEXT:    vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
    639 ; SANDY-NEXT:    retq # sched: [1:1.00]
    640 ;
    641 ; HASWELL-LABEL: test_blendvpd:
    642 ; HASWELL:       # %bb.0:
    643 ; HASWELL-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
    644 ; HASWELL-NEXT:    vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
    645 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    646 ;
    647 ; BROADWELL-LABEL: test_blendvpd:
    648 ; BROADWELL:       # %bb.0:
    649 ; BROADWELL-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
    650 ; BROADWELL-NEXT:    vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
    651 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    652 ;
    653 ; SKYLAKE-LABEL: test_blendvpd:
    654 ; SKYLAKE:       # %bb.0:
    655 ; SKYLAKE-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
    656 ; SKYLAKE-NEXT:    vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
    657 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    658 ;
    659 ; SKX-LABEL: test_blendvpd:
    660 ; SKX:       # %bb.0:
    661 ; SKX-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
    662 ; SKX-NEXT:    vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
    663 ; SKX-NEXT:    retq # sched: [7:1.00]
    664 ;
    665 ; BTVER2-LABEL: test_blendvpd:
    666 ; BTVER2:       # %bb.0:
    667 ; BTVER2-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
    668 ; BTVER2-NEXT:    vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
    669 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    670 ;
    671 ; ZNVER1-LABEL: test_blendvpd:
    672 ; ZNVER1:       # %bb.0:
    673 ; ZNVER1-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
    674 ; ZNVER1-NEXT:    vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
    675 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    676   %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
    677   %2 = load <4 x double>, <4 x double> *%a3, align 32
    678   %3 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %1, <4 x double> %2, <4 x double> %a2)
    679   ret <4 x double> %3
    680 }
    681 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
    682 
    683 define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) {
        ; Scheduling test for the 256-bit vblendvps instruction.
        ; The IR calls llvm.x86.avx.blendv.ps.256 twice with %a2 as the third operand:
        ; first on the register arguments %a0/%a1, then on that result and a vector
        ; loaded from %a3. Each CPU prefix expects a register-form vblendvps followed
        ; by a memory-form vblendvps, each with its "# sched" annotation.
    684 ; GENERIC-LABEL: test_blendvps:
    685 ; GENERIC:       # %bb.0:
    686 ; GENERIC-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
    687 ; GENERIC-NEXT:    vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
    688 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    689 ;
    690 ; SANDY-LABEL: test_blendvps:
    691 ; SANDY:       # %bb.0:
    692 ; SANDY-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
    693 ; SANDY-NEXT:    vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
    694 ; SANDY-NEXT:    retq # sched: [1:1.00]
    695 ;
    696 ; HASWELL-LABEL: test_blendvps:
    697 ; HASWELL:       # %bb.0:
    698 ; HASWELL-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
    699 ; HASWELL-NEXT:    vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
    700 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    701 ;
    702 ; BROADWELL-LABEL: test_blendvps:
    703 ; BROADWELL:       # %bb.0:
    704 ; BROADWELL-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
    705 ; BROADWELL-NEXT:    vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
    706 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    707 ;
    708 ; SKYLAKE-LABEL: test_blendvps:
    709 ; SKYLAKE:       # %bb.0:
    710 ; SKYLAKE-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
    711 ; SKYLAKE-NEXT:    vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
    712 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    713 ;
    714 ; SKX-LABEL: test_blendvps:
    715 ; SKX:       # %bb.0:
    716 ; SKX-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
    717 ; SKX-NEXT:    vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
    718 ; SKX-NEXT:    retq # sched: [7:1.00]
    719 ;
    720 ; BTVER2-LABEL: test_blendvps:
    721 ; BTVER2:       # %bb.0:
    722 ; BTVER2-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
    723 ; BTVER2-NEXT:    vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
    724 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    725 ;
    726 ; ZNVER1-LABEL: test_blendvps:
    727 ; ZNVER1:       # %bb.0:
    728 ; ZNVER1-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
    729 ; ZNVER1-NEXT:    vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
    730 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    731   %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
    732   %2 = load <8 x float>, <8 x float> *%a3, align 32
    733   %3 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %1, <8 x float> %2, <8 x float> %a2)
    734   ret <8 x float> %3
    735 }
    736 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
    737 
    738 define <8 x float> @test_broadcastf128(<4 x float> *%a0) {
        ; Scheduling test for vbroadcastf128.
        ; The IR loads a <4 x float> from %a0 and widens it to <8 x float> with a
        ; shuffle whose indices are 0,1,2,3,0,1,2,3, i.e. the loaded vector repeated
        ; in both halves. Every CPU prefix expects this to become a single
        ; vbroadcastf128 from memory; the checks use a regex in place of the
        ; operand text and match only the decoded shuffle comment.
    739 ; GENERIC-LABEL: test_broadcastf128:
    740 ; GENERIC:       # %bb.0:
    741 ; GENERIC-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00]
    742 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    743 ;
    744 ; SANDY-LABEL: test_broadcastf128:
    745 ; SANDY:       # %bb.0:
    746 ; SANDY-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00]
    747 ; SANDY-NEXT:    retq # sched: [1:1.00]
    748 ;
    749 ; HASWELL-LABEL: test_broadcastf128:
    750 ; HASWELL:       # %bb.0:
    751 ; HASWELL-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50]
    752 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    753 ;
    754 ; BROADWELL-LABEL: test_broadcastf128:
    755 ; BROADWELL:       # %bb.0:
    756 ; BROADWELL-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:0.50]
    757 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    758 ;
    759 ; SKYLAKE-LABEL: test_broadcastf128:
    760 ; SKYLAKE:       # %bb.0:
    761 ; SKYLAKE-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50]
    762 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    763 ;
    764 ; SKX-LABEL: test_broadcastf128:
    765 ; SKX:       # %bb.0:
    766 ; SKX-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50]
    767 ; SKX-NEXT:    retq # sched: [7:1.00]
    768 ;
    769 ; BTVER2-LABEL: test_broadcastf128:
    770 ; BTVER2:       # %bb.0:
    771 ; BTVER2-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:1.00]
    772 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    773 ;
    774 ; ZNVER1-LABEL: test_broadcastf128:
    775 ; ZNVER1:       # %bb.0:
    776 ; ZNVER1-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [8:0.50]
    777 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    778   %1 = load <4 x float>, <4 x float> *%a0, align 32
    779   %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
    780   ret <8 x float> %2
    781 }
    782 
    783 define <4 x double> @test_broadcastsd_ymm(double *%a0) {
        ; Scheduling test for the 256-bit memory form of vbroadcastsd.
        ; The IR loads one double from %a0, inserts it at element 0 of an undef
        ; <4 x double>, and splats that element with an all-zero shuffle mask.
        ; Every CPU prefix expects the load/insert/shuffle sequence to become a
        ; single "vbroadcastsd (%rdi), %ymm0".
    784 ; GENERIC-LABEL: test_broadcastsd_ymm:
    785 ; GENERIC:       # %bb.0:
    786 ; GENERIC-NEXT:    vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
    787 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    788 ;
    789 ; SANDY-LABEL: test_broadcastsd_ymm:
    790 ; SANDY:       # %bb.0:
    791 ; SANDY-NEXT:    vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
    792 ; SANDY-NEXT:    retq # sched: [1:1.00]
    793 ;
    794 ; HASWELL-LABEL: test_broadcastsd_ymm:
    795 ; HASWELL:       # %bb.0:
    796 ; HASWELL-NEXT:    vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
    797 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    798 ;
    799 ; BROADWELL-LABEL: test_broadcastsd_ymm:
    800 ; BROADWELL:       # %bb.0:
    801 ; BROADWELL-NEXT:    vbroadcastsd (%rdi), %ymm0 # sched: [6:0.50]
    802 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    803 ;
    804 ; SKYLAKE-LABEL: test_broadcastsd_ymm:
    805 ; SKYLAKE:       # %bb.0:
    806 ; SKYLAKE-NEXT:    vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
    807 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    808 ;
    809 ; SKX-LABEL: test_broadcastsd_ymm:
    810 ; SKX:       # %bb.0:
    811 ; SKX-NEXT:    vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
    812 ; SKX-NEXT:    retq # sched: [7:1.00]
    813 ;
    814 ; BTVER2-LABEL: test_broadcastsd_ymm:
    815 ; BTVER2:       # %bb.0:
    816 ; BTVER2-NEXT:    vbroadcastsd (%rdi), %ymm0 # sched: [6:2.00]
    817 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    818 ;
    819 ; ZNVER1-LABEL: test_broadcastsd_ymm:
    820 ; ZNVER1:       # %bb.0:
    821 ; ZNVER1-NEXT:    vbroadcastsd (%rdi), %ymm0 # sched: [8:0.50]
    822 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    823   %1 = load double, double *%a0, align 8
    824   %2 = insertelement <4 x double> undef, double %1, i32 0
    825   %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> zeroinitializer
    826   ret <4 x double> %3
    827 }
    828 
    829 define <4 x float> @test_broadcastss(float *%a0) {
        ; Scheduling test for the 128-bit memory form of vbroadcastss.
        ; The IR loads one float from %a0, inserts it at element 0 of an undef
        ; <4 x float>, and splats that element with an all-zero shuffle mask.
        ; Every CPU prefix expects a single "vbroadcastss (%rdi), %xmm0".
    830 ; GENERIC-LABEL: test_broadcastss:
    831 ; GENERIC:       # %bb.0:
    832 ; GENERIC-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
    833 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    834 ;
    835 ; SANDY-LABEL: test_broadcastss:
    836 ; SANDY:       # %bb.0:
    837 ; SANDY-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
    838 ; SANDY-NEXT:    retq # sched: [1:1.00]
    839 ;
    840 ; HASWELL-LABEL: test_broadcastss:
    841 ; HASWELL:       # %bb.0:
    842 ; HASWELL-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
    843 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    844 ;
    845 ; BROADWELL-LABEL: test_broadcastss:
    846 ; BROADWELL:       # %bb.0:
    847 ; BROADWELL-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [5:0.50]
    848 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    849 ;
    850 ; SKYLAKE-LABEL: test_broadcastss:
    851 ; SKYLAKE:       # %bb.0:
    852 ; SKYLAKE-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
    853 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    854 ;
    855 ; SKX-LABEL: test_broadcastss:
    856 ; SKX:       # %bb.0:
    857 ; SKX-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
    858 ; SKX-NEXT:    retq # sched: [7:1.00]
    859 ;
    860 ; BTVER2-LABEL: test_broadcastss:
    861 ; BTVER2:       # %bb.0:
    862 ; BTVER2-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [6:1.00]
    863 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    864 ;
    865 ; ZNVER1-LABEL: test_broadcastss:
    866 ; ZNVER1:       # %bb.0:
    867 ; ZNVER1-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [8:0.50]
    868 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    869   %1 = load float, float *%a0, align 4
    870   %2 = insertelement <4 x float> undef, float %1, i32 0
    871   %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer
    872   ret <4 x float> %3
    873 }
    874 
    875 define <8 x float> @test_broadcastss_ymm(float *%a0) {
        ; Scheduling test for the 256-bit memory form of vbroadcastss.
        ; Same IR shape as the 128-bit variant but with <8 x float>: a scalar float
        ; loaded from %a0 is inserted at element 0 and splatted with an all-zero
        ; shuffle mask. Every CPU prefix expects a single "vbroadcastss (%rdi), %ymm0".
    876 ; GENERIC-LABEL: test_broadcastss_ymm:
    877 ; GENERIC:       # %bb.0:
    878 ; GENERIC-NEXT:    vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
    879 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    880 ;
    881 ; SANDY-LABEL: test_broadcastss_ymm:
    882 ; SANDY:       # %bb.0:
    883 ; SANDY-NEXT:    vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
    884 ; SANDY-NEXT:    retq # sched: [1:1.00]
    885 ;
    886 ; HASWELL-LABEL: test_broadcastss_ymm:
    887 ; HASWELL:       # %bb.0:
    888 ; HASWELL-NEXT:    vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
    889 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    890 ;
    891 ; BROADWELL-LABEL: test_broadcastss_ymm:
    892 ; BROADWELL:       # %bb.0:
    893 ; BROADWELL-NEXT:    vbroadcastss (%rdi), %ymm0 # sched: [6:0.50]
    894 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    895 ;
    896 ; SKYLAKE-LABEL: test_broadcastss_ymm:
    897 ; SKYLAKE:       # %bb.0:
    898 ; SKYLAKE-NEXT:    vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
    899 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    900 ;
    901 ; SKX-LABEL: test_broadcastss_ymm:
    902 ; SKX:       # %bb.0:
    903 ; SKX-NEXT:    vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
    904 ; SKX-NEXT:    retq # sched: [7:1.00]
    905 ;
    906 ; BTVER2-LABEL: test_broadcastss_ymm:
    907 ; BTVER2:       # %bb.0:
    908 ; BTVER2-NEXT:    vbroadcastss (%rdi), %ymm0 # sched: [6:2.00]
    909 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    910 ;
    911 ; ZNVER1-LABEL: test_broadcastss_ymm:
    912 ; ZNVER1:       # %bb.0:
    913 ; ZNVER1-NEXT:    vbroadcastss (%rdi), %ymm0 # sched: [8:0.50]
    914 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    915   %1 = load float, float *%a0, align 4
    916   %2 = insertelement <8 x float> undef, float %1, i32 0
    917   %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> zeroinitializer
    918   ret <8 x float> %3
    919 }
    920 
    921 define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
        ; Scheduling test for 256-bit packed-double compares (vcmpeqpd).
        ; The IR does two "fcmp oeq" compares of %a0: one against the register
        ; argument %a1 and one against a vector loaded from %a2. Both i1 results are
        ; sign-extended to <4 x i64>, OR-ed together and bitcast back to
        ; <4 x double>. Each CPU prefix expects a register-form vcmpeqpd, a
        ; memory-form vcmpeqpd, and a vorpd combining the two.
    922 ; GENERIC-LABEL: test_cmppd:
    923 ; GENERIC:       # %bb.0:
    924 ; GENERIC-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
    925 ; GENERIC-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
    926 ; GENERIC-NEXT:    vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
    927 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    928 ;
    929 ; SANDY-LABEL: test_cmppd:
    930 ; SANDY:       # %bb.0:
    931 ; SANDY-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
    932 ; SANDY-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
    933 ; SANDY-NEXT:    vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
    934 ; SANDY-NEXT:    retq # sched: [1:1.00]
    935 ;
    936 ; HASWELL-LABEL: test_cmppd:
    937 ; HASWELL:       # %bb.0:
    938 ; HASWELL-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
    939 ; HASWELL-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
    940 ; HASWELL-NEXT:    vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
    941 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    942 ;
    943 ; BROADWELL-LABEL: test_cmppd:
    944 ; BROADWELL:       # %bb.0:
    945 ; BROADWELL-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
    946 ; BROADWELL-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
    947 ; BROADWELL-NEXT:    vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
    948 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    949 ;
    950 ; SKYLAKE-LABEL: test_cmppd:
    951 ; SKYLAKE:       # %bb.0:
    952 ; SKYLAKE-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.50]
    953 ; SKYLAKE-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
    954 ; SKYLAKE-NEXT:    vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
    955 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    956 ;
    957 ; SKX-LABEL: test_cmppd:
    958 ; SKX:       # %bb.0:
    959 ; SKX-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.50]
    960 ; SKX-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
    961 ; SKX-NEXT:    vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
    962 ; SKX-NEXT:    retq # sched: [7:1.00]
    963 ;
    964 ; BTVER2-LABEL: test_cmppd:
    965 ; BTVER2:       # %bb.0:
    966 ; BTVER2-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
    967 ; BTVER2-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
    968 ; BTVER2-NEXT:    vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
    969 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    970 ;
    971 ; ZNVER1-LABEL: test_cmppd:
    972 ; ZNVER1:       # %bb.0:
    973 ; ZNVER1-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
    974 ; ZNVER1-NEXT:    vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
    975 ; ZNVER1-NEXT:    vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
    976 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
    977   %1 = fcmp oeq <4 x double> %a0, %a1
    978   %2 = load <4 x double>, <4 x double> *%a2, align 32
    979   %3 = fcmp oeq <4 x double> %a0, %2
    980   %4 = sext <4 x i1> %1 to <4 x i64>
    981   %5 = sext <4 x i1> %3 to <4 x i64>
    982   %6 = or <4 x i64> %4, %5
    983   %7 = bitcast <4 x i64> %6 to <4 x double>
    984   ret <4 x double> %7
    985 }
    986 
    987 define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
        ; Scheduling test for 256-bit packed-single compares (vcmpeqps).
        ; The IR does two "fcmp oeq" compares of %a0: one against the register
        ; argument %a1 and one against a vector loaded from %a2. Both i1 results are
        ; sign-extended to <8 x i32>, OR-ed together and bitcast back to
        ; <8 x float>. Each CPU prefix expects a register-form vcmpeqps, a
        ; memory-form vcmpeqps, and a vorps combining the two.
    988 ; GENERIC-LABEL: test_cmpps:
    989 ; GENERIC:       # %bb.0:
    990 ; GENERIC-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
    991 ; GENERIC-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
    992 ; GENERIC-NEXT:    vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
    993 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    994 ;
    995 ; SANDY-LABEL: test_cmpps:
    996 ; SANDY:       # %bb.0:
    997 ; SANDY-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
    998 ; SANDY-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
    999 ; SANDY-NEXT:    vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
   1000 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1001 ;
   1002 ; HASWELL-LABEL: test_cmpps:
   1003 ; HASWELL:       # %bb.0:
   1004 ; HASWELL-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
   1005 ; HASWELL-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   1006 ; HASWELL-NEXT:    vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
   1007 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1008 ;
   1009 ; BROADWELL-LABEL: test_cmpps:
   1010 ; BROADWELL:       # %bb.0:
   1011 ; BROADWELL-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
   1012 ; BROADWELL-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
   1013 ; BROADWELL-NEXT:    vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
   1014 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1015 ;
   1016 ; SKYLAKE-LABEL: test_cmpps:
   1017 ; SKYLAKE:       # %bb.0:
   1018 ; SKYLAKE-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.50]
   1019 ; SKYLAKE-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   1020 ; SKYLAKE-NEXT:    vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
   1021 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1022 ;
   1023 ; SKX-LABEL: test_cmpps:
   1024 ; SKX:       # %bb.0:
   1025 ; SKX-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.50]
   1026 ; SKX-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   1027 ; SKX-NEXT:    vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
   1028 ; SKX-NEXT:    retq # sched: [7:1.00]
   1029 ;
   1030 ; BTVER2-LABEL: test_cmpps:
   1031 ; BTVER2:       # %bb.0:
   1032 ; BTVER2-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
   1033 ; BTVER2-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
   1034 ; BTVER2-NEXT:    vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
   1035 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1036 ;
   1037 ; ZNVER1-LABEL: test_cmpps:
   1038 ; ZNVER1:       # %bb.0:
   1039 ; ZNVER1-NEXT:    vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
   1040 ; ZNVER1-NEXT:    vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   1041 ; ZNVER1-NEXT:    vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
   1042 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1043   %1 = fcmp oeq <8 x float> %a0, %a1
   1044   %2 = load <8 x float>, <8 x float> *%a2, align 32
   1045   %3 = fcmp oeq <8 x float> %a0, %2
   1046   %4 = sext <8 x i1> %1 to <8 x i32>
   1047   %5 = sext <8 x i1> %3 to <8 x i32>
   1048   %6 = or <8 x i32> %4, %5
   1049   %7 = bitcast <8 x i32> %6 to <8 x float>
   1050   ret <8 x float> %7
   1051 }
   1052 
   1053 define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
        ; Scheduling test for vcvtdq2pd with a 128-bit source and 256-bit result.
        ; The IR converts two <4 x i32> values to <4 x double> with sitofp (the
        ; register argument %a0 and a vector loaded from %a1) and adds the results,
        ; so each CPU prefix expects a register-form vcvtdq2pd, a memory-form
        ; vcvtdq2pd and a vaddpd.
        ; The expected order of the two conversions differs per CPU: the BROADWELL,
        ; BTVER2 and ZNVER1 checks list the memory form first, the others list the
        ; register form first.
   1054 ; GENERIC-LABEL: test_cvtdq2pd:
   1055 ; GENERIC:       # %bb.0:
   1056 ; GENERIC-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
   1057 ; GENERIC-NEXT:    vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00]
   1058 ; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   1059 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1060 ;
   1061 ; SANDY-LABEL: test_cvtdq2pd:
   1062 ; SANDY:       # %bb.0:
   1063 ; SANDY-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
   1064 ; SANDY-NEXT:    vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00]
   1065 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   1066 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1067 ;
   1068 ; HASWELL-LABEL: test_cvtdq2pd:
   1069 ; HASWELL:       # %bb.0:
   1070 ; HASWELL-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00]
   1071 ; HASWELL-NEXT:    vcvtdq2pd (%rdi), %ymm1 # sched: [12:1.00]
   1072 ; HASWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   1073 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1074 ;
   1075 ; BROADWELL-LABEL: test_cvtdq2pd:
   1076 ; BROADWELL:       # %bb.0:
   1077 ; BROADWELL-NEXT:    vcvtdq2pd (%rdi), %ymm1 # sched: [11:1.00]
   1078 ; BROADWELL-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00]
   1079 ; BROADWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   1080 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1081 ;
   1082 ; SKYLAKE-LABEL: test_cvtdq2pd:
   1083 ; SKYLAKE:       # %bb.0:
   1084 ; SKYLAKE-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
   1085 ; SKYLAKE-NEXT:    vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00]
   1086 ; SKYLAKE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   1087 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1088 ;
   1089 ; SKX-LABEL: test_cvtdq2pd:
   1090 ; SKX:       # %bb.0:
   1091 ; SKX-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
   1092 ; SKX-NEXT:    vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00]
   1093 ; SKX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   1094 ; SKX-NEXT:    retq # sched: [7:1.00]
   1095 ;
   1096 ; BTVER2-LABEL: test_cvtdq2pd:
   1097 ; BTVER2:       # %bb.0:
   1098 ; BTVER2-NEXT:    vcvtdq2pd (%rdi), %ymm1 # sched: [8:2.00]
   1099 ; BTVER2-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [3:2.00]
   1100 ; BTVER2-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   1101 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1102 ;
   1103 ; ZNVER1-LABEL: test_cvtdq2pd:
   1104 ; ZNVER1:       # %bb.0:
   1105 ; ZNVER1-NEXT:    vcvtdq2pd (%rdi), %ymm1 # sched: [12:1.00]
   1106 ; ZNVER1-NEXT:    vcvtdq2pd %xmm0, %ymm0 # sched: [5:1.00]
   1107 ; ZNVER1-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   1108 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1109   %1 = sitofp <4 x i32> %a0 to <4 x double>
   1110   %2 = load <4 x i32>, <4 x i32> *%a1, align 16
   1111   %3 = sitofp <4 x i32> %2 to <4 x double>
   1112   %4 = fadd <4 x double> %1, %3
   1113   ret <4 x double> %4
   1114 }
   1115 
   1116 define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) {
        ; Scheduling test for the 256-bit vcvtdq2ps instruction.
        ; The IR converts two <8 x i32> values to <8 x float> with sitofp (the
        ; register argument %a0 and a vector loaded from %a1) and adds the results.
        ; Most CPU prefixes expect a register-form vcvtdq2ps, a memory-form
        ; vcvtdq2ps and a vaddps; BTVER2 and ZNVER1 list the memory form first.
        ; The SANDY checks differ: the load is expected as vmovaps of the low 16
        ; bytes plus vinsertf128 of the bytes at offset 16, followed by a second
        ; register-form vcvtdq2ps.
        ; NOTE(review): presumably this is because the 32-byte load below is only
        ; "align 16", so it is not folded on those CPUs; confirm against the X86
        ; backend's unaligned-load tuning before relying on this.
   1117 ; GENERIC-LABEL: test_cvtdq2ps:
   1118 ; GENERIC:       # %bb.0:
   1119 ; GENERIC-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
   1120 ; GENERIC-NEXT:    vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00]
   1121 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   1122 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1123 ;
   1124 ; SANDY-LABEL: test_cvtdq2ps:
   1125 ; SANDY:       # %bb.0:
   1126 ; SANDY-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
   1127 ; SANDY-NEXT:    vmovaps (%rdi), %xmm1 # sched: [6:0.50]
   1128 ; SANDY-NEXT:    vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:0.50]
   1129 ; SANDY-NEXT:    vcvtdq2ps %ymm1, %ymm1 # sched: [3:1.00]
   1130 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   1131 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1132 ;
   1133 ; HASWELL-LABEL: test_cvtdq2ps:
   1134 ; HASWELL:       # %bb.0:
   1135 ; HASWELL-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
   1136 ; HASWELL-NEXT:    vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00]
   1137 ; HASWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   1138 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1139 ;
   1140 ; BROADWELL-LABEL: test_cvtdq2ps:
   1141 ; BROADWELL:       # %bb.0:
   1142 ; BROADWELL-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
   1143 ; BROADWELL-NEXT:    vcvtdq2ps (%rdi), %ymm1 # sched: [9:1.00]
   1144 ; BROADWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   1145 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1146 ;
   1147 ; SKYLAKE-LABEL: test_cvtdq2ps:
   1148 ; SKYLAKE:       # %bb.0:
   1149 ; SKYLAKE-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50]
   1150 ; SKYLAKE-NEXT:    vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50]
   1151 ; SKYLAKE-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   1152 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1153 ;
   1154 ; SKX-LABEL: test_cvtdq2ps:
   1155 ; SKX:       # %bb.0:
   1156 ; SKX-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50]
   1157 ; SKX-NEXT:    vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50]
   1158 ; SKX-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   1159 ; SKX-NEXT:    retq # sched: [7:1.00]
   1160 ;
   1161 ; BTVER2-LABEL: test_cvtdq2ps:
   1162 ; BTVER2:       # %bb.0:
   1163 ; BTVER2-NEXT:    vcvtdq2ps (%rdi), %ymm1 # sched: [8:2.00]
   1164 ; BTVER2-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [3:2.00]
   1165 ; BTVER2-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   1166 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1167 ;
   1168 ; ZNVER1-LABEL: test_cvtdq2ps:
   1169 ; ZNVER1:       # %bb.0:
   1170 ; ZNVER1-NEXT:    vcvtdq2ps (%rdi), %ymm1 # sched: [12:1.00]
   1171 ; ZNVER1-NEXT:    vcvtdq2ps %ymm0, %ymm0 # sched: [5:1.00]
   1172 ; ZNVER1-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   1173 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1174   %1 = sitofp <8 x i32> %a0 to <8 x float>
   1175   %2 = load <8 x i32>, <8 x i32> *%a1, align 16
   1176   %3 = sitofp <8 x i32> %2 to <8 x float>
   1177   %4 = fadd <8 x float> %1, %3
   1178   ret <8 x float> %4
   1179 }
   1180 
   1181 define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) {
        ; Scheduling test for vcvtpd2dq with a 256-bit source.
        ; The IR calls llvm.x86.avx.cvt.pd2dq.256 on the register argument %a0 and
        ; on a vector loaded from %a1, then joins the two <4 x i32> results into one
        ; <8 x i32> with a shuffle using indices 0 through 7. Each CPU prefix expects
        ; a register-form vcvtpd2dq, a memory-form vcvtpd2dqy and a vinsertf128 $1
        ; placing the second result in the upper half.
        ; The BTVER2 and ZNVER1 checks list the memory-form conversion first.
   1182 ; GENERIC-LABEL: test_cvtpd2dq:
   1183 ; GENERIC:       # %bb.0:
   1184 ; GENERIC-NEXT:    vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00]
   1185 ; GENERIC-NEXT:    vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
   1186 ; GENERIC-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
   1187 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1188 ;
   1189 ; SANDY-LABEL: test_cvtpd2dq:
   1190 ; SANDY:       # %bb.0:
   1191 ; SANDY-NEXT:    vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00]
   1192 ; SANDY-NEXT:    vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
   1193 ; SANDY-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
   1194 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1195 ;
   1196 ; HASWELL-LABEL: test_cvtpd2dq:
   1197 ; HASWELL:       # %bb.0:
   1198 ; HASWELL-NEXT:    vcvtpd2dq %ymm0, %xmm0 # sched: [6:1.00]
   1199 ; HASWELL-NEXT:    vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
   1200 ; HASWELL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
   1201 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1202 ;
   1203 ; BROADWELL-LABEL: test_cvtpd2dq:
   1204 ; BROADWELL:       # %bb.0:
   1205 ; BROADWELL-NEXT:    vcvtpd2dq %ymm0, %xmm0 # sched: [6:1.00]
   1206 ; BROADWELL-NEXT:    vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
   1207 ; BROADWELL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
   1208 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1209 ;
   1210 ; SKYLAKE-LABEL: test_cvtpd2dq:
   1211 ; SKYLAKE:       # %bb.0:
   1212 ; SKYLAKE-NEXT:    vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00]
   1213 ; SKYLAKE-NEXT:    vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
   1214 ; SKYLAKE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
   1215 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1216 ;
   1217 ; SKX-LABEL: test_cvtpd2dq:
   1218 ; SKX:       # %bb.0:
   1219 ; SKX-NEXT:    vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00]
   1220 ; SKX-NEXT:    vcvtpd2dqy (%rdi), %xmm1 # sched: [8:0.50]
   1221 ; SKX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
   1222 ; SKX-NEXT:    retq # sched: [7:1.00]
   1223 ;
   1224 ; BTVER2-LABEL: test_cvtpd2dq:
   1225 ; BTVER2:       # %bb.0:
   1226 ; BTVER2-NEXT:    vcvtpd2dqy (%rdi), %xmm1 # sched: [11:2.00]
   1227 ; BTVER2-NEXT:    vcvtpd2dq %ymm0, %xmm0 # sched: [6:2.00]
   1228 ; BTVER2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50]
   1229 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1230 ;
   1231 ; ZNVER1-LABEL: test_cvtpd2dq:
   1232 ; ZNVER1:       # %bb.0:
   1233 ; ZNVER1-NEXT:    vcvtpd2dqy (%rdi), %xmm1 # sched: [12:1.00]
   1234 ; ZNVER1-NEXT:    vcvtpd2dq %ymm0, %xmm0 # sched: [5:1.00]
   1235 ; ZNVER1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67]
   1236 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1237   %1 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0)
   1238   %2 = load <4 x double>, <4 x double> *%a1, align 32
   1239   %3 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %2)
   1240   %4 = shufflevector <4 x i32> %1, <4 x i32> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   1241   ret <8 x i32> %4
   1242 }
   1243 declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
   1244 
   1245 define <8 x i32> @test_cvttpd2dq(<4 x double> %a0, <4 x double> *%a1) {
        ; Scheduling test for vcvttpd2dq with a 256-bit source.
        ; The IR uses plain fptosi (no intrinsic) on the register argument %a0 and
        ; on a vector loaded from %a1, then joins the two <4 x i32> results into one
        ; <8 x i32> with a shuffle using indices 0 through 7. Each CPU prefix expects
        ; a register-form vcvttpd2dq, a memory-form vcvttpd2dqy and a vinsertf128 $1
        ; placing the second result in the upper half.
        ; The BTVER2 and ZNVER1 checks list the memory-form conversion first.
   1246 ; GENERIC-LABEL: test_cvttpd2dq:
   1247 ; GENERIC:       # %bb.0:
   1248 ; GENERIC-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
   1249 ; GENERIC-NEXT:    vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
   1250 ; GENERIC-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
   1251 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1252 ;
   1253 ; SANDY-LABEL: test_cvttpd2dq:
   1254 ; SANDY:       # %bb.0:
   1255 ; SANDY-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
   1256 ; SANDY-NEXT:    vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
   1257 ; SANDY-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
   1258 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1259 ;
   1260 ; HASWELL-LABEL: test_cvttpd2dq:
   1261 ; HASWELL:       # %bb.0:
   1262 ; HASWELL-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00]
   1263 ; HASWELL-NEXT:    vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
   1264 ; HASWELL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
   1265 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1266 ;
   1267 ; BROADWELL-LABEL: test_cvttpd2dq:
   1268 ; BROADWELL:       # %bb.0:
   1269 ; BROADWELL-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00]
   1270 ; BROADWELL-NEXT:    vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
   1271 ; BROADWELL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
   1272 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1273 ;
   1274 ; SKYLAKE-LABEL: test_cvttpd2dq:
   1275 ; SKYLAKE:       # %bb.0:
   1276 ; SKYLAKE-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
   1277 ; SKYLAKE-NEXT:    vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
   1278 ; SKYLAKE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
   1279 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1280 ;
   1281 ; SKX-LABEL: test_cvttpd2dq:
   1282 ; SKX:       # %bb.0:
   1283 ; SKX-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
   1284 ; SKX-NEXT:    vcvttpd2dqy (%rdi), %xmm1 # sched: [8:0.50]
   1285 ; SKX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
   1286 ; SKX-NEXT:    retq # sched: [7:1.00]
   1287 ;
   1288 ; BTVER2-LABEL: test_cvttpd2dq:
   1289 ; BTVER2:       # %bb.0:
   1290 ; BTVER2-NEXT:    vcvttpd2dqy (%rdi), %xmm1 # sched: [11:2.00]
   1291 ; BTVER2-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [6:2.00]
   1292 ; BTVER2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50]
   1293 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1294 ;
   1295 ; ZNVER1-LABEL: test_cvttpd2dq:
   1296 ; ZNVER1:       # %bb.0:
   1297 ; ZNVER1-NEXT:    vcvttpd2dqy (%rdi), %xmm1 # sched: [12:1.00]
   1298 ; ZNVER1-NEXT:    vcvttpd2dq %ymm0, %xmm0 # sched: [5:1.00]
   1299 ; ZNVER1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67]
   1300 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1301   %1 = fptosi <4 x double> %a0 to <4 x i32>
   1302   %2 = load <4 x double>, <4 x double> *%a1, align 32
   1303   %3 = fptosi <4 x double> %2 to <4 x i32>
   1304   %4 = shufflevector <4 x i32> %1, <4 x i32> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   1305   ret <8 x i32> %4
   1306 }
   1307 
   1308 define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) { ; fptrunc on a register value and on a loaded value; the checks below expect vcvtpd2ps in register and (%rdi) forms
   1309 ; GENERIC-LABEL: test_cvtpd2ps:
   1310 ; GENERIC:       # %bb.0:
   1311 ; GENERIC-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
   1312 ; GENERIC-NEXT:    vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
   1313 ; GENERIC-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
   1314 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1315 ;
   1316 ; SANDY-LABEL: test_cvtpd2ps:
   1317 ; SANDY:       # %bb.0:
   1318 ; SANDY-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
   1319 ; SANDY-NEXT:    vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
   1320 ; SANDY-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
   1321 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1322 ;
   1323 ; HASWELL-LABEL: test_cvtpd2ps:
   1324 ; HASWELL:       # %bb.0:
   1325 ; HASWELL-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00]
   1326 ; HASWELL-NEXT:    vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
   1327 ; HASWELL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
   1328 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1329 ;
   1330 ; BROADWELL-LABEL: test_cvtpd2ps:
   1331 ; BROADWELL:       # %bb.0:
   1332 ; BROADWELL-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00]
   1333 ; BROADWELL-NEXT:    vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
   1334 ; BROADWELL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
   1335 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1336 ;
   1337 ; SKYLAKE-LABEL: test_cvtpd2ps:
   1338 ; SKYLAKE:       # %bb.0:
   1339 ; SKYLAKE-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00]
   1340 ; SKYLAKE-NEXT:    vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
   1341 ; SKYLAKE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
   1342 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1343 ;
   1344 ; SKX-LABEL: test_cvtpd2ps:
   1345 ; SKX:       # %bb.0:
   1346 ; SKX-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00]
   1347 ; SKX-NEXT:    vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
   1348 ; SKX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
   1349 ; SKX-NEXT:    retq # sched: [7:1.00]
   1350 ;
   1351 ; BTVER2-LABEL: test_cvtpd2ps:
   1352 ; BTVER2:       # %bb.0:
   1353 ; BTVER2-NEXT:    vcvtpd2psy (%rdi), %xmm1 # sched: [11:2.00]
   1354 ; BTVER2-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [6:2.00]
   1355 ; BTVER2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50]
   1356 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1357 ;
   1358 ; ZNVER1-LABEL: test_cvtpd2ps:
   1359 ; ZNVER1:       # %bb.0:
   1360 ; ZNVER1-NEXT:    vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
   1361 ; ZNVER1-NEXT:    vcvtpd2ps %ymm0, %xmm0 # sched: [5:1.00]
   1362 ; ZNVER1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67]
   1363 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1364   %1 = fptrunc <4 x double> %a0 to <4 x float> ; narrows the register argument
   1365   %2 = load <4 x double>, <4 x double> *%a1, align 32 ; 32-byte aligned load; the checks show it folded into the (%rdi) operand
   1366   %3 = fptrunc <4 x double> %2 to <4 x float> ; narrows the loaded value
   1367   %4 = shufflevector <4 x float> %1, <4 x float> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; mask 0-7 keeps every element, %1 first and %3 second (vinsertf128 $1 in the checks)
   1368   ret <8 x float> %4
   1369 }
   1370 
   1371 define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { ; calls the cvt.ps2dq.256 intrinsic on a register value and on a loaded value; the checks below expect vcvtps2dq in register and (%rdi) forms
   1372 ; GENERIC-LABEL: test_cvtps2dq:
   1373 ; GENERIC:       # %bb.0:
   1374 ; GENERIC-NEXT:    vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
   1375 ; GENERIC-NEXT:    vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00]
   1376 ; GENERIC-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   1377 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1378 ;
   1379 ; SANDY-LABEL: test_cvtps2dq:
   1380 ; SANDY:       # %bb.0:
   1381 ; SANDY-NEXT:    vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
   1382 ; SANDY-NEXT:    vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00]
   1383 ; SANDY-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   1384 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1385 ;
   1386 ; HASWELL-LABEL: test_cvtps2dq:
   1387 ; HASWELL:       # %bb.0:
   1388 ; HASWELL-NEXT:    vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
   1389 ; HASWELL-NEXT:    vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00]
   1390 ; HASWELL-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   1391 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1392 ;
   1393 ; BROADWELL-LABEL: test_cvtps2dq:
   1394 ; BROADWELL:       # %bb.0:
   1395 ; BROADWELL-NEXT:    vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
   1396 ; BROADWELL-NEXT:    vcvtps2dq (%rdi), %ymm1 # sched: [9:1.00]
   1397 ; BROADWELL-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   1398 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1399 ;
   1400 ; SKYLAKE-LABEL: test_cvtps2dq:
   1401 ; SKYLAKE:       # %bb.0:
   1402 ; SKYLAKE-NEXT:    vcvtps2dq %ymm0, %ymm0 # sched: [4:0.50]
   1403 ; SKYLAKE-NEXT:    vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50]
   1404 ; SKYLAKE-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
   1405 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1406 ;
   1407 ; SKX-LABEL: test_cvtps2dq:
   1408 ; SKX:       # %bb.0:
   1409 ; SKX-NEXT:    vcvtps2dq %ymm0, %ymm0 # sched: [4:0.50]
   1410 ; SKX-NEXT:    vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50]
   1411 ; SKX-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
   1412 ; SKX-NEXT:    retq # sched: [7:1.00]
   1413 ;
   1414 ; BTVER2-LABEL: test_cvtps2dq:
   1415 ; BTVER2:       # %bb.0:
   1416 ; BTVER2-NEXT:    vcvtps2dq (%rdi), %ymm1 # sched: [8:2.00]
   1417 ; BTVER2-NEXT:    vcvtps2dq %ymm0, %ymm0 # sched: [3:2.00]
   1418 ; BTVER2-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   1419 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1420 ;
   1421 ; ZNVER1-LABEL: test_cvtps2dq:
   1422 ; ZNVER1:       # %bb.0:
   1423 ; ZNVER1-NEXT:    vcvtps2dq (%rdi), %ymm1 # sched: [12:1.00]
   1424 ; ZNVER1-NEXT:    vcvtps2dq %ymm0, %ymm0 # sched: [5:1.00]
   1425 ; ZNVER1-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
   1426 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1427   %1 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; intrinsic applied to the register argument
   1428   %2 = load <8 x float>, <8 x float> *%a1, align 32 ; 32-byte aligned load; the checks show it folded into the (%rdi) operand
   1429   %3 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %2) ; intrinsic applied to the loaded value
   1430   %4 = or <8 x i32> %1, %3 ; the returned value uses both conversion results (vorpd in the checks)
   1431   ret <8 x i32> %4
   1432 }
   1433 declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
   1434 
   1435 define <8 x i32> @test_cvttps2dq(<8 x float> %a0, <8 x float> *%a1) { ; fptosi on a register value and on a loaded value; the checks below expect vcvttps2dq in register and (%rdi) forms
   1436 ; GENERIC-LABEL: test_cvttps2dq:
   1437 ; GENERIC:       # %bb.0:
   1438 ; GENERIC-NEXT:    vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
   1439 ; GENERIC-NEXT:    vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00]
   1440 ; GENERIC-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   1441 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1442 ;
   1443 ; SANDY-LABEL: test_cvttps2dq:
   1444 ; SANDY:       # %bb.0:
   1445 ; SANDY-NEXT:    vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
   1446 ; SANDY-NEXT:    vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00]
   1447 ; SANDY-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   1448 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1449 ;
   1450 ; HASWELL-LABEL: test_cvttps2dq:
   1451 ; HASWELL:       # %bb.0:
   1452 ; HASWELL-NEXT:    vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
   1453 ; HASWELL-NEXT:    vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00]
   1454 ; HASWELL-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   1455 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1456 ;
   1457 ; BROADWELL-LABEL: test_cvttps2dq:
   1458 ; BROADWELL:       # %bb.0:
   1459 ; BROADWELL-NEXT:    vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
   1460 ; BROADWELL-NEXT:    vcvttps2dq (%rdi), %ymm1 # sched: [9:1.00]
   1461 ; BROADWELL-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   1462 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1463 ;
   1464 ; SKYLAKE-LABEL: test_cvttps2dq:
   1465 ; SKYLAKE:       # %bb.0:
   1466 ; SKYLAKE-NEXT:    vcvttps2dq %ymm0, %ymm0 # sched: [4:0.50]
   1467 ; SKYLAKE-NEXT:    vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50]
   1468 ; SKYLAKE-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
   1469 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1470 ;
   1471 ; SKX-LABEL: test_cvttps2dq:
   1472 ; SKX:       # %bb.0:
   1473 ; SKX-NEXT:    vcvttps2dq %ymm0, %ymm0 # sched: [3:0.50]
   1474 ; SKX-NEXT:    vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50]
   1475 ; SKX-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
   1476 ; SKX-NEXT:    retq # sched: [7:1.00]
   1477 ;
   1478 ; BTVER2-LABEL: test_cvttps2dq:
   1479 ; BTVER2:       # %bb.0:
   1480 ; BTVER2-NEXT:    vcvttps2dq (%rdi), %ymm1 # sched: [8:2.00]
   1481 ; BTVER2-NEXT:    vcvttps2dq %ymm0, %ymm0 # sched: [3:2.00]
   1482 ; BTVER2-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   1483 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1484 ;
   1485 ; ZNVER1-LABEL: test_cvttps2dq:
   1486 ; ZNVER1:       # %bb.0:
   1487 ; ZNVER1-NEXT:    vcvttps2dq (%rdi), %ymm1 # sched: [12:1.00]
   1488 ; ZNVER1-NEXT:    vcvttps2dq %ymm0, %ymm0 # sched: [5:1.00]
   1489 ; ZNVER1-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
   1490 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1491   %1 = fptosi <8 x float> %a0 to <8 x i32> ; converts the register argument
   1492   %2 = load <8 x float>, <8 x float> *%a1, align 32 ; 32-byte aligned load; the checks show it folded into the (%rdi) operand
   1493   %3 = fptosi <8 x float> %2 to <8 x i32> ; converts the loaded value
   1494   %4 = or <8 x i32> %1, %3 ; the returned value uses both conversion results (vorps in the checks)
   1495   ret <8 x i32> %4
   1496 }
   1497 
   1498 define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; two chained <4 x double> fdivs; the checks below expect vdivpd with a register divisor and then a (%rdi) divisor
   1499 ; GENERIC-LABEL: test_divpd:
   1500 ; GENERIC:       # %bb.0:
   1501 ; GENERIC-NEXT:    vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:44.00]
   1502 ; GENERIC-NEXT:    vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:44.00]
   1503 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1504 ;
   1505 ; SANDY-LABEL: test_divpd:
   1506 ; SANDY:       # %bb.0:
   1507 ; SANDY-NEXT:    vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:44.00]
   1508 ; SANDY-NEXT:    vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:44.00]
   1509 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1510 ;
   1511 ; HASWELL-LABEL: test_divpd:
   1512 ; HASWELL:       # %bb.0:
   1513 ; HASWELL-NEXT:    vdivpd %ymm1, %ymm0, %ymm0 # sched: [35:28.00]
   1514 ; HASWELL-NEXT:    vdivpd (%rdi), %ymm0, %ymm0 # sched: [42:28.00]
   1515 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1516 ;
   1517 ; BROADWELL-LABEL: test_divpd:
   1518 ; BROADWELL:       # %bb.0:
   1519 ; BROADWELL-NEXT:    vdivpd %ymm1, %ymm0, %ymm0 # sched: [23:16.00]
   1520 ; BROADWELL-NEXT:    vdivpd (%rdi), %ymm0, %ymm0 # sched: [29:16.00]
   1521 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1522 ;
   1523 ; SKYLAKE-LABEL: test_divpd:
   1524 ; SKYLAKE:       # %bb.0:
   1525 ; SKYLAKE-NEXT:    vdivpd %ymm1, %ymm0, %ymm0 # sched: [14:5.00]
   1526 ; SKYLAKE-NEXT:    vdivpd (%rdi), %ymm0, %ymm0 # sched: [21:8.00]
   1527 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1528 ;
   1529 ; SKX-LABEL: test_divpd:
   1530 ; SKX:       # %bb.0:
   1531 ; SKX-NEXT:    vdivpd %ymm1, %ymm0, %ymm0 # sched: [14:5.00]
   1532 ; SKX-NEXT:    vdivpd (%rdi), %ymm0, %ymm0 # sched: [21:8.00]
   1533 ; SKX-NEXT:    retq # sched: [7:1.00]
   1534 ;
   1535 ; BTVER2-LABEL: test_divpd:
   1536 ; BTVER2:       # %bb.0:
   1537 ; BTVER2-NEXT:    vdivpd %ymm1, %ymm0, %ymm0 # sched: [38:38.00]
   1538 ; BTVER2-NEXT:    vdivpd (%rdi), %ymm0, %ymm0 # sched: [43:38.00]
   1539 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1540 ;
   1541 ; ZNVER1-LABEL: test_divpd:
   1542 ; ZNVER1:       # %bb.0:
   1543 ; ZNVER1-NEXT:    vdivpd %ymm1, %ymm0, %ymm0 # sched: [15:15.00]
   1544 ; ZNVER1-NEXT:    vdivpd (%rdi), %ymm0, %ymm0 # sched: [22:22.00]
   1545 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1546   %1 = fdiv <4 x double> %a0, %a1 ; divide with both operands passed by value
   1547   %2 = load <4 x double>, <4 x double> *%a2, align 32 ; 32-byte aligned load; the checks show it folded into the (%rdi) operand
   1548   %3 = fdiv <4 x double> %1, %2 ; first quotient divided by the loaded value
   1549   ret <4 x double> %3
   1550 }
   1551 
   1552 define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; two chained <8 x float> fdivs; the checks below expect vdivps with a register divisor and then a (%rdi) divisor
   1553 ; GENERIC-LABEL: test_divps:
   1554 ; GENERIC:       # %bb.0:
   1555 ; GENERIC-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # sched: [29:28.00]
   1556 ; GENERIC-NEXT:    vdivps (%rdi), %ymm0, %ymm0 # sched: [36:28.00]
   1557 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1558 ;
   1559 ; SANDY-LABEL: test_divps:
   1560 ; SANDY:       # %bb.0:
   1561 ; SANDY-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # sched: [29:28.00]
   1562 ; SANDY-NEXT:    vdivps (%rdi), %ymm0, %ymm0 # sched: [36:28.00]
   1563 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1564 ;
   1565 ; HASWELL-LABEL: test_divps:
   1566 ; HASWELL:       # %bb.0:
   1567 ; HASWELL-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # sched: [21:14.00]
   1568 ; HASWELL-NEXT:    vdivps (%rdi), %ymm0, %ymm0 # sched: [28:14.00]
   1569 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1570 ;
   1571 ; BROADWELL-LABEL: test_divps:
   1572 ; BROADWELL:       # %bb.0:
   1573 ; BROADWELL-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # sched: [17:10.00]
   1574 ; BROADWELL-NEXT:    vdivps (%rdi), %ymm0, %ymm0 # sched: [23:10.00]
   1575 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1576 ;
   1577 ; SKYLAKE-LABEL: test_divps:
   1578 ; SKYLAKE:       # %bb.0:
   1579 ; SKYLAKE-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # sched: [11:5.00]
   1580 ; SKYLAKE-NEXT:    vdivps (%rdi), %ymm0, %ymm0 # sched: [18:5.00]
   1581 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1582 ;
   1583 ; SKX-LABEL: test_divps:
   1584 ; SKX:       # %bb.0:
   1585 ; SKX-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # sched: [11:5.00]
   1586 ; SKX-NEXT:    vdivps (%rdi), %ymm0, %ymm0 # sched: [18:5.00]
   1587 ; SKX-NEXT:    retq # sched: [7:1.00]
   1588 ;
   1589 ; BTVER2-LABEL: test_divps:
   1590 ; BTVER2:       # %bb.0:
   1591 ; BTVER2-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # sched: [38:38.00]
   1592 ; BTVER2-NEXT:    vdivps (%rdi), %ymm0, %ymm0 # sched: [43:38.00]
   1593 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1594 ;
   1595 ; ZNVER1-LABEL: test_divps:
   1596 ; ZNVER1:       # %bb.0:
   1597 ; ZNVER1-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # sched: [12:12.00]
   1598 ; ZNVER1-NEXT:    vdivps (%rdi), %ymm0, %ymm0 # sched: [19:19.00]
   1599 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1600   %1 = fdiv <8 x float> %a0, %a1 ; divide with both operands passed by value
   1601   %2 = load <8 x float>, <8 x float> *%a2, align 32 ; 32-byte aligned load; the checks show it folded into the (%rdi) operand
   1602   %3 = fdiv <8 x float> %1, %2 ; first quotient divided by the loaded value
   1603   ret <8 x float> %3
   1604 }
   1605 
   1606 define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; two chained dp.ps.256 intrinsic calls with immediate 7; the checks below expect vdpps $7 in register and (%rdi) forms
   1607 ; GENERIC-LABEL: test_dpps:
   1608 ; GENERIC:       # %bb.0:
   1609 ; GENERIC-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00]
   1610 ; GENERIC-NEXT:    vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [19:2.00]
   1611 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1612 ;
   1613 ; SANDY-LABEL: test_dpps:
   1614 ; SANDY:       # %bb.0:
   1615 ; SANDY-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00]
   1616 ; SANDY-NEXT:    vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [19:2.00]
   1617 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1618 ;
   1619 ; HASWELL-LABEL: test_dpps:
   1620 ; HASWELL:       # %bb.0:
   1621 ; HASWELL-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [14:2.00]
   1622 ; HASWELL-NEXT:    vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [21:2.00]
   1623 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1624 ;
   1625 ; BROADWELL-LABEL: test_dpps:
   1626 ; BROADWELL:       # %bb.0:
   1627 ; BROADWELL-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [14:2.00]
   1628 ; BROADWELL-NEXT:    vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:2.00]
   1629 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1630 ;
   1631 ; SKYLAKE-LABEL: test_dpps:
   1632 ; SKYLAKE:       # %bb.0:
   1633 ; SKYLAKE-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [13:1.50]
   1634 ; SKYLAKE-NEXT:    vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:1.50]
   1635 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1636 ;
   1637 ; SKX-LABEL: test_dpps:
   1638 ; SKX:       # %bb.0:
   1639 ; SKX-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [13:1.33]
   1640 ; SKX-NEXT:    vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:1.33]
   1641 ; SKX-NEXT:    retq # sched: [7:1.00]
   1642 ;
   1643 ; BTVER2-LABEL: test_dpps:
   1644 ; BTVER2:       # %bb.0:
   1645 ; BTVER2-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:6.00]
   1646 ; BTVER2-NEXT:    vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [17:6.00]
   1647 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1648 ;
   1649 ; ZNVER1-LABEL: test_dpps:
   1650 ; ZNVER1:       # %bb.0:
   1651 ; ZNVER1-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
   1652 ; ZNVER1-NEXT:    vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
   1653 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1654   %1 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; both vector operands passed by value; NOTE(review): the znver1 checks expect [100:0.25] for both forms - confirm that is intended
   1655   %2 = load <8 x float>, <8 x float> *%a2, align 32 ; 32-byte aligned load; the checks show it folded into the (%rdi) operand
   1656   %3 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %1, <8 x float> %2, i8 7) ; first result combined with the loaded value
   1657   ret <8 x float> %3
   1658 }
   1659 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
   1660 
   1661 define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x float> *%a2) { ; returns elements 4-7 of %a0 and stores elements 4-7 of %a1; the checks below expect vextractf128 $1 with register and (%rdi) destinations
   1662 ; GENERIC-LABEL: test_extractf128:
   1663 ; GENERIC:       # %bb.0:
   1664 ; GENERIC-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
   1665 ; GENERIC-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
   1666 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   1667 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1668 ;
   1669 ; SANDY-LABEL: test_extractf128:
   1670 ; SANDY:       # %bb.0:
   1671 ; SANDY-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
   1672 ; SANDY-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
   1673 ; SANDY-NEXT:    vzeroupper # sched: [100:0.33]
   1674 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1675 ;
   1676 ; HASWELL-LABEL: test_extractf128:
   1677 ; HASWELL:       # %bb.0:
   1678 ; HASWELL-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
   1679 ; HASWELL-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
   1680 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
   1681 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1682 ;
   1683 ; BROADWELL-LABEL: test_extractf128:
   1684 ; BROADWELL:       # %bb.0:
   1685 ; BROADWELL-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
   1686 ; BROADWELL-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
   1687 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
   1688 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1689 ;
   1690 ; SKYLAKE-LABEL: test_extractf128:
   1691 ; SKYLAKE:       # %bb.0:
   1692 ; SKYLAKE-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
   1693 ; SKYLAKE-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
   1694 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
   1695 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1696 ;
   1697 ; SKX-LABEL: test_extractf128:
   1698 ; SKX:       # %bb.0:
   1699 ; SKX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
   1700 ; SKX-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
   1701 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   1702 ; SKX-NEXT:    retq # sched: [7:1.00]
   1703 ;
   1704 ; BTVER2-LABEL: test_extractf128:
   1705 ; BTVER2:       # %bb.0:
   1706 ; BTVER2-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [1:0.50]
   1707 ; BTVER2-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
   1708 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1709 ;
   1710 ; ZNVER1-LABEL: test_extractf128:
   1711 ; ZNVER1:       # %bb.0:
   1712 ; ZNVER1-NEXT:    vextractf128 $1, %ymm0, %xmm0 # sched: [1:0.33]
   1713 ; ZNVER1-NEXT:    vextractf128 $1, %ymm1, (%rdi) # sched: [8:0.50]
   1714 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
   1715 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1716   %1 = shufflevector <8 x float> %a0, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; elements 4-7 of %a0, returned in a register
   1717   %2 = shufflevector <8 x float> %a1, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; elements 4-7 of %a1, written to memory below
   1718   store <4 x float> %2, <4 x float> *%a2 ; no explicit alignment given; the checks show the store merged into the (%rdi)-destination vextractf128
   1719   ret <4 x float> %1 ; every prefix except the btver2 one expects a vzeroupper before the return
   1720 }
   1721 
   1722 define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; two chained hadd.pd.256 intrinsic calls; the checks below expect vhaddpd in register and (%rdi) forms
   1723 ; GENERIC-LABEL: test_haddpd:
   1724 ; GENERIC:       # %bb.0:
   1725 ; GENERIC-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
   1726 ; GENERIC-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
   1727 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1728 ;
   1729 ; SANDY-LABEL: test_haddpd:
   1730 ; SANDY:       # %bb.0:
   1731 ; SANDY-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
   1732 ; SANDY-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
   1733 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1734 ;
   1735 ; HASWELL-LABEL: test_haddpd:
   1736 ; HASWELL:       # %bb.0:
   1737 ; HASWELL-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
   1738 ; HASWELL-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
   1739 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1740 ;
   1741 ; BROADWELL-LABEL: test_haddpd:
   1742 ; BROADWELL:       # %bb.0:
   1743 ; BROADWELL-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
   1744 ; BROADWELL-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
   1745 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1746 ;
   1747 ; SKYLAKE-LABEL: test_haddpd:
   1748 ; SKYLAKE:       # %bb.0:
   1749 ; SKYLAKE-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
   1750 ; SKYLAKE-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
   1751 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1752 ;
   1753 ; SKX-LABEL: test_haddpd:
   1754 ; SKX:       # %bb.0:
   1755 ; SKX-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
   1756 ; SKX-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
   1757 ; SKX-NEXT:    retq # sched: [7:1.00]
   1758 ;
   1759 ; BTVER2-LABEL: test_haddpd:
   1760 ; BTVER2:       # %bb.0:
   1761 ; BTVER2-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   1762 ; BTVER2-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
   1763 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1764 ;
   1765 ; ZNVER1-LABEL: test_haddpd:
   1766 ; ZNVER1:       # %bb.0:
   1767 ; ZNVER1-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
   1768 ; ZNVER1-NEXT:    vhaddpd (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
   1769 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1770   %1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; both operands passed by value; NOTE(review): the znver1 checks expect [100:0.25] for both forms - confirm that is intended
   1771   %2 = load <4 x double>, <4 x double> *%a2, align 32 ; 32-byte aligned load; the checks show it folded into the (%rdi) operand
   1772   %3 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %1, <4 x double> %2) ; first result combined with the loaded value
   1773   ret <4 x double> %3
   1774 }
   1775 declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
   1776 
   1777 define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; two chained hadd.ps.256 intrinsic calls; the checks below expect vhaddps in register and (%rdi) forms
   1778 ; GENERIC-LABEL: test_haddps:
   1779 ; GENERIC:       # %bb.0:
   1780 ; GENERIC-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
   1781 ; GENERIC-NEXT:    vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
   1782 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1783 ;
   1784 ; SANDY-LABEL: test_haddps:
   1785 ; SANDY:       # %bb.0:
   1786 ; SANDY-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
   1787 ; SANDY-NEXT:    vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
   1788 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1789 ;
   1790 ; HASWELL-LABEL: test_haddps:
   1791 ; HASWELL:       # %bb.0:
   1792 ; HASWELL-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
   1793 ; HASWELL-NEXT:    vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
   1794 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1795 ;
   1796 ; BROADWELL-LABEL: test_haddps:
   1797 ; BROADWELL:       # %bb.0:
   1798 ; BROADWELL-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
   1799 ; BROADWELL-NEXT:    vhaddps (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
   1800 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1801 ;
   1802 ; SKYLAKE-LABEL: test_haddps:
   1803 ; SKYLAKE:       # %bb.0:
   1804 ; SKYLAKE-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
   1805 ; SKYLAKE-NEXT:    vhaddps (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
   1806 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1807 ;
   1808 ; SKX-LABEL: test_haddps:
   1809 ; SKX:       # %bb.0:
   1810 ; SKX-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
   1811 ; SKX-NEXT:    vhaddps (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
   1812 ; SKX-NEXT:    retq # sched: [7:1.00]
   1813 ;
   1814 ; BTVER2-LABEL: test_haddps:
   1815 ; BTVER2:       # %bb.0:
   1816 ; BTVER2-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   1817 ; BTVER2-NEXT:    vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
   1818 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1819 ;
   1820 ; ZNVER1-LABEL: test_haddps:
   1821 ; ZNVER1:       # %bb.0:
   1822 ; ZNVER1-NEXT:    vhaddps %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
   1823 ; ZNVER1-NEXT:    vhaddps (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
   1824 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1825   %1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; both operands passed by value; NOTE(review): the znver1 checks expect [100:0.25] for both forms - confirm that is intended
   1826   %2 = load <8 x float>, <8 x float> *%a2, align 32 ; 32-byte aligned load; the checks show it folded into the (%rdi) operand
   1827   %3 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %1, <8 x float> %2) ; first result combined with the loaded value
   1828   ret <8 x float> %3
   1829 }
   1830 declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
   1831 
   1832 define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; two chained hsub.pd.256 intrinsic calls; the checks below expect vhsubpd in register and (%rdi) forms
   1833 ; GENERIC-LABEL: test_hsubpd:
   1834 ; GENERIC:       # %bb.0:
   1835 ; GENERIC-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
   1836 ; GENERIC-NEXT:    vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
   1837 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1838 ;
   1839 ; SANDY-LABEL: test_hsubpd:
   1840 ; SANDY:       # %bb.0:
   1841 ; SANDY-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
   1842 ; SANDY-NEXT:    vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
   1843 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1844 ;
   1845 ; HASWELL-LABEL: test_hsubpd:
   1846 ; HASWELL:       # %bb.0:
   1847 ; HASWELL-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
   1848 ; HASWELL-NEXT:    vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
   1849 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1850 ;
   1851 ; BROADWELL-LABEL: test_hsubpd:
   1852 ; BROADWELL:       # %bb.0:
   1853 ; BROADWELL-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
   1854 ; BROADWELL-NEXT:    vhsubpd (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
   1855 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1856 ;
   1857 ; SKYLAKE-LABEL: test_hsubpd:
   1858 ; SKYLAKE:       # %bb.0:
   1859 ; SKYLAKE-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
   1860 ; SKYLAKE-NEXT:    vhsubpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
   1861 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1862 ;
   1863 ; SKX-LABEL: test_hsubpd:
   1864 ; SKX:       # %bb.0:
   1865 ; SKX-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
   1866 ; SKX-NEXT:    vhsubpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
   1867 ; SKX-NEXT:    retq # sched: [7:1.00]
   1868 ;
   1869 ; BTVER2-LABEL: test_hsubpd:
   1870 ; BTVER2:       # %bb.0:
   1871 ; BTVER2-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   1872 ; BTVER2-NEXT:    vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
   1873 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1874 ;
   1875 ; ZNVER1-LABEL: test_hsubpd:
   1876 ; ZNVER1:       # %bb.0:
   1877 ; ZNVER1-NEXT:    vhsubpd %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
   1878 ; ZNVER1-NEXT:    vhsubpd (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
   1879 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1880   %1 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; both operands passed by value; NOTE(review): the znver1 checks expect [100:0.25] for both forms - confirm that is intended
   1881   %2 = load <4 x double>, <4 x double> *%a2, align 32 ; 32-byte aligned load; the checks show it folded into the (%rdi) operand
   1882   %3 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %1, <4 x double> %2) ; first result combined with the loaded value
   1883   ret <4 x double> %3
   1884 }
   1885 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
   1886 
   1887 define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; two chained hsub.ps.256 intrinsic calls; the checks below expect vhsubps in register and (%rdi) forms
   1888 ; GENERIC-LABEL: test_hsubps:
   1889 ; GENERIC:       # %bb.0:
   1890 ; GENERIC-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
   1891 ; GENERIC-NEXT:    vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
   1892 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1893 ;
   1894 ; SANDY-LABEL: test_hsubps:
   1895 ; SANDY:       # %bb.0:
   1896 ; SANDY-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
   1897 ; SANDY-NEXT:    vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
   1898 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1899 ;
   1900 ; HASWELL-LABEL: test_hsubps:
   1901 ; HASWELL:       # %bb.0:
   1902 ; HASWELL-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
   1903 ; HASWELL-NEXT:    vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
   1904 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1905 ;
   1906 ; BROADWELL-LABEL: test_hsubps:
   1907 ; BROADWELL:       # %bb.0:
   1908 ; BROADWELL-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
   1909 ; BROADWELL-NEXT:    vhsubps (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
   1910 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1911 ;
   1912 ; SKYLAKE-LABEL: test_hsubps:
   1913 ; SKYLAKE:       # %bb.0:
   1914 ; SKYLAKE-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
   1915 ; SKYLAKE-NEXT:    vhsubps (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
   1916 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1917 ;
   1918 ; SKX-LABEL: test_hsubps:
   1919 ; SKX:       # %bb.0:
   1920 ; SKX-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
   1921 ; SKX-NEXT:    vhsubps (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
   1922 ; SKX-NEXT:    retq # sched: [7:1.00]
   1923 ;
   1924 ; BTVER2-LABEL: test_hsubps:
   1925 ; BTVER2:       # %bb.0:
   1926 ; BTVER2-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   1927 ; BTVER2-NEXT:    vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
   1928 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1929 ;
   1930 ; ZNVER1-LABEL: test_hsubps:
   1931 ; ZNVER1:       # %bb.0:
   1932 ; ZNVER1-NEXT:    vhsubps %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
   1933 ; ZNVER1-NEXT:    vhsubps (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
   1934 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   1935   %1 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; both operands passed by value; NOTE(review): the znver1 checks expect [100:0.25] for both forms - confirm that is intended
   1936   %2 = load <8 x float>, <8 x float> *%a2, align 32 ; 32-byte aligned load; the checks show it folded into the (%rdi) operand
   1937   %3 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %1, <8 x float> %2) ; first result combined with the loaded value
   1938   ret <8 x float> %3
   1939 }
   1940 declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
   1941 
   1942 define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Schedule test for vinsertf128. The upper four lanes of %a0 are replaced
        ; once with the register argument %a1 and once with a value loaded from
        ; %a2, and the two results are added. The expected output for each CPU
        ; model therefore holds the register form, the memory form, a vaddps
        ; and the return.
   1943 ; GENERIC-LABEL: test_insertf128:
   1944 ; GENERIC:       # %bb.0:
   1945 ; GENERIC-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
   1946 ; GENERIC-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
   1947 ; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   1948 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   1949 ;
   1950 ; SANDY-LABEL: test_insertf128:
   1951 ; SANDY:       # %bb.0:
   1952 ; SANDY-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
   1953 ; SANDY-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
   1954 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   1955 ; SANDY-NEXT:    retq # sched: [1:1.00]
   1956 ;
   1957 ; HASWELL-LABEL: test_insertf128:
   1958 ; HASWELL:       # %bb.0:
   1959 ; HASWELL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
   1960 ; HASWELL-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
   1961 ; HASWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   1962 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   1963 ;
   1964 ; BROADWELL-LABEL: test_insertf128:
   1965 ; BROADWELL:       # %bb.0:
   1966 ; BROADWELL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
   1967 ; BROADWELL-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:0.50]
   1968 ; BROADWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   1969 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   1970 ;
   1971 ; SKYLAKE-LABEL: test_insertf128:
   1972 ; SKYLAKE:       # %bb.0:
   1973 ; SKYLAKE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
   1974 ; SKYLAKE-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
   1975 ; SKYLAKE-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
   1976 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   1977 ;
   1978 ; SKX-LABEL: test_insertf128:
   1979 ; SKX:       # %bb.0:
   1980 ; SKX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
   1981 ; SKX-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
   1982 ; SKX-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
   1983 ; SKX-NEXT:    retq # sched: [7:1.00]
   1984 ;
   1985 ; BTVER2-LABEL: test_insertf128:
   1986 ; BTVER2:       # %bb.0:
   1987 ; BTVER2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:0.50]
   1988 ; BTVER2-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
   1989 ; BTVER2-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
   1990 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   1991 ;
   1992 ; ZNVER1-LABEL: test_insertf128:
   1993 ; ZNVER1:       # %bb.0:
   1994 ; ZNVER1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.67]
   1995 ; ZNVER1-NEXT:    vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
   1996 ; ZNVER1-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   1997 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Widen %a1 from 4 to 8 lanes (upper four lanes undef) so it can serve
        ; as the second operand of the 8-lane shuffle that follows.
   1998   %1 = shufflevector <4 x float> %a1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
        ; Result lanes 0-3 come from %a0, lanes 4-7 from the low four lanes of %1.
   1999   %2 = shufflevector <8 x float> %a0, <8 x float> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
        ; Same insertion again, this time with the 128-bit value loaded from %a2.
   2000   %3 = load <4 x float>, <4 x float> *%a2, align 16
   2001   %4 = shufflevector <4 x float> %3, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   2002   %5 = shufflevector <8 x float> %a0, <8 x float> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
        ; Both insertion results feed the add whose sum is returned.
   2003   %6 = fadd <8 x float> %2, %5
   2004   ret <8 x float> %6
   2005 }
   2006 
   2007 define <32 x i8> @test_lddqu(i8* %a0) {
        ; Schedule test for vlddqu. The body is a single call to the
        ; llvm.x86.avx.ldu.dq.256 intrinsic on %a0; the expected output for each
        ; CPU model is one 256-bit vlddqu from (%rdi) followed by the return.
   2008 ; GENERIC-LABEL: test_lddqu:
   2009 ; GENERIC:       # %bb.0:
   2010 ; GENERIC-NEXT:    vlddqu (%rdi), %ymm0 # sched: [7:0.50]
   2011 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2012 ;
   2013 ; SANDY-LABEL: test_lddqu:
   2014 ; SANDY:       # %bb.0:
   2015 ; SANDY-NEXT:    vlddqu (%rdi), %ymm0 # sched: [7:0.50]
   2016 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2017 ;
   2018 ; HASWELL-LABEL: test_lddqu:
   2019 ; HASWELL:       # %bb.0:
   2020 ; HASWELL-NEXT:    vlddqu (%rdi), %ymm0 # sched: [7:0.50]
   2021 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2022 ;
   2023 ; BROADWELL-LABEL: test_lddqu:
   2024 ; BROADWELL:       # %bb.0:
   2025 ; BROADWELL-NEXT:    vlddqu (%rdi), %ymm0 # sched: [6:0.50]
   2026 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2027 ;
   2028 ; SKYLAKE-LABEL: test_lddqu:
   2029 ; SKYLAKE:       # %bb.0:
   2030 ; SKYLAKE-NEXT:    vlddqu (%rdi), %ymm0 # sched: [7:0.50]
   2031 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2032 ;
   2033 ; SKX-LABEL: test_lddqu:
   2034 ; SKX:       # %bb.0:
   2035 ; SKX-NEXT:    vlddqu (%rdi), %ymm0 # sched: [7:0.50]
   2036 ; SKX-NEXT:    retq # sched: [7:1.00]
   2037 ;
   2038 ; BTVER2-LABEL: test_lddqu:
   2039 ; BTVER2:       # %bb.0:
   2040 ; BTVER2-NEXT:    vlddqu (%rdi), %ymm0 # sched: [5:1.00]
   2041 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2042 ;
   2043 ; ZNVER1-LABEL: test_lddqu:
   2044 ; ZNVER1:       # %bb.0:
   2045 ; ZNVER1-NEXT:    vlddqu (%rdi), %ymm0 # sched: [8:0.50]
   2046 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   2047   %1 = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0)
   2048   ret <32 x i8> %1
   2049 }
        ; Intrinsic called by test_lddqu above; takes a raw i8* and returns 32 bytes.
   2050 declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
   2051 
   2052 define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) {
        ; Schedule test for the 128-bit vmaskmovpd. A masked load and a masked
        ; store go through the same pointer %a0 with the same mask %a1, so the
        ; expected output holds both the load form and the store form. The
        ; loaded value is returned, which shows up as the vmovapd copy from
        ; %xmm2 to %xmm0 before the return.
   2053 ; GENERIC-LABEL: test_maskmovpd:
   2054 ; GENERIC:       # %bb.0:
   2055 ; GENERIC-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
   2056 ; GENERIC-NEXT:    vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
   2057 ; GENERIC-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:1.00]
   2058 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2059 ;
   2060 ; SANDY-LABEL: test_maskmovpd:
   2061 ; SANDY:       # %bb.0:
   2062 ; SANDY-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
   2063 ; SANDY-NEXT:    vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
   2064 ; SANDY-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:1.00]
   2065 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2066 ;
   2067 ; HASWELL-LABEL: test_maskmovpd:
   2068 ; HASWELL:       # %bb.0:
   2069 ; HASWELL-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
   2070 ; HASWELL-NEXT:    vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
   2071 ; HASWELL-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:1.00]
   2072 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2073 ;
   2074 ; BROADWELL-LABEL: test_maskmovpd:
   2075 ; BROADWELL:       # %bb.0:
   2076 ; BROADWELL-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
   2077 ; BROADWELL-NEXT:    vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
   2078 ; BROADWELL-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:1.00]
   2079 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2080 ;
   2081 ; SKYLAKE-LABEL: test_maskmovpd:
   2082 ; SKYLAKE:       # %bb.0:
   2083 ; SKYLAKE-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
   2084 ; SKYLAKE-NEXT:    vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
   2085 ; SKYLAKE-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:0.33]
   2086 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2087 ;
   2088 ; SKX-LABEL: test_maskmovpd:
   2089 ; SKX:       # %bb.0:
   2090 ; SKX-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
   2091 ; SKX-NEXT:    vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
   2092 ; SKX-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:0.33]
   2093 ; SKX-NEXT:    retq # sched: [7:1.00]
   2094 ;
   2095 ; BTVER2-LABEL: test_maskmovpd:
   2096 ; BTVER2:       # %bb.0:
   2097 ; BTVER2-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [6:1.00]
   2098 ; BTVER2-NEXT:    vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [6:2.00]
   2099 ; BTVER2-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:0.50]
   2100 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2101 ;
   2102 ; ZNVER1-LABEL: test_maskmovpd:
   2103 ; ZNVER1:       # %bb.0:
   2104 ; ZNVER1-NEXT:    vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:0.50]
   2105 ; ZNVER1-NEXT:    vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [4:0.50]
   2106 ; ZNVER1-NEXT:    vmovapd %xmm2, %xmm0 # sched: [1:0.25]
   2107 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Masked load from %a0 under mask %a1.
   2108   %1 = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %a1)
        ; Masked store of %a2 to the same address under the same mask.
   2109   call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %a1, <2 x double> %a2)
   2110   ret <2 x double> %1
   2111 }
        ; Intrinsics called by test_maskmovpd above.
   2112 declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly
   2113 declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind
   2114 
   2115 define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2) {
        ; Schedule test for the 256-bit vmaskmovpd. A masked load and a masked
        ; store go through the same pointer %a0 with the same mask %a1, so the
        ; expected output holds both the load form and the store form on ymm
        ; registers. The loaded value is returned, which shows up as the
        ; vmovapd copy from %ymm2 to %ymm0 before the return.
   2116 ; GENERIC-LABEL: test_maskmovpd_ymm:
   2117 ; GENERIC:       # %bb.0:
   2118 ; GENERIC-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
   2119 ; GENERIC-NEXT:    vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
   2120 ; GENERIC-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
   2121 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2122 ;
   2123 ; SANDY-LABEL: test_maskmovpd_ymm:
   2124 ; SANDY:       # %bb.0:
   2125 ; SANDY-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
   2126 ; SANDY-NEXT:    vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
   2127 ; SANDY-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
   2128 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2129 ;
   2130 ; HASWELL-LABEL: test_maskmovpd_ymm:
   2131 ; HASWELL:       # %bb.0:
   2132 ; HASWELL-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:2.00]
   2133 ; HASWELL-NEXT:    vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
   2134 ; HASWELL-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
   2135 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2136 ;
   2137 ; BROADWELL-LABEL: test_maskmovpd_ymm:
   2138 ; BROADWELL:       # %bb.0:
   2139 ; BROADWELL-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
   2140 ; BROADWELL-NEXT:    vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
   2141 ; BROADWELL-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
   2142 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2143 ;
   2144 ; SKYLAKE-LABEL: test_maskmovpd_ymm:
   2145 ; SKYLAKE:       # %bb.0:
   2146 ; SKYLAKE-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
   2147 ; SKYLAKE-NEXT:    vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
   2148 ; SKYLAKE-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:0.33]
   2149 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2150 ;
   2151 ; SKX-LABEL: test_maskmovpd_ymm:
   2152 ; SKX:       # %bb.0:
   2153 ; SKX-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
   2154 ; SKX-NEXT:    vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
   2155 ; SKX-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:0.33]
   2156 ; SKX-NEXT:    retq # sched: [7:1.00]
   2157 ;
   2158 ; BTVER2-LABEL: test_maskmovpd_ymm:
   2159 ; BTVER2:       # %bb.0:
   2160 ; BTVER2-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [6:2.00]
   2161 ; BTVER2-NEXT:    vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [6:2.00]
   2162 ; BTVER2-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:1.00]
   2163 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2164 ;
   2165 ; ZNVER1-LABEL: test_maskmovpd_ymm:
   2166 ; ZNVER1:       # %bb.0:
   2167 ; ZNVER1-NEXT:    vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:1.00]
   2168 ; ZNVER1-NEXT:    vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
   2169 ; ZNVER1-NEXT:    vmovapd %ymm2, %ymm0 # sched: [1:0.25]
   2170 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Masked load from %a0 under mask %a1.
   2171   %1 = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %a1)
        ; Masked store of %a2 to the same address under the same mask.
   2172   call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %a1, <4 x double> %a2)
   2173   ret <4 x double> %1
   2174 }
        ; Intrinsics called by test_maskmovpd_ymm above.
   2175 declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly
   2176 declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind
   2177 
   2178 define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) {
        ; Schedule test for the 128-bit vmaskmovps. A masked load and a masked
        ; store go through the same pointer %a0 with the same mask %a1, so the
        ; expected output holds both the load form and the store form. The
        ; loaded value is returned, which shows up as the vmovaps copy from
        ; %xmm2 to %xmm0 before the return.
   2179 ; GENERIC-LABEL: test_maskmovps:
   2180 ; GENERIC:       # %bb.0:
   2181 ; GENERIC-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
   2182 ; GENERIC-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
   2183 ; GENERIC-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
   2184 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2185 ;
   2186 ; SANDY-LABEL: test_maskmovps:
   2187 ; SANDY:       # %bb.0:
   2188 ; SANDY-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
   2189 ; SANDY-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
   2190 ; SANDY-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
   2191 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2192 ;
   2193 ; HASWELL-LABEL: test_maskmovps:
   2194 ; HASWELL:       # %bb.0:
   2195 ; HASWELL-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
   2196 ; HASWELL-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
   2197 ; HASWELL-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
   2198 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2199 ;
   2200 ; BROADWELL-LABEL: test_maskmovps:
   2201 ; BROADWELL:       # %bb.0:
   2202 ; BROADWELL-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
   2203 ; BROADWELL-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
   2204 ; BROADWELL-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:1.00]
   2205 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2206 ;
   2207 ; SKYLAKE-LABEL: test_maskmovps:
   2208 ; SKYLAKE:       # %bb.0:
   2209 ; SKYLAKE-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
   2210 ; SKYLAKE-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
   2211 ; SKYLAKE-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:0.33]
   2212 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2213 ;
   2214 ; SKX-LABEL: test_maskmovps:
   2215 ; SKX:       # %bb.0:
   2216 ; SKX-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
   2217 ; SKX-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
   2218 ; SKX-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:0.33]
   2219 ; SKX-NEXT:    retq # sched: [7:1.00]
   2220 ;
   2221 ; BTVER2-LABEL: test_maskmovps:
   2222 ; BTVER2:       # %bb.0:
   2223 ; BTVER2-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [6:1.00]
   2224 ; BTVER2-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [6:2.00]
   2225 ; BTVER2-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:0.50]
   2226 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2227 ;
   2228 ; ZNVER1-LABEL: test_maskmovps:
   2229 ; ZNVER1:       # %bb.0:
   2230 ; ZNVER1-NEXT:    vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:0.50]
   2231 ; ZNVER1-NEXT:    vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [4:0.50]
   2232 ; ZNVER1-NEXT:    vmovaps %xmm2, %xmm0 # sched: [1:0.25]
   2233 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Masked load from %a0 under mask %a1.
   2234   %1 = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %a1)
        ; Masked store of %a2 to the same address under the same mask.
   2235   call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %a1, <4 x float> %a2)
   2236   ret <4 x float> %1
   2237 }
        ; Intrinsics called by test_maskmovps above.
   2238 declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly
   2239 declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind
   2240 
   2241 define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2) {
        ; Schedule test for the 256-bit vmaskmovps. A masked load and a masked
        ; store go through the same pointer %a0 with the same mask %a1, so the
        ; expected output holds both the load form and the store form on ymm
        ; registers. The loaded value is returned, which shows up as the
        ; vmovaps copy from %ymm2 to %ymm0 before the return.
   2242 ; GENERIC-LABEL: test_maskmovps_ymm:
   2243 ; GENERIC:       # %bb.0:
   2244 ; GENERIC-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
   2245 ; GENERIC-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
   2246 ; GENERIC-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
   2247 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2248 ;
   2249 ; SANDY-LABEL: test_maskmovps_ymm:
   2250 ; SANDY:       # %bb.0:
   2251 ; SANDY-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
   2252 ; SANDY-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
   2253 ; SANDY-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
   2254 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2255 ;
   2256 ; HASWELL-LABEL: test_maskmovps_ymm:
   2257 ; HASWELL:       # %bb.0:
   2258 ; HASWELL-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:2.00]
   2259 ; HASWELL-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
   2260 ; HASWELL-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
   2261 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2262 ;
   2263 ; BROADWELL-LABEL: test_maskmovps_ymm:
   2264 ; BROADWELL:       # %bb.0:
   2265 ; BROADWELL-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
   2266 ; BROADWELL-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
   2267 ; BROADWELL-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
   2268 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2269 ;
   2270 ; SKYLAKE-LABEL: test_maskmovps_ymm:
   2271 ; SKYLAKE:       # %bb.0:
   2272 ; SKYLAKE-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
   2273 ; SKYLAKE-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
   2274 ; SKYLAKE-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:0.33]
   2275 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2276 ;
   2277 ; SKX-LABEL: test_maskmovps_ymm:
   2278 ; SKX:       # %bb.0:
   2279 ; SKX-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
   2280 ; SKX-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
   2281 ; SKX-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:0.33]
   2282 ; SKX-NEXT:    retq # sched: [7:1.00]
   2283 ;
   2284 ; BTVER2-LABEL: test_maskmovps_ymm:
   2285 ; BTVER2:       # %bb.0:
   2286 ; BTVER2-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [6:2.00]
   2287 ; BTVER2-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [6:2.00]
   2288 ; BTVER2-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:1.00]
   2289 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2290 ;
   2291 ; ZNVER1-LABEL: test_maskmovps_ymm:
   2292 ; ZNVER1:       # %bb.0:
   2293 ; ZNVER1-NEXT:    vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:1.00]
   2294 ; ZNVER1-NEXT:    vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
   2295 ; ZNVER1-NEXT:    vmovaps %ymm2, %ymm0 # sched: [1:0.25]
   2296 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Masked load from %a0 under mask %a1.
   2297   %1 = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %a1)
        ; Masked store of %a2 to the same address under the same mask.
   2298   call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %a1, <8 x float> %a2)
   2299   ret <8 x float> %1
   2300 }
        ; Intrinsics called by test_maskmovps_ymm above.
   2301 declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly
   2302 declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind
   2303 
   2304 define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
        ; Schedule test for the 256-bit vmaxpd. The max intrinsic is applied
        ; first to the two register arguments and then to that result and a
        ; vector loaded from %a2, so the expected output holds both the
        ; register-register form and the register-memory form.
   2305 ; GENERIC-LABEL: test_maxpd:
   2306 ; GENERIC:       # %bb.0:
   2307 ; GENERIC-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2308 ; GENERIC-NEXT:    vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   2309 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2310 ;
   2311 ; SANDY-LABEL: test_maxpd:
   2312 ; SANDY:       # %bb.0:
   2313 ; SANDY-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2314 ; SANDY-NEXT:    vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   2315 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2316 ;
   2317 ; HASWELL-LABEL: test_maxpd:
   2318 ; HASWELL:       # %bb.0:
   2319 ; HASWELL-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2320 ; HASWELL-NEXT:    vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   2321 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2322 ;
   2323 ; BROADWELL-LABEL: test_maxpd:
   2324 ; BROADWELL:       # %bb.0:
   2325 ; BROADWELL-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2326 ; BROADWELL-NEXT:    vmaxpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
   2327 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2328 ;
   2329 ; SKYLAKE-LABEL: test_maxpd:
   2330 ; SKYLAKE:       # %bb.0:
   2331 ; SKYLAKE-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   2332 ; SKYLAKE-NEXT:    vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   2333 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2334 ;
   2335 ; SKX-LABEL: test_maxpd:
   2336 ; SKX:       # %bb.0:
   2337 ; SKX-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   2338 ; SKX-NEXT:    vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   2339 ; SKX-NEXT:    retq # sched: [7:1.00]
   2340 ;
   2341 ; BTVER2-LABEL: test_maxpd:
   2342 ; BTVER2:       # %bb.0:
   2343 ; BTVER2-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
   2344 ; BTVER2-NEXT:    vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
   2345 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2346 ;
   2347 ; ZNVER1-LABEL: test_maxpd:
   2348 ; ZNVER1:       # %bb.0:
   2349 ; ZNVER1-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2350 ; ZNVER1-NEXT:    vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   2351 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Register-register use of the intrinsic.
   2352   %1 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
        ; Second use takes its right-hand operand from a 32-byte-aligned load.
   2353   %2 = load <4 x double>, <4 x double> *%a2, align 32
   2354   %3 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %1, <4 x double> %2)
   2355   ret <4 x double> %3
   2356 }
        ; Intrinsic called by test_maxpd above.
   2357 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2358 
   2359 define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
        ; Schedule test for the 256-bit vmaxps. The max intrinsic is applied
        ; first to the two register arguments and then to that result and a
        ; vector loaded from %a2, so the expected output holds both the
        ; register-register form and the register-memory form.
   2360 ; GENERIC-LABEL: test_maxps:
   2361 ; GENERIC:       # %bb.0:
   2362 ; GENERIC-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2363 ; GENERIC-NEXT:    vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   2364 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2365 ;
   2366 ; SANDY-LABEL: test_maxps:
   2367 ; SANDY:       # %bb.0:
   2368 ; SANDY-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2369 ; SANDY-NEXT:    vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   2370 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2371 ;
   2372 ; HASWELL-LABEL: test_maxps:
   2373 ; HASWELL:       # %bb.0:
   2374 ; HASWELL-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2375 ; HASWELL-NEXT:    vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   2376 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2377 ;
   2378 ; BROADWELL-LABEL: test_maxps:
   2379 ; BROADWELL:       # %bb.0:
   2380 ; BROADWELL-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2381 ; BROADWELL-NEXT:    vmaxps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
   2382 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2383 ;
   2384 ; SKYLAKE-LABEL: test_maxps:
   2385 ; SKYLAKE:       # %bb.0:
   2386 ; SKYLAKE-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   2387 ; SKYLAKE-NEXT:    vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   2388 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2389 ;
   2390 ; SKX-LABEL: test_maxps:
   2391 ; SKX:       # %bb.0:
   2392 ; SKX-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   2393 ; SKX-NEXT:    vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   2394 ; SKX-NEXT:    retq # sched: [7:1.00]
   2395 ;
   2396 ; BTVER2-LABEL: test_maxps:
   2397 ; BTVER2:       # %bb.0:
   2398 ; BTVER2-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
   2399 ; BTVER2-NEXT:    vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
   2400 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2401 ;
   2402 ; ZNVER1-LABEL: test_maxps:
   2403 ; ZNVER1:       # %bb.0:
   2404 ; ZNVER1-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2405 ; ZNVER1-NEXT:    vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   2406 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Register-register use of the intrinsic.
   2407   %1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
        ; Second use takes its right-hand operand from a 32-byte-aligned load.
   2408   %2 = load <8 x float>, <8 x float> *%a2, align 32
   2409   %3 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %1, <8 x float> %2)
   2410   ret <8 x float> %3
   2411 }
        ; Intrinsic called by test_maxps above.
   2412 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2413 
   2414 define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
        ; Schedule test for the 256-bit vminpd. The min intrinsic is applied
        ; first to the two register arguments and then to that result and a
        ; vector loaded from %a2, so the expected output holds both the
        ; register-register form and the register-memory form.
   2415 ; GENERIC-LABEL: test_minpd:
   2416 ; GENERIC:       # %bb.0:
   2417 ; GENERIC-NEXT:    vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2418 ; GENERIC-NEXT:    vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   2419 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2420 ;
   2421 ; SANDY-LABEL: test_minpd:
   2422 ; SANDY:       # %bb.0:
   2423 ; SANDY-NEXT:    vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2424 ; SANDY-NEXT:    vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   2425 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2426 ;
   2427 ; HASWELL-LABEL: test_minpd:
   2428 ; HASWELL:       # %bb.0:
   2429 ; HASWELL-NEXT:    vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2430 ; HASWELL-NEXT:    vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   2431 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2432 ;
   2433 ; BROADWELL-LABEL: test_minpd:
   2434 ; BROADWELL:       # %bb.0:
   2435 ; BROADWELL-NEXT:    vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2436 ; BROADWELL-NEXT:    vminpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
   2437 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2438 ;
   2439 ; SKYLAKE-LABEL: test_minpd:
   2440 ; SKYLAKE:       # %bb.0:
   2441 ; SKYLAKE-NEXT:    vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   2442 ; SKYLAKE-NEXT:    vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   2443 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2444 ;
   2445 ; SKX-LABEL: test_minpd:
   2446 ; SKX:       # %bb.0:
   2447 ; SKX-NEXT:    vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   2448 ; SKX-NEXT:    vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   2449 ; SKX-NEXT:    retq # sched: [7:1.00]
   2450 ;
   2451 ; BTVER2-LABEL: test_minpd:
   2452 ; BTVER2:       # %bb.0:
   2453 ; BTVER2-NEXT:    vminpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
   2454 ; BTVER2-NEXT:    vminpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
   2455 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2456 ;
   2457 ; ZNVER1-LABEL: test_minpd:
   2458 ; ZNVER1:       # %bb.0:
   2459 ; ZNVER1-NEXT:    vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2460 ; ZNVER1-NEXT:    vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   2461 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Register-register use of the intrinsic.
   2462   %1 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1)
        ; Second use takes its right-hand operand from a 32-byte-aligned load.
   2463   %2 = load <4 x double>, <4 x double> *%a2, align 32
   2464   %3 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %1, <4 x double> %2)
   2465   ret <4 x double> %3
   2466 }
        ; Intrinsic called by test_minpd above.
   2467 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
   2468 
   2469 define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
        ; Schedule test for the 256-bit vminps. The min intrinsic is applied
        ; first to the two register arguments and then to that result and a
        ; vector loaded from %a2, so the expected output holds both the
        ; register-register form and the register-memory form.
   2470 ; GENERIC-LABEL: test_minps:
   2471 ; GENERIC:       # %bb.0:
   2472 ; GENERIC-NEXT:    vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2473 ; GENERIC-NEXT:    vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   2474 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2475 ;
   2476 ; SANDY-LABEL: test_minps:
   2477 ; SANDY:       # %bb.0:
   2478 ; SANDY-NEXT:    vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2479 ; SANDY-NEXT:    vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   2480 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2481 ;
   2482 ; HASWELL-LABEL: test_minps:
   2483 ; HASWELL:       # %bb.0:
   2484 ; HASWELL-NEXT:    vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2485 ; HASWELL-NEXT:    vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   2486 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2487 ;
   2488 ; BROADWELL-LABEL: test_minps:
   2489 ; BROADWELL:       # %bb.0:
   2490 ; BROADWELL-NEXT:    vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2491 ; BROADWELL-NEXT:    vminps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
   2492 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2493 ;
   2494 ; SKYLAKE-LABEL: test_minps:
   2495 ; SKYLAKE:       # %bb.0:
   2496 ; SKYLAKE-NEXT:    vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   2497 ; SKYLAKE-NEXT:    vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   2498 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2499 ;
   2500 ; SKX-LABEL: test_minps:
   2501 ; SKX:       # %bb.0:
   2502 ; SKX-NEXT:    vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   2503 ; SKX-NEXT:    vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   2504 ; SKX-NEXT:    retq # sched: [7:1.00]
   2505 ;
   2506 ; BTVER2-LABEL: test_minps:
   2507 ; BTVER2:       # %bb.0:
   2508 ; BTVER2-NEXT:    vminps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
   2509 ; BTVER2-NEXT:    vminps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
   2510 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2511 ;
   2512 ; ZNVER1-LABEL: test_minps:
   2513 ; ZNVER1:       # %bb.0:
   2514 ; ZNVER1-NEXT:    vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2515 ; ZNVER1-NEXT:    vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   2516 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Register-register use of the intrinsic.
   2517   %1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
        ; Second use takes its right-hand operand from a 32-byte-aligned load.
   2518   %2 = load <8 x float>, <8 x float> *%a2, align 32
   2519   %3 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %1, <8 x float> %2)
   2520   ret <8 x float> %3
   2521 }
        ; Intrinsic called by test_minps above.
   2522 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
   2523 
   2524 define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) {
        ; Schedule test for the 256-bit vmovapd. A 32-byte-aligned vector is
        ; loaded from %a0, added to itself, stored to %a1 (also align 32) and
        ; returned. The expected output uses vmovapd for both the load and the
        ; store, with a vaddpd between them.
   2525 ; GENERIC-LABEL: test_movapd:
   2526 ; GENERIC:       # %bb.0:
   2527 ; GENERIC-NEXT:    vmovapd (%rdi), %ymm0 # sched: [7:0.50]
   2528 ; GENERIC-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2529 ; GENERIC-NEXT:    vmovapd %ymm0, (%rsi) # sched: [1:1.00]
   2530 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2531 ;
   2532 ; SANDY-LABEL: test_movapd:
   2533 ; SANDY:       # %bb.0:
   2534 ; SANDY-NEXT:    vmovapd (%rdi), %ymm0 # sched: [7:0.50]
   2535 ; SANDY-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2536 ; SANDY-NEXT:    vmovapd %ymm0, (%rsi) # sched: [1:1.00]
   2537 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2538 ;
   2539 ; HASWELL-LABEL: test_movapd:
   2540 ; HASWELL:       # %bb.0:
   2541 ; HASWELL-NEXT:    vmovapd (%rdi), %ymm0 # sched: [7:0.50]
   2542 ; HASWELL-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2543 ; HASWELL-NEXT:    vmovapd %ymm0, (%rsi) # sched: [1:1.00]
   2544 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2545 ;
   2546 ; BROADWELL-LABEL: test_movapd:
   2547 ; BROADWELL:       # %bb.0:
   2548 ; BROADWELL-NEXT:    vmovapd (%rdi), %ymm0 # sched: [6:0.50]
   2549 ; BROADWELL-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2550 ; BROADWELL-NEXT:    vmovapd %ymm0, (%rsi) # sched: [1:1.00]
   2551 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2552 ;
   2553 ; SKYLAKE-LABEL: test_movapd:
   2554 ; SKYLAKE:       # %bb.0:
   2555 ; SKYLAKE-NEXT:    vmovapd (%rdi), %ymm0 # sched: [7:0.50]
   2556 ; SKYLAKE-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
   2557 ; SKYLAKE-NEXT:    vmovapd %ymm0, (%rsi) # sched: [1:1.00]
   2558 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2559 ;
   2560 ; SKX-LABEL: test_movapd:
   2561 ; SKX:       # %bb.0:
   2562 ; SKX-NEXT:    vmovapd (%rdi), %ymm0 # sched: [7:0.50]
   2563 ; SKX-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
   2564 ; SKX-NEXT:    vmovapd %ymm0, (%rsi) # sched: [1:1.00]
   2565 ; SKX-NEXT:    retq # sched: [7:1.00]
   2566 ;
   2567 ; BTVER2-LABEL: test_movapd:
   2568 ; BTVER2:       # %bb.0:
   2569 ; BTVER2-NEXT:    vmovapd (%rdi), %ymm0 # sched: [5:1.00]
   2570 ; BTVER2-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
   2571 ; BTVER2-NEXT:    vmovapd %ymm0, (%rsi) # sched: [1:1.00]
   2572 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2573 ;
   2574 ; ZNVER1-LABEL: test_movapd:
   2575 ; ZNVER1:       # %bb.0:
   2576 ; ZNVER1-NEXT:    vmovapd (%rdi), %ymm0 # sched: [8:0.50]
   2577 ; ZNVER1-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2578 ; ZNVER1-NEXT:    vmovapd %ymm0, (%rsi) # sched: [1:0.50]
   2579 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   2580   %1 = load <4 x double>, <4 x double> *%a0, align 32
        ; The self-add sits between the aligned load and the aligned store.
        ; Presumably it is there to pin the double-precision domain so that
        ; vmovapd is selected for the moves -- TODO confirm.
   2581   %2 = fadd <4 x double> %1, %1
   2582   store <4 x double> %2, <4 x double> *%a1, align 32
   2583   ret <4 x double> %2
   2584 }
   2585 
   2586 define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) {
        ; Loads an <8 x float> from %a0 (align 32), adds it to itself, stores the
        ; sum to %a1 (align 32) and returns it. Every prefix expects a 256-bit
        ; vmovaps load, a vaddps and a vmovaps store, each followed by the
        ; "# sched" annotation recorded for that CPU model.
   2587 ; GENERIC-LABEL: test_movaps:
   2588 ; GENERIC:       # %bb.0:
   2589 ; GENERIC-NEXT:    vmovaps (%rdi), %ymm0 # sched: [7:0.50]
   2590 ; GENERIC-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2591 ; GENERIC-NEXT:    vmovaps %ymm0, (%rsi) # sched: [1:1.00]
   2592 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2593 ;
   2594 ; SANDY-LABEL: test_movaps:
   2595 ; SANDY:       # %bb.0:
   2596 ; SANDY-NEXT:    vmovaps (%rdi), %ymm0 # sched: [7:0.50]
   2597 ; SANDY-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2598 ; SANDY-NEXT:    vmovaps %ymm0, (%rsi) # sched: [1:1.00]
   2599 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2600 ;
   2601 ; HASWELL-LABEL: test_movaps:
   2602 ; HASWELL:       # %bb.0:
   2603 ; HASWELL-NEXT:    vmovaps (%rdi), %ymm0 # sched: [7:0.50]
   2604 ; HASWELL-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2605 ; HASWELL-NEXT:    vmovaps %ymm0, (%rsi) # sched: [1:1.00]
   2606 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2607 ;
   2608 ; BROADWELL-LABEL: test_movaps:
   2609 ; BROADWELL:       # %bb.0:
   2610 ; BROADWELL-NEXT:    vmovaps (%rdi), %ymm0 # sched: [6:0.50]
   2611 ; BROADWELL-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2612 ; BROADWELL-NEXT:    vmovaps %ymm0, (%rsi) # sched: [1:1.00]
   2613 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2614 ;
   2615 ; SKYLAKE-LABEL: test_movaps:
   2616 ; SKYLAKE:       # %bb.0:
   2617 ; SKYLAKE-NEXT:    vmovaps (%rdi), %ymm0 # sched: [7:0.50]
   2618 ; SKYLAKE-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
   2619 ; SKYLAKE-NEXT:    vmovaps %ymm0, (%rsi) # sched: [1:1.00]
   2620 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2621 ;
   2622 ; SKX-LABEL: test_movaps:
   2623 ; SKX:       # %bb.0:
   2624 ; SKX-NEXT:    vmovaps (%rdi), %ymm0 # sched: [7:0.50]
   2625 ; SKX-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
   2626 ; SKX-NEXT:    vmovaps %ymm0, (%rsi) # sched: [1:1.00]
   2627 ; SKX-NEXT:    retq # sched: [7:1.00]
   2628 ;
   2629 ; BTVER2-LABEL: test_movaps:
   2630 ; BTVER2:       # %bb.0:
   2631 ; BTVER2-NEXT:    vmovaps (%rdi), %ymm0 # sched: [5:1.00]
   2632 ; BTVER2-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
   2633 ; BTVER2-NEXT:    vmovaps %ymm0, (%rsi) # sched: [1:1.00]
   2634 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2635 ;
   2636 ; ZNVER1-LABEL: test_movaps:
   2637 ; ZNVER1:       # %bb.0:
   2638 ; ZNVER1-NEXT:    vmovaps (%rdi), %ymm0 # sched: [8:0.50]
   2639 ; ZNVER1-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2640 ; ZNVER1-NEXT:    vmovaps %ymm0, (%rsi) # sched: [1:0.50]
   2641 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   2642   %1 = load <8 x float>, <8 x float> *%a0, align 32
   2643   %2 = fadd <8 x float> %1, %1
   2644   store <8 x float> %2, <8 x float> *%a1, align 32
   2645   ret <8 x float> %2
   2646 }
   2647 
   2648 define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) {
        ; Applies the <0,0,2,2> shuffle to %a0 and to a value loaded from %a1, then
        ; adds the two results. Both shuffles are expected as vmovddup (register
        ; and memory forms); the BTVER2 and ZNVER1 prefixes expect the memory
        ; form to be emitted first, the others expect the register form first.
   2649 ; GENERIC-LABEL: test_movddup:
   2650 ; GENERIC:       # %bb.0:
   2651 ; GENERIC-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
   2652 ; GENERIC-NEXT:    vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
   2653 ; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2654 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2655 ;
   2656 ; SANDY-LABEL: test_movddup:
   2657 ; SANDY:       # %bb.0:
   2658 ; SANDY-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
   2659 ; SANDY-NEXT:    vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
   2660 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2661 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2662 ;
   2663 ; HASWELL-LABEL: test_movddup:
   2664 ; HASWELL:       # %bb.0:
   2665 ; HASWELL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
   2666 ; HASWELL-NEXT:    vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
   2667 ; HASWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2668 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2669 ;
   2670 ; BROADWELL-LABEL: test_movddup:
   2671 ; BROADWELL:       # %bb.0:
   2672 ; BROADWELL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
   2673 ; BROADWELL-NEXT:    vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:0.50]
   2674 ; BROADWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2675 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2676 ;
   2677 ; SKYLAKE-LABEL: test_movddup:
   2678 ; SKYLAKE:       # %bb.0:
   2679 ; SKYLAKE-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
   2680 ; SKYLAKE-NEXT:    vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
   2681 ; SKYLAKE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   2682 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2683 ;
   2684 ; SKX-LABEL: test_movddup:
   2685 ; SKX:       # %bb.0:
   2686 ; SKX-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
   2687 ; SKX-NEXT:    vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
   2688 ; SKX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   2689 ; SKX-NEXT:    retq # sched: [7:1.00]
   2690 ;
   2691 ; BTVER2-LABEL: test_movddup:
   2692 ; BTVER2:       # %bb.0:
   2693 ; BTVER2-NEXT:    vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:2.00]
   2694 ; BTVER2-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
   2695 ; BTVER2-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   2696 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2697 ;
   2698 ; ZNVER1-LABEL: test_movddup:
   2699 ; ZNVER1:       # %bb.0:
   2700 ; ZNVER1-NEXT:    vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [8:0.50]
   2701 ; ZNVER1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:0.50]
   2702 ; ZNVER1-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2703 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   2704   %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
   2705   %2 = load <4 x double>, <4 x double> *%a1, align 32
   2706   %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
   2707   %4 = fadd <4 x double> %1, %3
   2708   ret <4 x double> %4
   2709 }
   2710 
   2711 define i32 @test_movmskpd(<4 x double> %a0) {
        ; Calls the llvm.x86.avx.movmsk.pd.256 intrinsic on %a0 and returns its
        ; i32 result, expected as a single vmovmskpd from %ymm0 into %eax.
        ; Every prefix except BTVER2 also expects a vzeroupper before retq.
   2712 ; GENERIC-LABEL: test_movmskpd:
   2713 ; GENERIC:       # %bb.0:
   2714 ; GENERIC-NEXT:    vmovmskpd %ymm0, %eax # sched: [2:1.00]
   2715 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   2716 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2717 ;
   2718 ; SANDY-LABEL: test_movmskpd:
   2719 ; SANDY:       # %bb.0:
   2720 ; SANDY-NEXT:    vmovmskpd %ymm0, %eax # sched: [2:1.00]
   2721 ; SANDY-NEXT:    vzeroupper # sched: [100:0.33]
   2722 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2723 ;
   2724 ; HASWELL-LABEL: test_movmskpd:
   2725 ; HASWELL:       # %bb.0:
   2726 ; HASWELL-NEXT:    vmovmskpd %ymm0, %eax # sched: [3:1.00]
   2727 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
   2728 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2729 ;
   2730 ; BROADWELL-LABEL: test_movmskpd:
   2731 ; BROADWELL:       # %bb.0:
   2732 ; BROADWELL-NEXT:    vmovmskpd %ymm0, %eax # sched: [3:1.00]
   2733 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
   2734 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2735 ;
   2736 ; SKYLAKE-LABEL: test_movmskpd:
   2737 ; SKYLAKE:       # %bb.0:
   2738 ; SKYLAKE-NEXT:    vmovmskpd %ymm0, %eax # sched: [2:1.00]
   2739 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
   2740 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2741 ;
   2742 ; SKX-LABEL: test_movmskpd:
   2743 ; SKX:       # %bb.0:
   2744 ; SKX-NEXT:    vmovmskpd %ymm0, %eax # sched: [2:1.00]
   2745 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   2746 ; SKX-NEXT:    retq # sched: [7:1.00]
   2747 ;
   2748 ; BTVER2-LABEL: test_movmskpd:
   2749 ; BTVER2:       # %bb.0:
   2750 ; BTVER2-NEXT:    vmovmskpd %ymm0, %eax # sched: [3:1.00]
   2751 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2752 ;
   2753 ; ZNVER1-LABEL: test_movmskpd:
   2754 ; ZNVER1:       # %bb.0:
   2755 ; ZNVER1-NEXT:    vmovmskpd %ymm0, %eax # sched: [1:1.00]
   2756 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
   2757 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   2758   %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
   2759   ret i32 %1
   2760 }
        ; Declaration of the intrinsic called by test_movmskpd above.
   2761 declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
   2762 
   2763 define i32 @test_movmskps(<8 x float> %a0) {
        ; Calls the llvm.x86.avx.movmsk.ps.256 intrinsic on %a0 and returns its
        ; i32 result, expected as a single vmovmskps from %ymm0 into %eax.
        ; Every prefix except BTVER2 also expects a vzeroupper before retq.
   2764 ; GENERIC-LABEL: test_movmskps:
   2765 ; GENERIC:       # %bb.0:
   2766 ; GENERIC-NEXT:    vmovmskps %ymm0, %eax # sched: [2:1.00]
   2767 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   2768 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2769 ;
   2770 ; SANDY-LABEL: test_movmskps:
   2771 ; SANDY:       # %bb.0:
   2772 ; SANDY-NEXT:    vmovmskps %ymm0, %eax # sched: [2:1.00]
   2773 ; SANDY-NEXT:    vzeroupper # sched: [100:0.33]
   2774 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2775 ;
   2776 ; HASWELL-LABEL: test_movmskps:
   2777 ; HASWELL:       # %bb.0:
   2778 ; HASWELL-NEXT:    vmovmskps %ymm0, %eax # sched: [3:1.00]
   2779 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
   2780 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2781 ;
   2782 ; BROADWELL-LABEL: test_movmskps:
   2783 ; BROADWELL:       # %bb.0:
   2784 ; BROADWELL-NEXT:    vmovmskps %ymm0, %eax # sched: [3:1.00]
   2785 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
   2786 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2787 ;
   2788 ; SKYLAKE-LABEL: test_movmskps:
   2789 ; SKYLAKE:       # %bb.0:
   2790 ; SKYLAKE-NEXT:    vmovmskps %ymm0, %eax # sched: [2:1.00]
   2791 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
   2792 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2793 ;
   2794 ; SKX-LABEL: test_movmskps:
   2795 ; SKX:       # %bb.0:
   2796 ; SKX-NEXT:    vmovmskps %ymm0, %eax # sched: [2:1.00]
   2797 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   2798 ; SKX-NEXT:    retq # sched: [7:1.00]
   2799 ;
   2800 ; BTVER2-LABEL: test_movmskps:
   2801 ; BTVER2:       # %bb.0:
   2802 ; BTVER2-NEXT:    vmovmskps %ymm0, %eax # sched: [3:1.00]
   2803 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2804 ;
   2805 ; ZNVER1-LABEL: test_movmskps:
   2806 ; ZNVER1:       # %bb.0:
   2807 ; ZNVER1-NEXT:    vmovmskps %ymm0, %eax # sched: [1:1.00]
   2808 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
   2809 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   2810   %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
   2811   ret i32 %1
   2812 }
        ; Declaration of the intrinsic called by test_movmskps above.
   2813 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
   2814 
   2815 define void @test_movntdq(<4 x i64> %a0, <4 x i64> *%a1) {
        ; Emits vmovntdq through a side-effecting inline asm statement ("x"
        ; register input for %a0, "*m" memory operand for %a1), so the expected
        ; instruction is bracketed by #APP / #NO_APP markers. Every prefix except
        ; BTVER2 also expects a vzeroupper before retq.
   2816 ; GENERIC-LABEL: test_movntdq:
   2817 ; GENERIC:       # %bb.0:
   2818 ; GENERIC-NEXT:    #APP
   2819 ; GENERIC-NEXT:    vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
   2820 ; GENERIC-NEXT:    #NO_APP
   2821 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   2822 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2823 ;
   2824 ; SANDY-LABEL: test_movntdq:
   2825 ; SANDY:       # %bb.0:
   2826 ; SANDY-NEXT:    #APP
   2827 ; SANDY-NEXT:    vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
   2828 ; SANDY-NEXT:    #NO_APP
   2829 ; SANDY-NEXT:    vzeroupper # sched: [100:0.33]
   2830 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2831 ;
   2832 ; HASWELL-LABEL: test_movntdq:
   2833 ; HASWELL:       # %bb.0:
   2834 ; HASWELL-NEXT:    #APP
   2835 ; HASWELL-NEXT:    vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
   2836 ; HASWELL-NEXT:    #NO_APP
   2837 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
   2838 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2839 ;
   2840 ; BROADWELL-LABEL: test_movntdq:
   2841 ; BROADWELL:       # %bb.0:
   2842 ; BROADWELL-NEXT:    #APP
   2843 ; BROADWELL-NEXT:    vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
   2844 ; BROADWELL-NEXT:    #NO_APP
   2845 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
   2846 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2847 ;
   2848 ; SKYLAKE-LABEL: test_movntdq:
   2849 ; SKYLAKE:       # %bb.0:
   2850 ; SKYLAKE-NEXT:    #APP
   2851 ; SKYLAKE-NEXT:    vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
   2852 ; SKYLAKE-NEXT:    #NO_APP
   2853 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
   2854 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2855 ;
   2856 ; SKX-LABEL: test_movntdq:
   2857 ; SKX:       # %bb.0:
   2858 ; SKX-NEXT:    #APP
   2859 ; SKX-NEXT:    vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
   2860 ; SKX-NEXT:    #NO_APP
   2861 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   2862 ; SKX-NEXT:    retq # sched: [7:1.00]
   2863 ;
   2864 ; BTVER2-LABEL: test_movntdq:
   2865 ; BTVER2:       # %bb.0:
   2866 ; BTVER2-NEXT:    #APP
   2867 ; BTVER2-NEXT:    vmovntdq %ymm0, (%rdi) # sched: [2:2.00]
   2868 ; BTVER2-NEXT:    #NO_APP
   2869 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2870 ;
   2871 ; ZNVER1-LABEL: test_movntdq:
   2872 ; ZNVER1:       # %bb.0:
   2873 ; ZNVER1-NEXT:    #APP
   2874 ; ZNVER1-NEXT:    vmovntdq %ymm0, (%rdi) # sched: [1:0.50]
   2875 ; ZNVER1-NEXT:    #NO_APP
   2876 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
   2877 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   2878   call void asm sideeffect "vmovntdq $0, $1", "x,*m"(<4 x i64> %a0, <4 x i64> *%a1)
   2879   ret void
   2880 }
   2881 
   2882 define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) {
        ; Adds %a0 to itself, stores the sum to %a1 (align 32) with !nontemporal
        ; metadata and returns the sum. The store is expected as vmovntpd on every
        ; prefix. The !0 metadata node is defined outside this function.
   2883 ; GENERIC-LABEL: test_movntpd:
   2884 ; GENERIC:       # %bb.0:
   2885 ; GENERIC-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2886 ; GENERIC-NEXT:    vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
   2887 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2888 ;
   2889 ; SANDY-LABEL: test_movntpd:
   2890 ; SANDY:       # %bb.0:
   2891 ; SANDY-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2892 ; SANDY-NEXT:    vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
   2893 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2894 ;
   2895 ; HASWELL-LABEL: test_movntpd:
   2896 ; HASWELL:       # %bb.0:
   2897 ; HASWELL-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2898 ; HASWELL-NEXT:    vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
   2899 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2900 ;
   2901 ; BROADWELL-LABEL: test_movntpd:
   2902 ; BROADWELL:       # %bb.0:
   2903 ; BROADWELL-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2904 ; BROADWELL-NEXT:    vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
   2905 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2906 ;
   2907 ; SKYLAKE-LABEL: test_movntpd:
   2908 ; SKYLAKE:       # %bb.0:
   2909 ; SKYLAKE-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
   2910 ; SKYLAKE-NEXT:    vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
   2911 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2912 ;
   2913 ; SKX-LABEL: test_movntpd:
   2914 ; SKX:       # %bb.0:
   2915 ; SKX-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
   2916 ; SKX-NEXT:    vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
   2917 ; SKX-NEXT:    retq # sched: [7:1.00]
   2918 ;
   2919 ; BTVER2-LABEL: test_movntpd:
   2920 ; BTVER2:       # %bb.0:
   2921 ; BTVER2-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
   2922 ; BTVER2-NEXT:    vmovntpd %ymm0, (%rdi) # sched: [3:2.00]
   2923 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2924 ;
   2925 ; ZNVER1-LABEL: test_movntpd:
   2926 ; ZNVER1:       # %bb.0:
   2927 ; ZNVER1-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2928 ; ZNVER1-NEXT:    vmovntpd %ymm0, (%rdi) # sched: [1:0.50]
   2929 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   2930   %1 = fadd <4 x double> %a0, %a0
   2931   store <4 x double> %1, <4 x double> *%a1, align 32, !nontemporal !0
   2932   ret <4 x double> %1
   2933 }
   2934 
   2935 define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) {
        ; Adds %a0 to itself, stores the sum to %a1 (align 32) with !nontemporal
        ; metadata and returns the sum. The store is expected as vmovntps on every
        ; prefix. The !0 metadata node is defined outside this function.
   2936 ; GENERIC-LABEL: test_movntps:
   2937 ; GENERIC:       # %bb.0:
   2938 ; GENERIC-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2939 ; GENERIC-NEXT:    vmovntps %ymm0, (%rdi) # sched: [1:1.00]
   2940 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2941 ;
   2942 ; SANDY-LABEL: test_movntps:
   2943 ; SANDY:       # %bb.0:
   2944 ; SANDY-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2945 ; SANDY-NEXT:    vmovntps %ymm0, (%rdi) # sched: [1:1.00]
   2946 ; SANDY-NEXT:    retq # sched: [1:1.00]
   2947 ;
   2948 ; HASWELL-LABEL: test_movntps:
   2949 ; HASWELL:       # %bb.0:
   2950 ; HASWELL-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2951 ; HASWELL-NEXT:    vmovntps %ymm0, (%rdi) # sched: [1:1.00]
   2952 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   2953 ;
   2954 ; BROADWELL-LABEL: test_movntps:
   2955 ; BROADWELL:       # %bb.0:
   2956 ; BROADWELL-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2957 ; BROADWELL-NEXT:    vmovntps %ymm0, (%rdi) # sched: [1:1.00]
   2958 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   2959 ;
   2960 ; SKYLAKE-LABEL: test_movntps:
   2961 ; SKYLAKE:       # %bb.0:
   2962 ; SKYLAKE-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
   2963 ; SKYLAKE-NEXT:    vmovntps %ymm0, (%rdi) # sched: [1:1.00]
   2964 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   2965 ;
   2966 ; SKX-LABEL: test_movntps:
   2967 ; SKX:       # %bb.0:
   2968 ; SKX-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
   2969 ; SKX-NEXT:    vmovntps %ymm0, (%rdi) # sched: [1:1.00]
   2970 ; SKX-NEXT:    retq # sched: [7:1.00]
   2971 ;
   2972 ; BTVER2-LABEL: test_movntps:
   2973 ; BTVER2:       # %bb.0:
   2974 ; BTVER2-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
   2975 ; BTVER2-NEXT:    vmovntps %ymm0, (%rdi) # sched: [3:2.00]
   2976 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   2977 ;
   2978 ; ZNVER1-LABEL: test_movntps:
   2979 ; ZNVER1:       # %bb.0:
   2980 ; ZNVER1-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   2981 ; ZNVER1-NEXT:    vmovntps %ymm0, (%rdi) # sched: [1:0.50]
   2982 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   2983   %1 = fadd <8 x float> %a0, %a0
   2984   store <8 x float> %1, <8 x float> *%a1, align 32, !nontemporal !0
   2985   ret <8 x float> %1
   2986 }
   2987 
   2988 define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) {
        ; Applies the <1,1,3,3,5,5,7,7> shuffle to %a0 and to a value loaded from
        ; %a1, then adds the two results. Both shuffles are expected as vmovshdup
        ; (register and memory forms); the BTVER2 and ZNVER1 prefixes expect the
        ; memory form to be emitted first, the others expect the register form first.
   2989 ; GENERIC-LABEL: test_movshdup:
   2990 ; GENERIC:       # %bb.0:
   2991 ; GENERIC-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
   2992 ; GENERIC-NEXT:    vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
   2993 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   2994 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   2995 ;
   2996 ; SANDY-LABEL: test_movshdup:
   2997 ; SANDY:       # %bb.0:
   2998 ; SANDY-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
   2999 ; SANDY-NEXT:    vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
   3000 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3001 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3002 ;
   3003 ; HASWELL-LABEL: test_movshdup:
   3004 ; HASWELL:       # %bb.0:
   3005 ; HASWELL-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
   3006 ; HASWELL-NEXT:    vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
   3007 ; HASWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3008 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3009 ;
   3010 ; BROADWELL-LABEL: test_movshdup:
   3011 ; BROADWELL:       # %bb.0:
   3012 ; BROADWELL-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
   3013 ; BROADWELL-NEXT:    vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [6:0.50]
   3014 ; BROADWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3015 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3016 ;
   3017 ; SKYLAKE-LABEL: test_movshdup:
   3018 ; SKYLAKE:       # %bb.0:
   3019 ; SKYLAKE-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
   3020 ; SKYLAKE-NEXT:    vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
   3021 ; SKYLAKE-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   3022 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3023 ;
   3024 ; SKX-LABEL: test_movshdup:
   3025 ; SKX:       # %bb.0:
   3026 ; SKX-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
   3027 ; SKX-NEXT:    vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
   3028 ; SKX-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   3029 ; SKX-NEXT:    retq # sched: [7:1.00]
   3030 ;
   3031 ; BTVER2-LABEL: test_movshdup:
   3032 ; BTVER2:       # %bb.0:
   3033 ; BTVER2-NEXT:    vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [6:2.00]
   3034 ; BTVER2-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
   3035 ; BTVER2-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   3036 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3037 ;
   3038 ; ZNVER1-LABEL: test_movshdup:
   3039 ; ZNVER1:       # %bb.0:
   3040 ; ZNVER1-NEXT:    vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [8:0.50]
   3041 ; ZNVER1-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:0.50]
   3042 ; ZNVER1-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3043 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3044   %1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
   3045   %2 = load <8 x float>, <8 x float> *%a1, align 32
   3046   %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
   3047   %4 = fadd <8 x float> %1, %3
   3048   ret <8 x float> %4
   3049 }
   3050 
   3051 define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) {
        ; Applies the <0,0,2,2,4,4,6,6> shuffle to %a0 and to a value loaded from
        ; %a1, then adds the two results. Both shuffles are expected as vmovsldup
        ; (register and memory forms); the BTVER2 and ZNVER1 prefixes expect the
        ; memory form to be emitted first, the others expect the register form first.
   3052 ; GENERIC-LABEL: test_movsldup:
   3053 ; GENERIC:       # %bb.0:
   3054 ; GENERIC-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
   3055 ; GENERIC-NEXT:    vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
   3056 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3057 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3058 ;
   3059 ; SANDY-LABEL: test_movsldup:
   3060 ; SANDY:       # %bb.0:
   3061 ; SANDY-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
   3062 ; SANDY-NEXT:    vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
   3063 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3064 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3065 ;
   3066 ; HASWELL-LABEL: test_movsldup:
   3067 ; HASWELL:       # %bb.0:
   3068 ; HASWELL-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
   3069 ; HASWELL-NEXT:    vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
   3070 ; HASWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3071 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3072 ;
   3073 ; BROADWELL-LABEL: test_movsldup:
   3074 ; BROADWELL:       # %bb.0:
   3075 ; BROADWELL-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
   3076 ; BROADWELL-NEXT:    vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [6:0.50]
   3077 ; BROADWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3078 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3079 ;
   3080 ; SKYLAKE-LABEL: test_movsldup:
   3081 ; SKYLAKE:       # %bb.0:
   3082 ; SKYLAKE-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
   3083 ; SKYLAKE-NEXT:    vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
   3084 ; SKYLAKE-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   3085 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3086 ;
   3087 ; SKX-LABEL: test_movsldup:
   3088 ; SKX:       # %bb.0:
   3089 ; SKX-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
   3090 ; SKX-NEXT:    vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
   3091 ; SKX-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   3092 ; SKX-NEXT:    retq # sched: [7:1.00]
   3093 ;
   3094 ; BTVER2-LABEL: test_movsldup:
   3095 ; BTVER2:       # %bb.0:
   3096 ; BTVER2-NEXT:    vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [6:2.00]
   3097 ; BTVER2-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
   3098 ; BTVER2-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   3099 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3100 ;
   3101 ; ZNVER1-LABEL: test_movsldup:
   3102 ; ZNVER1:       # %bb.0:
   3103 ; ZNVER1-NEXT:    vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [8:0.50]
   3104 ; ZNVER1-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:0.50]
   3105 ; ZNVER1-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3106 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3107   %1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
   3108   %2 = load <8 x float>, <8 x float> *%a1, align 32
   3109   %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
   3110   %4 = fadd <8 x float> %1, %3
   3111   ret <8 x float> %4
   3112 }
   3113 
   3114 define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) {
        ; Loads a <4 x double> from %a0 with align 1, adds it to itself, stores the
        ; sum to %a1 with align 1 and returns it. Most prefixes expect a 256-bit
        ; vmovupd for both accesses; the SANDY prefix instead expects the load as
        ; vmovups %xmm + vinsertf128 and the store as vextractf128 + vmovupd %xmm.
   3115 ; GENERIC-LABEL: test_movupd:
   3116 ; GENERIC:       # %bb.0:
   3117 ; GENERIC-NEXT:    vmovupd (%rdi), %ymm0 # sched: [7:0.50]
   3118 ; GENERIC-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   3119 ; GENERIC-NEXT:    vmovupd %ymm0, (%rsi) # sched: [1:1.00]
   3120 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3121 ;
   3122 ; SANDY-LABEL: test_movupd:
   3123 ; SANDY:       # %bb.0:
   3124 ; SANDY-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
   3125 ; SANDY-NEXT:    vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
   3126 ; SANDY-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   3127 ; SANDY-NEXT:    vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00]
   3128 ; SANDY-NEXT:    vmovupd %xmm0, (%rsi) # sched: [1:1.00]
   3129 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3130 ;
   3131 ; HASWELL-LABEL: test_movupd:
   3132 ; HASWELL:       # %bb.0:
   3133 ; HASWELL-NEXT:    vmovupd (%rdi), %ymm0 # sched: [7:0.50]
   3134 ; HASWELL-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   3135 ; HASWELL-NEXT:    vmovupd %ymm0, (%rsi) # sched: [1:1.00]
   3136 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3137 ;
   3138 ; BROADWELL-LABEL: test_movupd:
   3139 ; BROADWELL:       # %bb.0:
   3140 ; BROADWELL-NEXT:    vmovupd (%rdi), %ymm0 # sched: [6:0.50]
   3141 ; BROADWELL-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   3142 ; BROADWELL-NEXT:    vmovupd %ymm0, (%rsi) # sched: [1:1.00]
   3143 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3144 ;
   3145 ; SKYLAKE-LABEL: test_movupd:
   3146 ; SKYLAKE:       # %bb.0:
   3147 ; SKYLAKE-NEXT:    vmovupd (%rdi), %ymm0 # sched: [7:0.50]
   3148 ; SKYLAKE-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
   3149 ; SKYLAKE-NEXT:    vmovupd %ymm0, (%rsi) # sched: [1:1.00]
   3150 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3151 ;
   3152 ; SKX-LABEL: test_movupd:
   3153 ; SKX:       # %bb.0:
   3154 ; SKX-NEXT:    vmovupd (%rdi), %ymm0 # sched: [7:0.50]
   3155 ; SKX-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
   3156 ; SKX-NEXT:    vmovupd %ymm0, (%rsi) # sched: [1:1.00]
   3157 ; SKX-NEXT:    retq # sched: [7:1.00]
   3158 ;
   3159 ; BTVER2-LABEL: test_movupd:
   3160 ; BTVER2:       # %bb.0:
   3161 ; BTVER2-NEXT:    vmovupd (%rdi), %ymm0 # sched: [5:1.00]
   3162 ; BTVER2-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
   3163 ; BTVER2-NEXT:    vmovupd %ymm0, (%rsi) # sched: [1:1.00]
   3164 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3165 ;
   3166 ; ZNVER1-LABEL: test_movupd:
   3167 ; ZNVER1:       # %bb.0:
   3168 ; ZNVER1-NEXT:    vmovupd (%rdi), %ymm0 # sched: [8:0.50]
   3169 ; ZNVER1-NEXT:    vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   3170 ; ZNVER1-NEXT:    vmovupd %ymm0, (%rsi) # sched: [1:0.50]
   3171 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3172   %1 = load <4 x double>, <4 x double> *%a0, align 1
   3173   %2 = fadd <4 x double> %1, %1
   3174   store <4 x double> %2, <4 x double> *%a1, align 1
   3175   ret <4 x double> %2
   3176 }
   3177 
   3178 define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) {
        ; Loads an <8 x float> from %a0 with align 1, adds it to itself, stores the
        ; sum to %a1 with align 1 and returns it. Most prefixes expect a 256-bit
        ; vmovups for both accesses; the SANDY prefix instead expects the load as
        ; vmovups %xmm + vinsertf128 and the store as vextractf128 + vmovups %xmm.
   3179 ; GENERIC-LABEL: test_movups:
   3180 ; GENERIC:       # %bb.0:
   3181 ; GENERIC-NEXT:    vmovups (%rdi), %ymm0 # sched: [7:0.50]
   3182 ; GENERIC-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   3183 ; GENERIC-NEXT:    vmovups %ymm0, (%rsi) # sched: [1:1.00]
   3184 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3185 ;
   3186 ; SANDY-LABEL: test_movups:
   3187 ; SANDY:       # %bb.0:
   3188 ; SANDY-NEXT:    vmovups (%rdi), %xmm0 # sched: [6:0.50]
   3189 ; SANDY-NEXT:    vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
   3190 ; SANDY-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   3191 ; SANDY-NEXT:    vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00]
   3192 ; SANDY-NEXT:    vmovups %xmm0, (%rsi) # sched: [1:1.00]
   3193 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3194 ;
   3195 ; HASWELL-LABEL: test_movups:
   3196 ; HASWELL:       # %bb.0:
   3197 ; HASWELL-NEXT:    vmovups (%rdi), %ymm0 # sched: [7:0.50]
   3198 ; HASWELL-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   3199 ; HASWELL-NEXT:    vmovups %ymm0, (%rsi) # sched: [1:1.00]
   3200 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3201 ;
   3202 ; BROADWELL-LABEL: test_movups:
   3203 ; BROADWELL:       # %bb.0:
   3204 ; BROADWELL-NEXT:    vmovups (%rdi), %ymm0 # sched: [6:0.50]
   3205 ; BROADWELL-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   3206 ; BROADWELL-NEXT:    vmovups %ymm0, (%rsi) # sched: [1:1.00]
   3207 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3208 ;
   3209 ; SKYLAKE-LABEL: test_movups:
   3210 ; SKYLAKE:       # %bb.0:
   3211 ; SKYLAKE-NEXT:    vmovups (%rdi), %ymm0 # sched: [7:0.50]
   3212 ; SKYLAKE-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
   3213 ; SKYLAKE-NEXT:    vmovups %ymm0, (%rsi) # sched: [1:1.00]
   3214 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3215 ;
   3216 ; SKX-LABEL: test_movups:
   3217 ; SKX:       # %bb.0:
   3218 ; SKX-NEXT:    vmovups (%rdi), %ymm0 # sched: [7:0.50]
   3219 ; SKX-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
   3220 ; SKX-NEXT:    vmovups %ymm0, (%rsi) # sched: [1:1.00]
   3221 ; SKX-NEXT:    retq # sched: [7:1.00]
   3222 ;
   3223 ; BTVER2-LABEL: test_movups:
   3224 ; BTVER2:       # %bb.0:
   3225 ; BTVER2-NEXT:    vmovups (%rdi), %ymm0 # sched: [5:1.00]
   3226 ; BTVER2-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
   3227 ; BTVER2-NEXT:    vmovups %ymm0, (%rsi) # sched: [1:1.00]
   3228 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3229 ;
   3230 ; ZNVER1-LABEL: test_movups:
   3231 ; ZNVER1:       # %bb.0:
   3232 ; ZNVER1-NEXT:    vmovups (%rdi), %ymm0 # sched: [8:0.50]
   3233 ; ZNVER1-NEXT:    vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
   3234 ; ZNVER1-NEXT:    vmovups %ymm0, (%rsi) # sched: [1:0.50]
   3235 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3236   %1 = load <8 x float>, <8 x float> *%a0, align 1
   3237   %2 = fadd <8 x float> %1, %1
   3238   store <8 x float> %2, <8 x float> *%a1, align 1
   3239   ret <8 x float> %2
   3240 }
   3241 
   3242 define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
        ; Scheduling test for the 256-bit vmulpd instruction. The IR multiplies
        ; %a0 by %a1 and then multiplies that product by a vector loaded from %a2.
        ; For every CPU prefix the expected output is a register-register vmulpd
        ; followed by a vmulpd with a (%rdi) memory operand, each carrying the
        ; "# sched" annotation requested by -print-schedule on the run lines.
        ; The assertions are autogenerated (see the first line of the file);
        ; regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
   3243 ; GENERIC-LABEL: test_mulpd:
   3244 ; GENERIC:       # %bb.0:
   3245 ; GENERIC-NEXT:    vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
   3246 ; GENERIC-NEXT:    vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
   3247 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3248 ;
   3249 ; SANDY-LABEL: test_mulpd:
   3250 ; SANDY:       # %bb.0:
   3251 ; SANDY-NEXT:    vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
   3252 ; SANDY-NEXT:    vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
   3253 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3254 ;
   3255 ; HASWELL-LABEL: test_mulpd:
   3256 ; HASWELL:       # %bb.0:
   3257 ; HASWELL-NEXT:    vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
   3258 ; HASWELL-NEXT:    vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:0.50]
   3259 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3260 ;
   3261 ; BROADWELL-LABEL: test_mulpd:
   3262 ; BROADWELL:       # %bb.0:
   3263 ; BROADWELL-NEXT:    vmulpd %ymm1, %ymm0, %ymm0 # sched: [3:0.50]
   3264 ; BROADWELL-NEXT:    vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
   3265 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3266 ;
   3267 ; SKYLAKE-LABEL: test_mulpd:
   3268 ; SKYLAKE:       # %bb.0:
   3269 ; SKYLAKE-NEXT:    vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   3270 ; SKYLAKE-NEXT:    vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   3271 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3272 ;
   3273 ; SKX-LABEL: test_mulpd:
   3274 ; SKX:       # %bb.0:
   3275 ; SKX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   3276 ; SKX-NEXT:    vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   3277 ; SKX-NEXT:    retq # sched: [7:1.00]
   3278 ;
   3279 ; BTVER2-LABEL: test_mulpd:
   3280 ; BTVER2:       # %bb.0:
   3281 ; BTVER2-NEXT:    vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:4.00]
   3282 ; BTVER2-NEXT:    vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:4.00]
   3283 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3284 ;
   3285 ; ZNVER1-LABEL: test_mulpd:
   3286 ; ZNVER1:       # %bb.0:
   3287 ; ZNVER1-NEXT:    vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   3288 ; ZNVER1-NEXT:    vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   3289 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; First fmul uses only the register arguments; the second consumes the
        ; 32-byte-aligned load, which the expected output shows as a memory operand.
   3290   %1 = fmul <4 x double> %a0, %a1
   3291   %2 = load <4 x double>, <4 x double> *%a2, align 32
   3292   %3 = fmul <4 x double> %1, %2
   3293   ret <4 x double> %3
   3294 }
   3295 
   3296 define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
        ; Scheduling test for the 256-bit vmulps instruction (single-precision
        ; counterpart of test_mulpd). The IR multiplies %a0 by %a1 and then
        ; multiplies that product by a vector loaded from %a2; the expected
        ; output per CPU prefix is a register-register vmulps followed by a
        ; vmulps with a (%rdi) memory operand, each with its "# sched" note.
        ; Assertions are autogenerated by utils/update_llc_test_checks.py;
        ; regenerate rather than editing them by hand.
   3297 ; GENERIC-LABEL: test_mulps:
   3298 ; GENERIC:       # %bb.0:
   3299 ; GENERIC-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
   3300 ; GENERIC-NEXT:    vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
   3301 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3302 ;
   3303 ; SANDY-LABEL: test_mulps:
   3304 ; SANDY:       # %bb.0:
   3305 ; SANDY-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
   3306 ; SANDY-NEXT:    vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
   3307 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3308 ;
   3309 ; HASWELL-LABEL: test_mulps:
   3310 ; HASWELL:       # %bb.0:
   3311 ; HASWELL-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
   3312 ; HASWELL-NEXT:    vmulps (%rdi), %ymm0, %ymm0 # sched: [12:0.50]
   3313 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3314 ;
   3315 ; BROADWELL-LABEL: test_mulps:
   3316 ; BROADWELL:       # %bb.0:
   3317 ; BROADWELL-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [3:0.50]
   3318 ; BROADWELL-NEXT:    vmulps (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
   3319 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3320 ;
   3321 ; SKYLAKE-LABEL: test_mulps:
   3322 ; SKYLAKE:       # %bb.0:
   3323 ; SKYLAKE-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   3324 ; SKYLAKE-NEXT:    vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   3325 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3326 ;
   3327 ; SKX-LABEL: test_mulps:
   3328 ; SKX:       # %bb.0:
   3329 ; SKX-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   3330 ; SKX-NEXT:    vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   3331 ; SKX-NEXT:    retq # sched: [7:1.00]
   3332 ;
   3333 ; BTVER2-LABEL: test_mulps:
   3334 ; BTVER2:       # %bb.0:
   3335 ; BTVER2-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
   3336 ; BTVER2-NEXT:    vmulps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
   3337 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3338 ;
   3339 ; ZNVER1-LABEL: test_mulps:
   3340 ; ZNVER1:       # %bb.0:
   3341 ; ZNVER1-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   3342 ; ZNVER1-NEXT:    vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   3343 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; First fmul uses only the register arguments; the second consumes the
        ; 32-byte-aligned load, which the expected output shows as a memory operand.
   3344   %1 = fmul <8 x float> %a0, %a1
   3345   %2 = load <8 x float>, <8 x float> *%a2, align 32
   3346   %3 = fmul <8 x float> %1, %2
   3347   ret <8 x float> %3
   3348 }
   3349 
   3350 define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
        ; Scheduling test for the 256-bit vorpd instruction. Both arguments are
        ; bitcast to <4 x i64> and OR'ed, that result is OR'ed with a vector
        ; loaded from %a2, and the final value is bitcast back to <4 x double>
        ; and added to %a1. The expected output per CPU prefix is vorpd
        ; (register form), vorpd (memory form) and vaddpd, each with its
        ; "# sched" annotation.
        ; NOTE(review): presumably the trailing fadd exists to keep the ORs in
        ; the floating-point domain so vorpd is selected - confirm.
        ; NOTE(review): this function lacks the test_ prefix used by its
        ; neighbours (test_mulps, test_orps); it looks like it should be named
        ; test_orpd. If renamed, every -LABEL assertion below must change too.
        ; Assertions are autogenerated by utils/update_llc_test_checks.py.
   3351 ; GENERIC-LABEL: orpd:
   3352 ; GENERIC:       # %bb.0:
   3353 ; GENERIC-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3354 ; GENERIC-NEXT:    vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   3355 ; GENERIC-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   3356 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3357 ;
   3358 ; SANDY-LABEL: orpd:
   3359 ; SANDY:       # %bb.0:
   3360 ; SANDY-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3361 ; SANDY-NEXT:    vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   3362 ; SANDY-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   3363 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3364 ;
   3365 ; HASWELL-LABEL: orpd:
   3366 ; HASWELL:       # %bb.0:
   3367 ; HASWELL-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3368 ; HASWELL-NEXT:    vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   3369 ; HASWELL-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   3370 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3371 ;
   3372 ; BROADWELL-LABEL: orpd:
   3373 ; BROADWELL:       # %bb.0:
   3374 ; BROADWELL-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3375 ; BROADWELL-NEXT:    vorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
   3376 ; BROADWELL-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   3377 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3378 ;
   3379 ; SKYLAKE-LABEL: orpd:
   3380 ; SKYLAKE:       # %bb.0:
   3381 ; SKYLAKE-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
   3382 ; SKYLAKE-NEXT:    vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
   3383 ; SKYLAKE-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
   3384 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3385 ;
   3386 ; SKX-LABEL: orpd:
   3387 ; SKX:       # %bb.0:
   3388 ; SKX-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
   3389 ; SKX-NEXT:    vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
   3390 ; SKX-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
   3391 ; SKX-NEXT:    retq # sched: [7:1.00]
   3392 ;
   3393 ; BTVER2-LABEL: orpd:
   3394 ; BTVER2:       # %bb.0:
   3395 ; BTVER2-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3396 ; BTVER2-NEXT:    vorpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
   3397 ; BTVER2-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
   3398 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3399 ;
   3400 ; ZNVER1-LABEL: orpd:
   3401 ; ZNVER1:       # %bb.0:
   3402 ; ZNVER1-NEXT:    vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
   3403 ; ZNVER1-NEXT:    vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
   3404 ; ZNVER1-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   3405 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; The OR is expressed on <4 x i64> because IR "or" is an integer
        ; operation; the bitcasts only reinterpret the 256-bit values.
   3406   %1 = bitcast <4 x double> %a0 to <4 x i64>
   3407   %2 = bitcast <4 x double> %a1 to <4 x i64>
   3408   %3 = or <4 x i64> %1, %2
   3409   %4 = load <4 x double>, <4 x double> *%a2, align 32
   3410   %5 = bitcast <4 x double> %4 to <4 x i64>
   3411   %6 = or <4 x i64> %3, %5
   3412   %7 = bitcast <4 x i64> %6 to <4 x double>
   3413   %8 = fadd <4 x double> %a1, %7
   3414   ret <4 x double> %8
   3415 }
   3416 
   3417 define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
        ; Scheduling test for the 256-bit vorps instruction. Both arguments are
        ; bitcast to <4 x i64> and OR'ed, that result is OR'ed with a vector
        ; loaded from %a2, and the final value is bitcast back to <8 x float>
        ; and added to %a1. The expected output per CPU prefix is vorps
        ; (register form), vorps (memory form) and vaddps, each with its
        ; "# sched" annotation.
        ; NOTE(review): presumably the trailing fadd exists to keep the ORs in
        ; the single-precision floating-point domain so vorps is selected -
        ; confirm.
        ; Assertions are autogenerated by utils/update_llc_test_checks.py.
   3418 ; GENERIC-LABEL: test_orps:
   3419 ; GENERIC:       # %bb.0:
   3420 ; GENERIC-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3421 ; GENERIC-NEXT:    vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   3422 ; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   3423 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3424 ;
   3425 ; SANDY-LABEL: test_orps:
   3426 ; SANDY:       # %bb.0:
   3427 ; SANDY-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3428 ; SANDY-NEXT:    vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   3429 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   3430 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3431 ;
   3432 ; HASWELL-LABEL: test_orps:
   3433 ; HASWELL:       # %bb.0:
   3434 ; HASWELL-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3435 ; HASWELL-NEXT:    vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   3436 ; HASWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   3437 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3438 ;
   3439 ; BROADWELL-LABEL: test_orps:
   3440 ; BROADWELL:       # %bb.0:
   3441 ; BROADWELL-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3442 ; BROADWELL-NEXT:    vorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
   3443 ; BROADWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   3444 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3445 ;
   3446 ; SKYLAKE-LABEL: test_orps:
   3447 ; SKYLAKE:       # %bb.0:
   3448 ; SKYLAKE-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
   3449 ; SKYLAKE-NEXT:    vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
   3450 ; SKYLAKE-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
   3451 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3452 ;
   3453 ; SKX-LABEL: test_orps:
   3454 ; SKX:       # %bb.0:
   3455 ; SKX-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
   3456 ; SKX-NEXT:    vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
   3457 ; SKX-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
   3458 ; SKX-NEXT:    retq # sched: [7:1.00]
   3459 ;
   3460 ; BTVER2-LABEL: test_orps:
   3461 ; BTVER2:       # %bb.0:
   3462 ; BTVER2-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3463 ; BTVER2-NEXT:    vorps (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
   3464 ; BTVER2-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
   3465 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3466 ;
   3467 ; ZNVER1-LABEL: test_orps:
   3468 ; ZNVER1:       # %bb.0:
   3469 ; ZNVER1-NEXT:    vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
   3470 ; ZNVER1-NEXT:    vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
   3471 ; ZNVER1-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   3472 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; The OR is expressed on <4 x i64> because IR "or" is an integer
        ; operation; the bitcasts only reinterpret the 256-bit values.
   3473   %1 = bitcast <8 x float> %a0 to <4 x i64>
   3474   %2 = bitcast <8 x float> %a1 to <4 x i64>
   3475   %3 = or <4 x i64> %1, %2
   3476   %4 = load <8 x float>, <8 x float> *%a2, align 32
   3477   %5 = bitcast <8 x float> %4 to <4 x i64>
   3478   %6 = or <4 x i64> %3, %5
   3479   %7 = bitcast <4 x i64> %6 to <8 x float>
   3480   %8 = fadd <8 x float> %a1, %7
   3481   ret <8 x float> %8
   3482 }
   3483 
   3484 define <4 x double> @test_perm2f128(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
        ; Scheduling test for vperm2f128. The same shuffle mask <2,3,4,5> is
        ; applied twice: once to (%a0, %a1) and once to (%a0, vector loaded
        ; from %a2). The two shuffle results are summed so both feed the
        ; returned value. The expected output per CPU prefix is vperm2f128
        ; with a register source (ymm0[2,3],ymm1[0,1]), vperm2f128 with a
        ; memory source (ymm0[2,3],mem[0,1]) and vaddpd.
        ; NOTE(review): ZNVER1 expects a first "sched" value of 100 for both
        ; vperm2f128 forms; that looks like a placeholder in the scheduling
        ; model rather than a measured number - confirm.
        ; Assertions are autogenerated by utils/update_llc_test_checks.py.
   3485 ; GENERIC-LABEL: test_perm2f128:
   3486 ; GENERIC:       # %bb.0:
   3487 ; GENERIC-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
   3488 ; GENERIC-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
   3489 ; GENERIC-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   3490 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3491 ;
   3492 ; SANDY-LABEL: test_perm2f128:
   3493 ; SANDY:       # %bb.0:
   3494 ; SANDY-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
   3495 ; SANDY-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
   3496 ; SANDY-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   3497 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3498 ;
   3499 ; HASWELL-LABEL: test_perm2f128:
   3500 ; HASWELL:       # %bb.0:
   3501 ; HASWELL-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
   3502 ; HASWELL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
   3503 ; HASWELL-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   3504 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3505 ;
   3506 ; BROADWELL-LABEL: test_perm2f128:
   3507 ; BROADWELL:       # %bb.0:
   3508 ; BROADWELL-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
   3509 ; BROADWELL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:1.00]
   3510 ; BROADWELL-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   3511 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3512 ;
   3513 ; SKYLAKE-LABEL: test_perm2f128:
   3514 ; SKYLAKE:       # %bb.0:
   3515 ; SKYLAKE-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
   3516 ; SKYLAKE-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
   3517 ; SKYLAKE-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
   3518 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3519 ;
   3520 ; SKX-LABEL: test_perm2f128:
   3521 ; SKX:       # %bb.0:
   3522 ; SKX-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
   3523 ; SKX-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
   3524 ; SKX-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
   3525 ; SKX-NEXT:    retq # sched: [7:1.00]
   3526 ;
   3527 ; BTVER2-LABEL: test_perm2f128:
   3528 ; BTVER2:       # %bb.0:
   3529 ; BTVER2-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:0.50]
   3530 ; BTVER2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [6:1.00]
   3531 ; BTVER2-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
   3532 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3533 ;
   3534 ; ZNVER1-LABEL: test_perm2f128:
   3535 ; ZNVER1:       # %bb.0:
   3536 ; ZNVER1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [100:0.25]
   3537 ; ZNVER1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [100:0.25]
   3538 ; ZNVER1-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   3539 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Mask indices 2,3 pick the upper two elements of the first operand and
        ; indices 4,5 pick the lower two elements of the second operand.
   3540   %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
   3541   %2 = load <4 x double>, <4 x double> *%a2, align 32
   3542   %3 = shufflevector <4 x double> %a0, <4 x double> %2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
   3543   %4 = fadd <4 x double> %1, %3
   3544   ret <4 x double> %4
   3545 }
   3546 
   3547 define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) {
        ; Scheduling test for the 128-bit immediate form of vpermilpd. The
        ; element-swap mask <1,0> is applied to the register argument %a0 and
        ; to a vector loaded from %a1; the two results are summed so both feed
        ; the returned value. The expected output contains one vpermilpd with
        ; a register source (xmm0[1,0]), one with a memory source (mem[1,0])
        ; and a vaddpd.
        ; The BTVER2 and ZNVER1 expectations list the memory-source vpermilpd
        ; first; every other prefix lists the register-source one first.
        ; Assertions are autogenerated by utils/update_llc_test_checks.py.
   3548 ; GENERIC-LABEL: test_permilpd:
   3549 ; GENERIC:       # %bb.0:
   3550 ; GENERIC-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
   3551 ; GENERIC-NEXT:    vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
   3552 ; GENERIC-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3553 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3554 ;
   3555 ; SANDY-LABEL: test_permilpd:
   3556 ; SANDY:       # %bb.0:
   3557 ; SANDY-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
   3558 ; SANDY-NEXT:    vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
   3559 ; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3560 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3561 ;
   3562 ; HASWELL-LABEL: test_permilpd:
   3563 ; HASWELL:       # %bb.0:
   3564 ; HASWELL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
   3565 ; HASWELL-NEXT:    vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
   3566 ; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3567 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3568 ;
   3569 ; BROADWELL-LABEL: test_permilpd:
   3570 ; BROADWELL:       # %bb.0:
   3571 ; BROADWELL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
   3572 ; BROADWELL-NEXT:    vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [6:1.00]
   3573 ; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3574 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3575 ;
   3576 ; SKYLAKE-LABEL: test_permilpd:
   3577 ; SKYLAKE:       # %bb.0:
   3578 ; SKYLAKE-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
   3579 ; SKYLAKE-NEXT:    vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
   3580 ; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   3581 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3582 ;
   3583 ; SKX-LABEL: test_permilpd:
   3584 ; SKX:       # %bb.0:
   3585 ; SKX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
   3586 ; SKX-NEXT:    vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
   3587 ; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   3588 ; SKX-NEXT:    retq # sched: [7:1.00]
   3589 ;
   3590 ; BTVER2-LABEL: test_permilpd:
   3591 ; BTVER2:       # %bb.0:
   3592 ; BTVER2-NEXT:    vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [6:1.00]
   3593 ; BTVER2-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:0.50]
   3594 ; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3595 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3596 ;
   3597 ; ZNVER1-LABEL: test_permilpd:
   3598 ; ZNVER1:       # %bb.0:
   3599 ; ZNVER1-NEXT:    vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [8:0.50]
   3600 ; ZNVER1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:0.50]
   3601 ; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3602 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Single-input shuffles (second operand is undef) with mask <1,0>,
        ; i.e. the two double elements are swapped.
   3603   %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 1, i32 0>
   3604   %2 = load <2 x double>, <2 x double> *%a1, align 16
   3605   %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> <i32 1, i32 0>
   3606   %4 = fadd <2 x double> %1, %3
   3607   ret <2 x double> %4
   3608 }
   3609 
   3610 define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) {
        ; Scheduling test for the 256-bit immediate form of vpermilpd. The
        ; mask <1,0,2,3> is applied to the register argument %a0 and to a
        ; vector loaded from %a1; the two results are summed so both feed the
        ; returned value. The expected output contains one vpermilpd with a
        ; register source (ymm0[1,0,2,3]), one with a memory source
        ; (mem[1,0,2,3]) and a vaddpd.
        ; The BTVER2 and ZNVER1 expectations list the memory-source vpermilpd
        ; first; every other prefix lists the register-source one first.
        ; Assertions are autogenerated by utils/update_llc_test_checks.py.
   3611 ; GENERIC-LABEL: test_permilpd_ymm:
   3612 ; GENERIC:       # %bb.0:
   3613 ; GENERIC-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
   3614 ; GENERIC-NEXT:    vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
   3615 ; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3616 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3617 ;
   3618 ; SANDY-LABEL: test_permilpd_ymm:
   3619 ; SANDY:       # %bb.0:
   3620 ; SANDY-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
   3621 ; SANDY-NEXT:    vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
   3622 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3623 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3624 ;
   3625 ; HASWELL-LABEL: test_permilpd_ymm:
   3626 ; HASWELL:       # %bb.0:
   3627 ; HASWELL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
   3628 ; HASWELL-NEXT:    vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
   3629 ; HASWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3630 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3631 ;
   3632 ; BROADWELL-LABEL: test_permilpd_ymm:
   3633 ; BROADWELL:       # %bb.0:
   3634 ; BROADWELL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
   3635 ; BROADWELL-NEXT:    vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [7:1.00]
   3636 ; BROADWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3637 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3638 ;
   3639 ; SKYLAKE-LABEL: test_permilpd_ymm:
   3640 ; SKYLAKE:       # %bb.0:
   3641 ; SKYLAKE-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
   3642 ; SKYLAKE-NEXT:    vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
   3643 ; SKYLAKE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   3644 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3645 ;
   3646 ; SKX-LABEL: test_permilpd_ymm:
   3647 ; SKX:       # %bb.0:
   3648 ; SKX-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
   3649 ; SKX-NEXT:    vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
   3650 ; SKX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   3651 ; SKX-NEXT:    retq # sched: [7:1.00]
   3652 ;
   3653 ; BTVER2-LABEL: test_permilpd_ymm:
   3654 ; BTVER2:       # %bb.0:
   3655 ; BTVER2-NEXT:    vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [6:2.00]
   3656 ; BTVER2-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
   3657 ; BTVER2-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   3658 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3659 ;
   3660 ; ZNVER1-LABEL: test_permilpd_ymm:
   3661 ; ZNVER1:       # %bb.0:
   3662 ; ZNVER1-NEXT:    vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:0.50]
   3663 ; ZNVER1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:0.50]
   3664 ; ZNVER1-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3665 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Single-input shuffles (second operand is undef) with mask <1,0,2,3>:
        ; elements 0 and 1 are swapped, elements 2 and 3 stay in place.
   3666   %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
   3667   %2 = load <4 x double>, <4 x double> *%a1, align 32
   3668   %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
   3669   %4 = fadd <4 x double> %1, %3
   3670   ret <4 x double> %4
   3671 }
   3672 
   3673 define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) {
        ; Scheduling test for the 128-bit immediate form of vpermilps. The
        ; element-reversing mask <3,2,1,0> is applied to the register argument
        ; %a0 and to a vector loaded from %a1; the two results are summed so
        ; both feed the returned value. The expected output contains one
        ; vpermilps with a register source (xmm0[3,2,1,0]), one with a memory
        ; source (mem[3,2,1,0]) and a vaddps.
        ; The BTVER2 and ZNVER1 expectations list the memory-source vpermilps
        ; first; every other prefix lists the register-source one first.
        ; Assertions are autogenerated by utils/update_llc_test_checks.py.
   3674 ; GENERIC-LABEL: test_permilps:
   3675 ; GENERIC:       # %bb.0:
   3676 ; GENERIC-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
   3677 ; GENERIC-NEXT:    vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
   3678 ; GENERIC-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3679 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3680 ;
   3681 ; SANDY-LABEL: test_permilps:
   3682 ; SANDY:       # %bb.0:
   3683 ; SANDY-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
   3684 ; SANDY-NEXT:    vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
   3685 ; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3686 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3687 ;
   3688 ; HASWELL-LABEL: test_permilps:
   3689 ; HASWELL:       # %bb.0:
   3690 ; HASWELL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
   3691 ; HASWELL-NEXT:    vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
   3692 ; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3693 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3694 ;
   3695 ; BROADWELL-LABEL: test_permilps:
   3696 ; BROADWELL:       # %bb.0:
   3697 ; BROADWELL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
   3698 ; BROADWELL-NEXT:    vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
   3699 ; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3700 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3701 ;
   3702 ; SKYLAKE-LABEL: test_permilps:
   3703 ; SKYLAKE:       # %bb.0:
   3704 ; SKYLAKE-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
   3705 ; SKYLAKE-NEXT:    vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
   3706 ; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   3707 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3708 ;
   3709 ; SKX-LABEL: test_permilps:
   3710 ; SKX:       # %bb.0:
   3711 ; SKX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
   3712 ; SKX-NEXT:    vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
   3713 ; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
   3714 ; SKX-NEXT:    retq # sched: [7:1.00]
   3715 ;
   3716 ; BTVER2-LABEL: test_permilps:
   3717 ; BTVER2:       # %bb.0:
   3718 ; BTVER2-NEXT:    vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
   3719 ; BTVER2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:0.50]
   3720 ; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3721 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3722 ;
   3723 ; ZNVER1-LABEL: test_permilps:
   3724 ; ZNVER1:       # %bb.0:
   3725 ; ZNVER1-NEXT:    vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [8:0.50]
   3726 ; ZNVER1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:0.50]
   3727 ; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
   3728 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Single-input shuffles (second operand is undef) with mask <3,2,1,0>,
        ; i.e. the four float elements are reversed.
   3729   %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   3730   %2 = load <4 x float>, <4 x float> *%a1, align 16
   3731   %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   3732   %4 = fadd <4 x float> %1, %3
   3733   ret <4 x float> %4
   3734 }
   3735 
   3736 define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) {
        ; Scheduling test for the 256-bit immediate form of vpermilps. The
        ; mask <3,2,1,0,7,6,5,4> is applied to the register argument %a0 and
        ; to a vector loaded from %a1; the two results are summed so both feed
        ; the returned value. The expected output contains one vpermilps with
        ; a register source (ymm0[3,2,1,0,7,6,5,4]), one with a memory source
        ; (mem[3,2,1,0,7,6,5,4]) and a vaddps.
        ; The BTVER2 and ZNVER1 expectations list the memory-source vpermilps
        ; first; every other prefix lists the register-source one first.
        ; Assertions are autogenerated by utils/update_llc_test_checks.py.
   3737 ; GENERIC-LABEL: test_permilps_ymm:
   3738 ; GENERIC:       # %bb.0:
   3739 ; GENERIC-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
   3740 ; GENERIC-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
   3741 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3742 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3743 ;
   3744 ; SANDY-LABEL: test_permilps_ymm:
   3745 ; SANDY:       # %bb.0:
   3746 ; SANDY-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
   3747 ; SANDY-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
   3748 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3749 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3750 ;
   3751 ; HASWELL-LABEL: test_permilps_ymm:
   3752 ; HASWELL:       # %bb.0:
   3753 ; HASWELL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
   3754 ; HASWELL-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
   3755 ; HASWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3756 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3757 ;
   3758 ; BROADWELL-LABEL: test_permilps_ymm:
   3759 ; BROADWELL:       # %bb.0:
   3760 ; BROADWELL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
   3761 ; BROADWELL-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [7:1.00]
   3762 ; BROADWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3763 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3764 ;
   3765 ; SKYLAKE-LABEL: test_permilps_ymm:
   3766 ; SKYLAKE:       # %bb.0:
   3767 ; SKYLAKE-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
   3768 ; SKYLAKE-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
   3769 ; SKYLAKE-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   3770 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3771 ;
   3772 ; SKX-LABEL: test_permilps_ymm:
   3773 ; SKX:       # %bb.0:
   3774 ; SKX-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
   3775 ; SKX-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
   3776 ; SKX-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   3777 ; SKX-NEXT:    retq # sched: [7:1.00]
   3778 ;
   3779 ; BTVER2-LABEL: test_permilps_ymm:
   3780 ; BTVER2:       # %bb.0:
   3781 ; BTVER2-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [6:2.00]
   3782 ; BTVER2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
   3783 ; BTVER2-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   3784 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3785 ;
   3786 ; ZNVER1-LABEL: test_permilps_ymm:
   3787 ; ZNVER1:       # %bb.0:
   3788 ; ZNVER1-NEXT:    vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:0.50]
   3789 ; ZNVER1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.50]
   3790 ; ZNVER1-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   3791 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; Single-input shuffles (second operand is undef); the mask reverses
        ; elements 0-3 among themselves and elements 4-7 among themselves.
   3792   %1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   3793   %2 = load <8 x float>, <8 x float> *%a1, align 32
   3794   %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   3795   %4 = fadd <8 x float> %1, %3
   3796   ret <8 x float> %4
   3797 }
   3798 
   3799 define <2 x double> @test_permilvarpd(<2 x double> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
        ; Scheduling test for the 128-bit variable-control form of vpermilpd,
        ; reached through the llvm.x86.avx.vpermilvar.pd intrinsic. The
        ; intrinsic is called twice: first on %a0 with the control vector %a1,
        ; then on that result with a control vector loaded from %a2. The
        ; expected output per CPU prefix is vpermilpd with an %xmm1 control
        ; operand followed by vpermilpd with a (%rdi) control operand.
        ; Assertions are autogenerated by utils/update_llc_test_checks.py;
        ; regenerate rather than editing them by hand.
   3800 ; GENERIC-LABEL: test_permilvarpd:
   3801 ; GENERIC:       # %bb.0:
   3802 ; GENERIC-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   3803 ; GENERIC-NEXT:    vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3804 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3805 ;
   3806 ; SANDY-LABEL: test_permilvarpd:
   3807 ; SANDY:       # %bb.0:
   3808 ; SANDY-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   3809 ; SANDY-NEXT:    vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3810 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3811 ;
   3812 ; HASWELL-LABEL: test_permilvarpd:
   3813 ; HASWELL:       # %bb.0:
   3814 ; HASWELL-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   3815 ; HASWELL-NEXT:    vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3816 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3817 ;
   3818 ; BROADWELL-LABEL: test_permilvarpd:
   3819 ; BROADWELL:       # %bb.0:
   3820 ; BROADWELL-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   3821 ; BROADWELL-NEXT:    vpermilpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   3822 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3823 ;
   3824 ; SKYLAKE-LABEL: test_permilvarpd:
   3825 ; SKYLAKE:       # %bb.0:
   3826 ; SKYLAKE-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   3827 ; SKYLAKE-NEXT:    vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3828 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3829 ;
   3830 ; SKX-LABEL: test_permilvarpd:
   3831 ; SKX:       # %bb.0:
   3832 ; SKX-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   3833 ; SKX-NEXT:    vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3834 ; SKX-NEXT:    retq # sched: [7:1.00]
   3835 ;
   3836 ; BTVER2-LABEL: test_permilvarpd:
   3837 ; BTVER2:       # %bb.0:
   3838 ; BTVER2-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
   3839 ; BTVER2-NEXT:    vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
   3840 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3841 ;
   3842 ; ZNVER1-LABEL: test_permilvarpd:
   3843 ; ZNVER1:       # %bb.0:
   3844 ; ZNVER1-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3845 ; ZNVER1-NEXT:    vpermilpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   3846 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; The second call consumes the first call's result (%1), so the two
        ; permutes form a dependent chain rather than independent operations.
   3847   %1 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1)
   3848   %2 = load <2 x i64>, <2 x i64> *%a2, align 16
   3849   %3 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %1, <2 x i64> %2)
   3850   ret <2 x double> %3
   3851 }
        ; Declaration of the intrinsic called by test_permilvarpd above.
   3852 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
   3853 
   ; Scheduling test for the 256-bit variable VPERMILPD intrinsic
   ; (llvm.x86.avx.vpermilvar.pd.256). The intrinsic is applied twice: first with
   ; the selector passed in a register (%a1), then on that result with a selector
   ; loaded from %a2, so both the register form and the (%rdi) memory form are
   ; checked under every CPU prefix. The "# sched: [a:b]" suffixes come from
   ; llc -print-schedule (see the RUN lines); presumably a is latency and b is
   ; reciprocal throughput - TODO confirm against the scheduling models.
   3854 define <4 x double> @test_permilvarpd_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
   3855 ; GENERIC-LABEL: test_permilvarpd_ymm:
   3856 ; GENERIC:       # %bb.0:
   3857 ; GENERIC-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3858 ; GENERIC-NEXT:    vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   3859 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3860 ;
   3861 ; SANDY-LABEL: test_permilvarpd_ymm:
   3862 ; SANDY:       # %bb.0:
   3863 ; SANDY-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3864 ; SANDY-NEXT:    vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   3865 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3866 ;
   3867 ; HASWELL-LABEL: test_permilvarpd_ymm:
   3868 ; HASWELL:       # %bb.0:
   3869 ; HASWELL-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3870 ; HASWELL-NEXT:    vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   3871 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3872 ;
   3873 ; BROADWELL-LABEL: test_permilvarpd_ymm:
   3874 ; BROADWELL:       # %bb.0:
   3875 ; BROADWELL-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3876 ; BROADWELL-NEXT:    vpermilpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
   3877 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3878 ;
   3879 ; SKYLAKE-LABEL: test_permilvarpd_ymm:
   3880 ; SKYLAKE:       # %bb.0:
   3881 ; SKYLAKE-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3882 ; SKYLAKE-NEXT:    vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   3883 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3884 ;
   3885 ; SKX-LABEL: test_permilvarpd_ymm:
   3886 ; SKX:       # %bb.0:
   3887 ; SKX-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3888 ; SKX-NEXT:    vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   3889 ; SKX-NEXT:    retq # sched: [7:1.00]
   3890 ;
   3891 ; BTVER2-LABEL: test_permilvarpd_ymm:
   3892 ; BTVER2:       # %bb.0:
   3893 ; BTVER2-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
   3894 ; BTVER2-NEXT:    vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
   3895 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3896 ;
   3897 ; ZNVER1-LABEL: test_permilvarpd_ymm:
   3898 ; ZNVER1:       # %bb.0:
   3899 ; ZNVER1-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
   3900 ; ZNVER1-NEXT:    vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
   3901 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3902   %1 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1)
   3903   %2 = load <4 x i64>, <4 x i64> *%a2, align 32
   3904   %3 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %1, <4 x i64> %2)
   3905   ret <4 x double> %3
   3906 }
   3907 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
   3908 
   ; Scheduling test for the 128-bit variable VPERMILPS intrinsic
   ; (llvm.x86.avx.vpermilvar.ps). The first call takes its selector from a
   ; register (%a1); the second permutes that result with a selector loaded from
   ; %a2 (align 16), so the register form and the (%rdi) memory form are both
   ; emitted and checked under every CPU prefix. The "# sched: [a:b]" suffixes
   ; come from llc -print-schedule (see the RUN lines).
   3909 define <4 x float> @test_permilvarps(<4 x float> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
   3910 ; GENERIC-LABEL: test_permilvarps:
   3911 ; GENERIC:       # %bb.0:
   3912 ; GENERIC-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   3913 ; GENERIC-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3914 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3915 ;
   3916 ; SANDY-LABEL: test_permilvarps:
   3917 ; SANDY:       # %bb.0:
   3918 ; SANDY-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   3919 ; SANDY-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3920 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3921 ;
   3922 ; HASWELL-LABEL: test_permilvarps:
   3923 ; HASWELL:       # %bb.0:
   3924 ; HASWELL-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   3925 ; HASWELL-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3926 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3927 ;
   3928 ; BROADWELL-LABEL: test_permilvarps:
   3929 ; BROADWELL:       # %bb.0:
   3930 ; BROADWELL-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   3931 ; BROADWELL-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
   3932 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3933 ;
   3934 ; SKYLAKE-LABEL: test_permilvarps:
   3935 ; SKYLAKE:       # %bb.0:
   3936 ; SKYLAKE-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   3937 ; SKYLAKE-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3938 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3939 ;
   3940 ; SKX-LABEL: test_permilvarps:
   3941 ; SKX:       # %bb.0:
   3942 ; SKX-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
   3943 ; SKX-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
   3944 ; SKX-NEXT:    retq # sched: [7:1.00]
   3945 ;
   3946 ; BTVER2-LABEL: test_permilvarps:
   3947 ; BTVER2:       # %bb.0:
   3948 ; BTVER2-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
   3949 ; BTVER2-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
   3950 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   3951 ;
   3952 ; ZNVER1-LABEL: test_permilvarps:
   3953 ; ZNVER1:       # %bb.0:
   3954 ; ZNVER1-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
   3955 ; ZNVER1-NEXT:    vpermilps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
   3956 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   3957   %1 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1)
   3958   %2 = load <4 x i32>, <4 x i32> *%a2, align 16
   3959   %3 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %1, <4 x i32> %2)
   3960   ret <4 x float> %3
   3961 }
   3962 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
   3963 
   ; Scheduling test for the 256-bit variable VPERMILPS intrinsic
   ; (llvm.x86.avx.vpermilvar.ps.256). The first call takes its selector from a
   ; register (%a1); the second permutes that result with a selector loaded from
   ; %a2 (align 32), so the register form and the (%rdi) memory form are both
   ; emitted and checked under every CPU prefix. The "# sched: [a:b]" suffixes
   ; come from llc -print-schedule (see the RUN lines).
   3964 define <8 x float> @test_permilvarps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
   3965 ; GENERIC-LABEL: test_permilvarps_ymm:
   3966 ; GENERIC:       # %bb.0:
   3967 ; GENERIC-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3968 ; GENERIC-NEXT:    vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   3969 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   3970 ;
   3971 ; SANDY-LABEL: test_permilvarps_ymm:
   3972 ; SANDY:       # %bb.0:
   3973 ; SANDY-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3974 ; SANDY-NEXT:    vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   3975 ; SANDY-NEXT:    retq # sched: [1:1.00]
   3976 ;
   3977 ; HASWELL-LABEL: test_permilvarps_ymm:
   3978 ; HASWELL:       # %bb.0:
   3979 ; HASWELL-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3980 ; HASWELL-NEXT:    vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   3981 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   3982 ;
   3983 ; BROADWELL-LABEL: test_permilvarps_ymm:
   3984 ; BROADWELL:       # %bb.0:
   3985 ; BROADWELL-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3986 ; BROADWELL-NEXT:    vpermilps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
   3987 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   3988 ;
   3989 ; SKYLAKE-LABEL: test_permilvarps_ymm:
   3990 ; SKYLAKE:       # %bb.0:
   3991 ; SKYLAKE-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3992 ; SKYLAKE-NEXT:    vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   3993 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   3994 ;
   3995 ; SKX-LABEL: test_permilvarps_ymm:
   3996 ; SKX:       # %bb.0:
   3997 ; SKX-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   3998 ; SKX-NEXT:    vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   3999 ; SKX-NEXT:    retq # sched: [7:1.00]
   4000 ;
   4001 ; BTVER2-LABEL: test_permilvarps_ymm:
   4002 ; BTVER2:       # %bb.0:
   4003 ; BTVER2-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
   4004 ; BTVER2-NEXT:    vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
   4005 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4006 ;
   4007 ; ZNVER1-LABEL: test_permilvarps_ymm:
   4008 ; ZNVER1:       # %bb.0:
   4009 ; ZNVER1-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
   4010 ; ZNVER1-NEXT:    vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
   4011 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4012   %1 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1)
   4013   %2 = load <8 x i32>, <8 x i32> *%a2, align 32
   4014   %3 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> %2)
   4015   ret <8 x float> %3
   4016 }
   4017 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
   4018 
   ; Scheduling test for 256-bit VRCPPS (llvm.x86.avx.rcp.ps.256). The intrinsic
   ; is called once on the register argument %a0 and once on a vector loaded from
   ; %a1; the two results are combined with fadd so both calls stay live. Each
   ; CPU prefix therefore checks a register-form vrcpps, a (%rdi) memory-form
   ; vrcpps and the vaddps. The order of the two vrcpps differs between prefixes,
   ; and each prefix pins the order it expects. The "# sched: [a:b]" suffixes
   ; come from llc -print-schedule (see the RUN lines).
   4019 define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) {
   4020 ; GENERIC-LABEL: test_rcpps:
   4021 ; GENERIC:       # %bb.0:
   4022 ; GENERIC-NEXT:    vrcpps (%rdi), %ymm1 # sched: [14:2.00]
   4023 ; GENERIC-NEXT:    vrcpps %ymm0, %ymm0 # sched: [7:2.00]
   4024 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4025 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4026 ;
   4027 ; SANDY-LABEL: test_rcpps:
   4028 ; SANDY:       # %bb.0:
   4029 ; SANDY-NEXT:    vrcpps (%rdi), %ymm1 # sched: [14:2.00]
   4030 ; SANDY-NEXT:    vrcpps %ymm0, %ymm0 # sched: [7:2.00]
   4031 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4032 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4033 ;
   4034 ; HASWELL-LABEL: test_rcpps:
   4035 ; HASWELL:       # %bb.0:
   4036 ; HASWELL-NEXT:    vrcpps (%rdi), %ymm1 # sched: [18:2.00]
   4037 ; HASWELL-NEXT:    vrcpps %ymm0, %ymm0 # sched: [11:2.00]
   4038 ; HASWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4039 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4040 ;
   4041 ; BROADWELL-LABEL: test_rcpps:
   4042 ; BROADWELL:       # %bb.0:
   4043 ; BROADWELL-NEXT:    vrcpps %ymm0, %ymm0 # sched: [11:2.00]
   4044 ; BROADWELL-NEXT:    vrcpps (%rdi), %ymm1 # sched: [17:2.00]
   4045 ; BROADWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4046 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4047 ;
   4048 ; SKYLAKE-LABEL: test_rcpps:
   4049 ; SKYLAKE:       # %bb.0:
   4050 ; SKYLAKE-NEXT:    vrcpps %ymm0, %ymm0 # sched: [4:1.00]
   4051 ; SKYLAKE-NEXT:    vrcpps (%rdi), %ymm1 # sched: [11:1.00]
   4052 ; SKYLAKE-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4053 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4054 ;
   4055 ; SKX-LABEL: test_rcpps:
   4056 ; SKX:       # %bb.0:
   4057 ; SKX-NEXT:    vrcpps %ymm0, %ymm0 # sched: [4:1.00]
   4058 ; SKX-NEXT:    vrcpps (%rdi), %ymm1 # sched: [11:1.00]
   4059 ; SKX-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4060 ; SKX-NEXT:    retq # sched: [7:1.00]
   4061 ;
   4062 ; BTVER2-LABEL: test_rcpps:
   4063 ; BTVER2:       # %bb.0:
   4064 ; BTVER2-NEXT:    vrcpps (%rdi), %ymm1 # sched: [7:2.00]
   4065 ; BTVER2-NEXT:    vrcpps %ymm0, %ymm0 # sched: [2:2.00]
   4066 ; BTVER2-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   4067 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4068 ;
   4069 ; ZNVER1-LABEL: test_rcpps:
   4070 ; ZNVER1:       # %bb.0:
   4071 ; ZNVER1-NEXT:    vrcpps (%rdi), %ymm1 # sched: [12:0.50]
   4072 ; ZNVER1-NEXT:    vrcpps %ymm0, %ymm0 # sched: [5:0.50]
   4073 ; ZNVER1-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4074 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4075   %1 = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0)
   4076   %2 = load <8 x float>, <8 x float> *%a1, align 32
   4077   %3 = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %2)
   4078   %4 = fadd <8 x float> %1, %3
   4079   ret <8 x float> %4
   4080 }
   4081 declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
   4082 
   ; Scheduling test for 256-bit VROUNDPD (llvm.x86.avx.round.pd.256) with the
   ; immediate 7. The intrinsic is called once on the register argument %a0 and
   ; once on a vector loaded from %a1; the results are summed with fadd so both
   ; calls stay live. Each CPU prefix checks a register-form vroundpd, a (%rdi)
   ; memory-form vroundpd and the vaddpd, in the order that prefix expects.
   ; The "# sched: [a:b]" suffixes come from llc -print-schedule (see the RUN
   ; lines).
   4083 define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) {
   4084 ; GENERIC-LABEL: test_roundpd:
   4085 ; GENERIC:       # %bb.0:
   4086 ; GENERIC-NEXT:    vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
   4087 ; GENERIC-NEXT:    vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00]
   4088 ; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4089 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4090 ;
   4091 ; SANDY-LABEL: test_roundpd:
   4092 ; SANDY:       # %bb.0:
   4093 ; SANDY-NEXT:    vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
   4094 ; SANDY-NEXT:    vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00]
   4095 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4096 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4097 ;
   4098 ; HASWELL-LABEL: test_roundpd:
   4099 ; HASWELL:       # %bb.0:
   4100 ; HASWELL-NEXT:    vroundpd $7, %ymm0, %ymm0 # sched: [6:0.50]
   4101 ; HASWELL-NEXT:    vroundpd $7, (%rdi), %ymm1 # sched: [13:2.00]
   4102 ; HASWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4103 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4104 ;
   4105 ; BROADWELL-LABEL: test_roundpd:
   4106 ; BROADWELL:       # %bb.0:
   4107 ; BROADWELL-NEXT:    vroundpd $7, (%rdi), %ymm1 # sched: [12:2.00]
   4108 ; BROADWELL-NEXT:    vroundpd $7, %ymm0, %ymm0 # sched: [6:0.50]
   4109 ; BROADWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4110 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4111 ;
   4112 ; SKYLAKE-LABEL: test_roundpd:
   4113 ; SKYLAKE:       # %bb.0:
   4114 ; SKYLAKE-NEXT:    vroundpd $7, %ymm0, %ymm0 # sched: [8:1.00]
   4115 ; SKYLAKE-NEXT:    vroundpd $7, (%rdi), %ymm1 # sched: [15:1.00]
   4116 ; SKYLAKE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4117 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4118 ;
   4119 ; SKX-LABEL: test_roundpd:
   4120 ; SKX:       # %bb.0:
   4121 ; SKX-NEXT:    vroundpd $7, %ymm0, %ymm0 # sched: [8:1.00]
   4122 ; SKX-NEXT:    vroundpd $7, (%rdi), %ymm1 # sched: [15:1.00]
   4123 ; SKX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4124 ; SKX-NEXT:    retq # sched: [7:1.00]
   4125 ;
   4126 ; BTVER2-LABEL: test_roundpd:
   4127 ; BTVER2:       # %bb.0:
   4128 ; BTVER2-NEXT:    vroundpd $7, (%rdi), %ymm1 # sched: [8:2.00]
   4129 ; BTVER2-NEXT:    vroundpd $7, %ymm0, %ymm0 # sched: [3:2.00]
   4130 ; BTVER2-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   4131 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4132 ;
   4133 ; ZNVER1-LABEL: test_roundpd:
   4134 ; ZNVER1:       # %bb.0:
   4135 ; ZNVER1-NEXT:    vroundpd $7, (%rdi), %ymm1 # sched: [11:1.00]
   4136 ; ZNVER1-NEXT:    vroundpd $7, %ymm0, %ymm0 # sched: [4:1.00]
   4137 ; ZNVER1-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4138 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4139   %1 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7)
   4140   %2 = load <4 x double>, <4 x double> *%a1, align 32
   4141   %3 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %2, i32 7)
   4142   %4 = fadd <4 x double> %1, %3
   4143   ret <4 x double> %4
   4144 }
   4145 declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
   4146 
   ; Scheduling test for 256-bit VROUNDPS (llvm.x86.avx.round.ps.256) with the
   ; immediate 7. The intrinsic is called once on the register argument %a0 and
   ; once on a vector loaded from %a1; the results are summed with fadd so both
   ; calls stay live. Each CPU prefix checks a register-form vroundps, a (%rdi)
   ; memory-form vroundps and the vaddps, in the order that prefix expects.
   ; The "# sched: [a:b]" suffixes come from llc -print-schedule (see the RUN
   ; lines).
   4147 define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) {
   4148 ; GENERIC-LABEL: test_roundps:
   4149 ; GENERIC:       # %bb.0:
   4150 ; GENERIC-NEXT:    vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
   4151 ; GENERIC-NEXT:    vroundps $7, (%rdi), %ymm1 # sched: [10:1.00]
   4152 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4153 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4154 ;
   4155 ; SANDY-LABEL: test_roundps:
   4156 ; SANDY:       # %bb.0:
   4157 ; SANDY-NEXT:    vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
   4158 ; SANDY-NEXT:    vroundps $7, (%rdi), %ymm1 # sched: [10:1.00]
   4159 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4160 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4161 ;
   4162 ; HASWELL-LABEL: test_roundps:
   4163 ; HASWELL:       # %bb.0:
   4164 ; HASWELL-NEXT:    vroundps $7, %ymm0, %ymm0 # sched: [6:0.50]
   4165 ; HASWELL-NEXT:    vroundps $7, (%rdi), %ymm1 # sched: [13:2.00]
   4166 ; HASWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4167 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4168 ;
   4169 ; BROADWELL-LABEL: test_roundps:
   4170 ; BROADWELL:       # %bb.0:
   4171 ; BROADWELL-NEXT:    vroundps $7, (%rdi), %ymm1 # sched: [12:2.00]
   4172 ; BROADWELL-NEXT:    vroundps $7, %ymm0, %ymm0 # sched: [6:0.50]
   4173 ; BROADWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4174 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4175 ;
   4176 ; SKYLAKE-LABEL: test_roundps:
   4177 ; SKYLAKE:       # %bb.0:
   4178 ; SKYLAKE-NEXT:    vroundps $7, %ymm0, %ymm0 # sched: [8:1.00]
   4179 ; SKYLAKE-NEXT:    vroundps $7, (%rdi), %ymm1 # sched: [15:1.00]
   4180 ; SKYLAKE-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4181 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4182 ;
   4183 ; SKX-LABEL: test_roundps:
   4184 ; SKX:       # %bb.0:
   4185 ; SKX-NEXT:    vroundps $7, %ymm0, %ymm0 # sched: [8:1.00]
   4186 ; SKX-NEXT:    vroundps $7, (%rdi), %ymm1 # sched: [15:1.00]
   4187 ; SKX-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4188 ; SKX-NEXT:    retq # sched: [7:1.00]
   4189 ;
   4190 ; BTVER2-LABEL: test_roundps:
   4191 ; BTVER2:       # %bb.0:
   4192 ; BTVER2-NEXT:    vroundps $7, (%rdi), %ymm1 # sched: [8:2.00]
   4193 ; BTVER2-NEXT:    vroundps $7, %ymm0, %ymm0 # sched: [3:2.00]
   4194 ; BTVER2-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   4195 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4196 ;
   4197 ; ZNVER1-LABEL: test_roundps:
   4198 ; ZNVER1:       # %bb.0:
   4199 ; ZNVER1-NEXT:    vroundps $7, (%rdi), %ymm1 # sched: [11:1.00]
   4200 ; ZNVER1-NEXT:    vroundps $7, %ymm0, %ymm0 # sched: [4:1.00]
   4201 ; ZNVER1-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4202 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4203   %1 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7)
   4204   %2 = load <8 x float>, <8 x float> *%a1, align 32
   4205   %3 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %2, i32 7)
   4206   %4 = fadd <8 x float> %1, %3
   4207   ret <8 x float> %4
   4208 }
   4209 declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
   4210 
   ; Scheduling test for 256-bit VRSQRTPS (llvm.x86.avx.rsqrt.ps.256). The
   ; intrinsic is called once on the register argument %a0 and once on a vector
   ; loaded from %a1; the results are summed with fadd so both calls stay live.
   ; Each CPU prefix checks a register-form vrsqrtps, a (%rdi) memory-form
   ; vrsqrtps and the vaddps, in the order that prefix expects. The
   ; "# sched: [a:b]" suffixes come from llc -print-schedule (see the RUN lines).
   ; Within one prefix the register form and the memory form of vrsqrtps carry
   ; the same second field; only the first field grows for the load.
   4211 define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) {
   4212 ; GENERIC-LABEL: test_rsqrtps:
   4213 ; GENERIC:       # %bb.0:
   4214 ; GENERIC-NEXT:    vrsqrtps (%rdi), %ymm1 # sched: [14:2.00]
   4215 ; GENERIC-NEXT:    vrsqrtps %ymm0, %ymm0 # sched: [7:2.00]
   4216 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4217 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4218 ;
   4219 ; SANDY-LABEL: test_rsqrtps:
   4220 ; SANDY:       # %bb.0:
   4221 ; SANDY-NEXT:    vrsqrtps (%rdi), %ymm1 # sched: [14:2.00]
   4222 ; SANDY-NEXT:    vrsqrtps %ymm0, %ymm0 # sched: [7:2.00]
   4223 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4224 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4225 ;
   4226 ; HASWELL-LABEL: test_rsqrtps:
   4227 ; HASWELL:       # %bb.0:
   4228 ; HASWELL-NEXT:    vrsqrtps (%rdi), %ymm1 # sched: [18:2.00]
   4229 ; HASWELL-NEXT:    vrsqrtps %ymm0, %ymm0 # sched: [11:2.00]
   4230 ; HASWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4231 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4232 ;
   4233 ; BROADWELL-LABEL: test_rsqrtps:
   4234 ; BROADWELL:       # %bb.0:
   4235 ; BROADWELL-NEXT:    vrsqrtps %ymm0, %ymm0 # sched: [11:2.00]
   4236 ; BROADWELL-NEXT:    vrsqrtps (%rdi), %ymm1 # sched: [17:2.00]
   4237 ; BROADWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4238 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4239 ;
   4240 ; SKYLAKE-LABEL: test_rsqrtps:
   4241 ; SKYLAKE:       # %bb.0:
   4242 ; SKYLAKE-NEXT:    vrsqrtps %ymm0, %ymm0 # sched: [4:1.00]
   4243 ; SKYLAKE-NEXT:    vrsqrtps (%rdi), %ymm1 # sched: [11:1.00]
   4244 ; SKYLAKE-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4245 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4246 ;
   4247 ; SKX-LABEL: test_rsqrtps:
   4248 ; SKX:       # %bb.0:
   4249 ; SKX-NEXT:    vrsqrtps %ymm0, %ymm0 # sched: [4:1.00]
   4250 ; SKX-NEXT:    vrsqrtps (%rdi), %ymm1 # sched: [11:1.00]
   4251 ; SKX-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4252 ; SKX-NEXT:    retq # sched: [7:1.00]
   4253 ;
   4254 ; BTVER2-LABEL: test_rsqrtps:
   4255 ; BTVER2:       # %bb.0:
   4256 ; BTVER2-NEXT:    vrsqrtps (%rdi), %ymm1 # sched: [7:2.00]
   4257 ; BTVER2-NEXT:    vrsqrtps %ymm0, %ymm0 # sched: [2:2.00]
   4258 ; BTVER2-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   4259 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4260 ;
   4261 ; ZNVER1-LABEL: test_rsqrtps:
   4262 ; ZNVER1:       # %bb.0:
   4263 ; ZNVER1-NEXT:    vrsqrtps (%rdi), %ymm1 # sched: [12:0.50]
   4264 ; ZNVER1-NEXT:    vrsqrtps %ymm0, %ymm0 # sched: [5:0.50]
   4265 ; ZNVER1-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4266 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4267   %1 = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0)
   4268   %2 = load <8 x float>, <8 x float> *%a1, align 32
   4269   %3 = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %2)
   4270   %4 = fadd <8 x float> %1, %3
   4271   ret <8 x float> %4
   4272 }
   4273 declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
   4274 
   ; Scheduling test for 256-bit VSHUFPD, generated from shufflevector with the
   ; mask <1, 4, 2, 7>. The first shuffle combines the two register arguments
   ; (%a0, %a1); the second uses the same mask on %a1 and a vector loaded from
   ; %a2, which the checks expect as the memory-operand form ("mem[...]"). The
   ; two results are summed with fadd so both shuffles stay live. The
   ; {{.*#+}} pattern skips everything up to the shuffle-decode comment; the
   ; trailing "sched: [a:b]" comes from llc -print-schedule (see the RUN lines).
   4275 define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
   4276 ; GENERIC-LABEL: test_shufpd:
   4277 ; GENERIC:       # %bb.0:
   4278 ; GENERIC-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
   4279 ; GENERIC-NEXT:    vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
   4280 ; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4281 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4282 ;
   4283 ; SANDY-LABEL: test_shufpd:
   4284 ; SANDY:       # %bb.0:
   4285 ; SANDY-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
   4286 ; SANDY-NEXT:    vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
   4287 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4288 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4289 ;
   4290 ; HASWELL-LABEL: test_shufpd:
   4291 ; HASWELL:       # %bb.0:
   4292 ; HASWELL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
   4293 ; HASWELL-NEXT:    vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
   4294 ; HASWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4295 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4296 ;
   4297 ; BROADWELL-LABEL: test_shufpd:
   4298 ; BROADWELL:       # %bb.0:
   4299 ; BROADWELL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
   4300 ; BROADWELL-NEXT:    vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [7:1.00]
   4301 ; BROADWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4302 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4303 ;
   4304 ; SKYLAKE-LABEL: test_shufpd:
   4305 ; SKYLAKE:       # %bb.0:
   4306 ; SKYLAKE-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
   4307 ; SKYLAKE-NEXT:    vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
   4308 ; SKYLAKE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4309 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4310 ;
   4311 ; SKX-LABEL: test_shufpd:
   4312 ; SKX:       # %bb.0:
   4313 ; SKX-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
   4314 ; SKX-NEXT:    vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
   4315 ; SKX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4316 ; SKX-NEXT:    retq # sched: [7:1.00]
   4317 ;
   4318 ; BTVER2-LABEL: test_shufpd:
   4319 ; BTVER2:       # %bb.0:
   4320 ; BTVER2-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
   4321 ; BTVER2-NEXT:    vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [6:2.00]
   4322 ; BTVER2-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   4323 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4324 ;
   4325 ; ZNVER1-LABEL: test_shufpd:
   4326 ; ZNVER1:       # %bb.0:
   4327 ; ZNVER1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:0.50]
   4328 ; ZNVER1-NEXT:    vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:0.50]
   4329 ; ZNVER1-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4330 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4331   %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 4, i32 2, i32 7>
   4332   %2 = load <4 x double>, <4 x double> *%a2, align 32
   4333   %3 = shufflevector <4 x double> %a1, <4 x double> %2, <4 x i32> <i32 1, i32 4, i32 2, i32 7>
   4334   %4 = fadd <4 x double> %1, %3
   4335   ret <4 x double> %4
   4336 }
   4337 
   ; Scheduling test for 256-bit VSHUFPS, generated from shufflevector. The
   ; first shuffle uses mask <0,0,8,8,4,4,12,12> on the two register arguments;
   ; the second uses mask <0,3,8,8,4,7,12,12> on %a1 and a vector loaded from
   ; %a2, which the checks expect as the memory-operand form ("mem[...]"). The
   ; two results are summed with fadd so both shuffles stay live. The
   ; {{.*#+}} pattern skips everything up to the shuffle-decode comment; the
   ; trailing "sched: [a:b]" comes from llc -print-schedule (see the RUN lines).
   4338 define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind {
   4339 ; GENERIC-LABEL: test_shufps:
   4340 ; GENERIC:       # %bb.0:
   4341 ; GENERIC-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
   4342 ; GENERIC-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00]
   4343 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4344 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4345 ;
   4346 ; SANDY-LABEL: test_shufps:
   4347 ; SANDY:       # %bb.0:
   4348 ; SANDY-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
   4349 ; SANDY-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00]
   4350 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4351 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4352 ;
   4353 ; HASWELL-LABEL: test_shufps:
   4354 ; HASWELL:       # %bb.0:
   4355 ; HASWELL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
   4356 ; HASWELL-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00]
   4357 ; HASWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4358 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4359 ;
   4360 ; BROADWELL-LABEL: test_shufps:
   4361 ; BROADWELL:       # %bb.0:
   4362 ; BROADWELL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
   4363 ; BROADWELL-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [7:1.00]
   4364 ; BROADWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4365 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4366 ;
   4367 ; SKYLAKE-LABEL: test_shufps:
   4368 ; SKYLAKE:       # %bb.0:
   4369 ; SKYLAKE-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
   4370 ; SKYLAKE-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00]
   4371 ; SKYLAKE-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4372 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4373 ;
   4374 ; SKX-LABEL: test_shufps:
   4375 ; SKX:       # %bb.0:
   4376 ; SKX-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
   4377 ; SKX-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00]
   4378 ; SKX-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4379 ; SKX-NEXT:    retq # sched: [7:1.00]
   4380 ;
   4381 ; BTVER2-LABEL: test_shufps:
   4382 ; BTVER2:       # %bb.0:
   4383 ; BTVER2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
   4384 ; BTVER2-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [6:2.00]
   4385 ; BTVER2-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   4386 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4387 ;
   4388 ; ZNVER1-LABEL: test_shufps:
   4389 ; ZNVER1:       # %bb.0:
   4390 ; ZNVER1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:0.50]
   4391 ; ZNVER1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:0.50]
   4392 ; ZNVER1-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4393 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4394   %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 0, i32 8, i32 8, i32 4, i32 4, i32 12, i32 12>
   4395   %2 = load <8 x float>, <8 x float> *%a2, align 32
   4396   %3 = shufflevector <8 x float> %a1, <8 x float> %2, <8 x i32> <i32 0, i32 3, i32 8, i32 8, i32 4, i32 7, i32 12, i32 12>
   4397   %4 = fadd <8 x float> %1, %3
   4398   ret <8 x float> %4
   4399 }
   4400 
   ; Scheduling test for 256-bit VSQRTPD (llvm.x86.avx.sqrt.pd.256). The
   ; intrinsic is called once on the register argument %a0 and once on a vector
   ; loaded from %a1; the results are summed with fadd so both calls stay live.
   ; Each CPU prefix checks a register-form vsqrtpd, a (%rdi) memory-form
   ; vsqrtpd and the vaddpd, in the order that prefix expects. The
   ; "# sched: [a:b]" suffixes come from llc -print-schedule (see the RUN lines).
   4401 define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) {
   4402 ; GENERIC-LABEL: test_sqrtpd:
   4403 ; GENERIC:       # %bb.0:
   4404 ; GENERIC-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [52:44.00]
   4405 ; GENERIC-NEXT:    vsqrtpd %ymm0, %ymm0 # sched: [45:44.00]
   4406 ; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4407 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4408 ;
   4409 ; SANDY-LABEL: test_sqrtpd:
   4410 ; SANDY:       # %bb.0:
   4411 ; SANDY-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [52:44.00]
   4412 ; SANDY-NEXT:    vsqrtpd %ymm0, %ymm0 # sched: [45:44.00]
   4413 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4414 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4415 ;
   4416 ; HASWELL-LABEL: test_sqrtpd:
   4417 ; HASWELL:       # %bb.0:
   4418 ; HASWELL-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [42:28.00]
   4419 ; HASWELL-NEXT:    vsqrtpd %ymm0, %ymm0 # sched: [35:28.00]
   4420 ; HASWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4421 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4422 ;
   4423 ; BROADWELL-LABEL: test_sqrtpd:
   4424 ; BROADWELL:       # %bb.0:
   4425 ; BROADWELL-NEXT:    vsqrtpd %ymm0, %ymm0 # sched: [29:28.00]
   4426 ; BROADWELL-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [35:28.00]
   4427 ; BROADWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4428 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4429 ;
   4430 ; SKYLAKE-LABEL: test_sqrtpd:
   4431 ; SKYLAKE:       # %bb.0:
   4432 ; SKYLAKE-NEXT:    vsqrtpd %ymm0, %ymm0 # sched: [18:12.00]
   4433 ; SKYLAKE-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [25:12.00]
   4434 ; SKYLAKE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4435 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4436 ;
   4437 ; SKX-LABEL: test_sqrtpd:
   4438 ; SKX:       # %bb.0:
   4439 ; SKX-NEXT:    vsqrtpd %ymm0, %ymm0 # sched: [18:12.00]
   4440 ; SKX-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [25:12.00]
   4441 ; SKX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4442 ; SKX-NEXT:    retq # sched: [7:1.00]
   4443 ;
   4444 ; BTVER2-LABEL: test_sqrtpd:
   4445 ; BTVER2:       # %bb.0:
   4446 ; BTVER2-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [59:54.00]
   4447 ; BTVER2-NEXT:    vsqrtpd %ymm0, %ymm0 # sched: [54:54.00]
   4448 ; BTVER2-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   4449 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4450 ;
   4451 ; ZNVER1-LABEL: test_sqrtpd:
   4452 ; ZNVER1:       # %bb.0:
   4453 ; ZNVER1-NEXT:    vsqrtpd (%rdi), %ymm1 # sched: [47:40.00]
   4454 ; ZNVER1-NEXT:    vsqrtpd %ymm0, %ymm0 # sched: [40:40.00]
   4455 ; ZNVER1-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4456 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4457   %1 = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0)
   4458   %2 = load <4 x double>, <4 x double> *%a1, align 32
   4459   %3 = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %2)
   4460   %4 = fadd <4 x double> %1, %3
   4461   ret <4 x double> %4
   4462 }
   4463 declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
   4464 
   4465 define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) {
        ; Schedule test for 256-bit vsqrtps. The IR takes the square root of the
        ; register argument %a0 and of a vector loaded from %a1, then adds the two
        ; results, so the expected assembly contains both the register form and
        ; the memory-operand form of the instruction followed by a vaddps.
        ; The expected order of the two vsqrtps differs by CPU - the BROADWELL,
        ; SKYLAKE and SKX checks list the register form first, while the other
        ; prefixes list the (%rdi) form first.
   4466 ; GENERIC-LABEL: test_sqrtps:
   4467 ; GENERIC:       # %bb.0:
   4468 ; GENERIC-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [36:28.00]
   4469 ; GENERIC-NEXT:    vsqrtps %ymm0, %ymm0 # sched: [29:28.00]
   4470 ; GENERIC-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4471 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4472 ;
   4473 ; SANDY-LABEL: test_sqrtps:
   4474 ; SANDY:       # %bb.0:
   4475 ; SANDY-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [36:28.00]
   4476 ; SANDY-NEXT:    vsqrtps %ymm0, %ymm0 # sched: [29:28.00]
   4477 ; SANDY-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4478 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4479 ;
   4480 ; HASWELL-LABEL: test_sqrtps:
   4481 ; HASWELL:       # %bb.0:
   4482 ; HASWELL-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [28:14.00]
   4483 ; HASWELL-NEXT:    vsqrtps %ymm0, %ymm0 # sched: [21:14.00]
   4484 ; HASWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4485 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4486 ;
   4487 ; BROADWELL-LABEL: test_sqrtps:
   4488 ; BROADWELL:       # %bb.0:
   4489 ; BROADWELL-NEXT:    vsqrtps %ymm0, %ymm0 # sched: [21:14.00]
   4490 ; BROADWELL-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [27:14.00]
   4491 ; BROADWELL-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4492 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4493 ;
   4494 ; SKYLAKE-LABEL: test_sqrtps:
   4495 ; SKYLAKE:       # %bb.0:
   4496 ; SKYLAKE-NEXT:    vsqrtps %ymm0, %ymm0 # sched: [12:6.00]
   4497 ; SKYLAKE-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [19:6.00]
   4498 ; SKYLAKE-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4499 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4500 ;
   4501 ; SKX-LABEL: test_sqrtps:
   4502 ; SKX:       # %bb.0:
   4503 ; SKX-NEXT:    vsqrtps %ymm0, %ymm0 # sched: [12:6.00]
   4504 ; SKX-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [19:6.00]
   4505 ; SKX-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4506 ; SKX-NEXT:    retq # sched: [7:1.00]
   4507 ;
   4508 ; BTVER2-LABEL: test_sqrtps:
   4509 ; BTVER2:       # %bb.0:
   4510 ; BTVER2-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [47:42.00]
   4511 ; BTVER2-NEXT:    vsqrtps %ymm0, %ymm0 # sched: [42:42.00]
   4512 ; BTVER2-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   4513 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4514 ;
   4515 ; ZNVER1-LABEL: test_sqrtps:
   4516 ; ZNVER1:       # %bb.0:
   4517 ; ZNVER1-NEXT:    vsqrtps (%rdi), %ymm1 # sched: [35:28.00]
   4518 ; ZNVER1-NEXT:    vsqrtps %ymm0, %ymm0 # sched: [28:28.00]
   4519 ; ZNVER1-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4520 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4521   %1 = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0)
          ; The loaded value feeds the second sqrt; it shows up as the (%rdi)
          ; operand in the expected assembly.
   4522   %2 = load <8 x float>, <8 x float> *%a1, align 32
   4523   %3 = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %2)
   4524   %4 = fadd <8 x float> %1, %3
   4525   ret <8 x float> %4
   4526 }
        ; Intrinsic called by test_sqrtps for the 8 x float square root.
   4527 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
   4528 
   4529 define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
        ; Schedule test for 256-bit vsubpd. The IR subtracts %a1 from %a0 and then
        ; subtracts a vector loaded from %a2 from that result, so every prefix
        ; expects one register-register vsubpd followed by one vsubpd with a
        ; (%rdi) memory operand.
   4530 ; GENERIC-LABEL: test_subpd:
   4531 ; GENERIC:       # %bb.0:
   4532 ; GENERIC-NEXT:    vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4533 ; GENERIC-NEXT:    vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   4534 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4535 ;
   4536 ; SANDY-LABEL: test_subpd:
   4537 ; SANDY:       # %bb.0:
   4538 ; SANDY-NEXT:    vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4539 ; SANDY-NEXT:    vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   4540 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4541 ;
   4542 ; HASWELL-LABEL: test_subpd:
   4543 ; HASWELL:       # %bb.0:
   4544 ; HASWELL-NEXT:    vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4545 ; HASWELL-NEXT:    vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   4546 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4547 ;
   4548 ; BROADWELL-LABEL: test_subpd:
   4549 ; BROADWELL:       # %bb.0:
   4550 ; BROADWELL-NEXT:    vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4551 ; BROADWELL-NEXT:    vsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
   4552 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4553 ;
   4554 ; SKYLAKE-LABEL: test_subpd:
   4555 ; SKYLAKE:       # %bb.0:
   4556 ; SKYLAKE-NEXT:    vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4557 ; SKYLAKE-NEXT:    vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   4558 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4559 ;
   4560 ; SKX-LABEL: test_subpd:
   4561 ; SKX:       # %bb.0:
   4562 ; SKX-NEXT:    vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4563 ; SKX-NEXT:    vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   4564 ; SKX-NEXT:    retq # sched: [7:1.00]
   4565 ;
   4566 ; BTVER2-LABEL: test_subpd:
   4567 ; BTVER2:       # %bb.0:
   4568 ; BTVER2-NEXT:    vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   4569 ; BTVER2-NEXT:    vsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
   4570 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4571 ;
   4572 ; ZNVER1-LABEL: test_subpd:
   4573 ; ZNVER1:       # %bb.0:
   4574 ; ZNVER1-NEXT:    vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4575 ; ZNVER1-NEXT:    vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   4576 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4577   %1 = fsub <4 x double> %a0, %a1
          ; The load is the right-hand operand of the second fsub; the expected
          ; assembly shows it as the (%rdi) operand of the second vsubpd.
   4578   %2 = load <4 x double>, <4 x double> *%a2, align 32
   4579   %3 = fsub <4 x double> %1, %2
   4580   ret <4 x double> %3
   4581 }
   4582 
   4583 define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
        ; Schedule test for 256-bit vsubps. The IR subtracts %a1 from %a0 and then
        ; subtracts a vector loaded from %a2 from that result, so every prefix
        ; expects one register-register vsubps followed by one vsubps with a
        ; (%rdi) memory operand.
   4584 ; GENERIC-LABEL: test_subps:
   4585 ; GENERIC:       # %bb.0:
   4586 ; GENERIC-NEXT:    vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4587 ; GENERIC-NEXT:    vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   4588 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4589 ;
   4590 ; SANDY-LABEL: test_subps:
   4591 ; SANDY:       # %bb.0:
   4592 ; SANDY-NEXT:    vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4593 ; SANDY-NEXT:    vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   4594 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4595 ;
   4596 ; HASWELL-LABEL: test_subps:
   4597 ; HASWELL:       # %bb.0:
   4598 ; HASWELL-NEXT:    vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4599 ; HASWELL-NEXT:    vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   4600 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4601 ;
   4602 ; BROADWELL-LABEL: test_subps:
   4603 ; BROADWELL:       # %bb.0:
   4604 ; BROADWELL-NEXT:    vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4605 ; BROADWELL-NEXT:    vsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
   4606 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4607 ;
   4608 ; SKYLAKE-LABEL: test_subps:
   4609 ; SKYLAKE:       # %bb.0:
   4610 ; SKYLAKE-NEXT:    vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4611 ; SKYLAKE-NEXT:    vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   4612 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4613 ;
   4614 ; SKX-LABEL: test_subps:
   4615 ; SKX:       # %bb.0:
   4616 ; SKX-NEXT:    vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   4617 ; SKX-NEXT:    vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
   4618 ; SKX-NEXT:    retq # sched: [7:1.00]
   4619 ;
   4620 ; BTVER2-LABEL: test_subps:
   4621 ; BTVER2:       # %bb.0:
   4622 ; BTVER2-NEXT:    vsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   4623 ; BTVER2-NEXT:    vsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
   4624 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4625 ;
   4626 ; ZNVER1-LABEL: test_subps:
   4627 ; ZNVER1:       # %bb.0:
   4628 ; ZNVER1-NEXT:    vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4629 ; ZNVER1-NEXT:    vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
   4630 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4631   %1 = fsub <8 x float> %a0, %a1
          ; The load is the right-hand operand of the second fsub; the expected
          ; assembly shows it as the (%rdi) operand of the second vsubps.
   4632   %2 = load <8 x float>, <8 x float> *%a2, align 32
   4633   %3 = fsub <8 x float> %1, %2
   4634   ret <8 x float> %3
   4635 }
   4636 
   4637 define i32 @test_testpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
        ; Schedule test for 128-bit vtestpd, reached through the vtestc.pd
        ; intrinsic. The intrinsic is called once with two register operands and
        ; once with its second operand loaded from %a2; the two i32 results are
        ; then added together.
        ; In the expected assembly the first result is captured with setb into a
        ; zeroed %eax and the second is folded in with `adcl $0`, so no separate
        ; add instruction appears.
   4638 ; GENERIC-LABEL: test_testpd:
   4639 ; GENERIC:       # %bb.0:
   4640 ; GENERIC-NEXT:    xorl %eax, %eax # sched: [1:0.33]
   4641 ; GENERIC-NEXT:    vtestpd %xmm1, %xmm0 # sched: [1:1.00]
   4642 ; GENERIC-NEXT:    setb %al # sched: [1:0.50]
   4643 ; GENERIC-NEXT:    vtestpd (%rdi), %xmm0 # sched: [7:1.00]
   4644 ; GENERIC-NEXT:    adcl $0, %eax # sched: [2:0.67]
   4645 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4646 ;
   4647 ; SANDY-LABEL: test_testpd:
   4648 ; SANDY:       # %bb.0:
   4649 ; SANDY-NEXT:    xorl %eax, %eax # sched: [1:0.33]
   4650 ; SANDY-NEXT:    vtestpd %xmm1, %xmm0 # sched: [1:1.00]
   4651 ; SANDY-NEXT:    setb %al # sched: [1:0.50]
   4652 ; SANDY-NEXT:    vtestpd (%rdi), %xmm0 # sched: [7:1.00]
   4653 ; SANDY-NEXT:    adcl $0, %eax # sched: [2:0.67]
   4654 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4655 ;
   4656 ; HASWELL-LABEL: test_testpd:
   4657 ; HASWELL:       # %bb.0:
   4658 ; HASWELL-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4659 ; HASWELL-NEXT:    vtestpd %xmm1, %xmm0 # sched: [1:1.00]
   4660 ; HASWELL-NEXT:    setb %al # sched: [1:0.50]
   4661 ; HASWELL-NEXT:    vtestpd (%rdi), %xmm0 # sched: [7:1.00]
   4662 ; HASWELL-NEXT:    adcl $0, %eax # sched: [2:0.50]
   4663 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4664 ;
   4665 ; BROADWELL-LABEL: test_testpd:
   4666 ; BROADWELL:       # %bb.0:
   4667 ; BROADWELL-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4668 ; BROADWELL-NEXT:    vtestpd %xmm1, %xmm0 # sched: [1:1.00]
   4669 ; BROADWELL-NEXT:    setb %al # sched: [1:0.50]
   4670 ; BROADWELL-NEXT:    vtestpd (%rdi), %xmm0 # sched: [6:1.00]
   4671 ; BROADWELL-NEXT:    adcl $0, %eax # sched: [1:0.50]
   4672 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4673 ;
   4674 ; SKYLAKE-LABEL: test_testpd:
   4675 ; SKYLAKE:       # %bb.0:
   4676 ; SKYLAKE-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4677 ; SKYLAKE-NEXT:    vtestpd %xmm1, %xmm0 # sched: [2:1.00]
   4678 ; SKYLAKE-NEXT:    setb %al # sched: [1:0.50]
   4679 ; SKYLAKE-NEXT:    vtestpd (%rdi), %xmm0 # sched: [8:1.00]
   4680 ; SKYLAKE-NEXT:    adcl $0, %eax # sched: [1:0.50]
   4681 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4682 ;
   4683 ; SKX-LABEL: test_testpd:
   4684 ; SKX:       # %bb.0:
   4685 ; SKX-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4686 ; SKX-NEXT:    vtestpd %xmm1, %xmm0 # sched: [2:1.00]
   4687 ; SKX-NEXT:    setb %al # sched: [1:0.50]
   4688 ; SKX-NEXT:    vtestpd (%rdi), %xmm0 # sched: [8:1.00]
   4689 ; SKX-NEXT:    adcl $0, %eax # sched: [1:0.50]
   4690 ; SKX-NEXT:    retq # sched: [7:1.00]
   4691 ;
   4692 ; BTVER2-LABEL: test_testpd:
   4693 ; BTVER2:       # %bb.0:
   4694 ; BTVER2-NEXT:    xorl %eax, %eax # sched: [0:0.50]
   4695 ; BTVER2-NEXT:    vtestpd %xmm1, %xmm0 # sched: [3:1.00]
   4696 ; BTVER2-NEXT:    setb %al # sched: [1:0.50]
   4697 ; BTVER2-NEXT:    vtestpd (%rdi), %xmm0 # sched: [8:1.00]
   4698 ; BTVER2-NEXT:    adcl $0, %eax # sched: [1:1.00]
   4699 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4700 ;
   4701 ; ZNVER1-LABEL: test_testpd:
   4702 ; ZNVER1:       # %bb.0:
   4703 ; ZNVER1-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4704 ; ZNVER1-NEXT:    vtestpd %xmm1, %xmm0 # sched: [1:0.25]
   4705 ; ZNVER1-NEXT:    setb %al # sched: [1:0.25]
   4706 ; ZNVER1-NEXT:    vtestpd (%rdi), %xmm0 # sched: [8:0.50]
   4707 ; ZNVER1-NEXT:    adcl $0, %eax # sched: [1:0.25]
   4708 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4709   %1 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1)
          ; Second operand of the second call comes from memory; the expected
          ; assembly shows it as the (%rdi) operand of the second vtestpd.
   4710   %2 = load <2 x double>, <2 x double> *%a2, align 16
   4711   %3 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %2)
   4712   %4 = add i32 %1, %3
   4713   ret i32 %4
   4714 }
        ; Intrinsic called by test_testpd (128-bit, 2 x double operands).
   4715 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
   4716 
   4717 define i32 @test_testpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
        ; Schedule test for 256-bit vtestpd, reached through the vtestc.pd.256
        ; intrinsic. The intrinsic is called once with two register operands and
        ; once with its second operand loaded from %a2; the two i32 results are
        ; then added together.
        ; In the expected assembly the first result is captured with setb into a
        ; zeroed %eax and the second is folded in with `adcl $0`.
        ; Every prefix except BTVER2 also expects a vzeroupper before retq
        ; (presumably because ymm registers are used but the return value is a
        ; scalar - confirm against the vzeroupper insertion pass).
   4718 ; GENERIC-LABEL: test_testpd_ymm:
   4719 ; GENERIC:       # %bb.0:
   4720 ; GENERIC-NEXT:    xorl %eax, %eax # sched: [1:0.33]
   4721 ; GENERIC-NEXT:    vtestpd %ymm1, %ymm0 # sched: [1:1.00]
   4722 ; GENERIC-NEXT:    setb %al # sched: [1:0.50]
   4723 ; GENERIC-NEXT:    vtestpd (%rdi), %ymm0 # sched: [8:1.00]
   4724 ; GENERIC-NEXT:    adcl $0, %eax # sched: [2:0.67]
   4725 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   4726 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4727 ;
   4728 ; SANDY-LABEL: test_testpd_ymm:
   4729 ; SANDY:       # %bb.0:
   4730 ; SANDY-NEXT:    xorl %eax, %eax # sched: [1:0.33]
   4731 ; SANDY-NEXT:    vtestpd %ymm1, %ymm0 # sched: [1:1.00]
   4732 ; SANDY-NEXT:    setb %al # sched: [1:0.50]
   4733 ; SANDY-NEXT:    vtestpd (%rdi), %ymm0 # sched: [8:1.00]
   4734 ; SANDY-NEXT:    adcl $0, %eax # sched: [2:0.67]
   4735 ; SANDY-NEXT:    vzeroupper # sched: [100:0.33]
   4736 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4737 ;
   4738 ; HASWELL-LABEL: test_testpd_ymm:
   4739 ; HASWELL:       # %bb.0:
   4740 ; HASWELL-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4741 ; HASWELL-NEXT:    vtestpd %ymm1, %ymm0 # sched: [1:1.00]
   4742 ; HASWELL-NEXT:    setb %al # sched: [1:0.50]
   4743 ; HASWELL-NEXT:    vtestpd (%rdi), %ymm0 # sched: [8:1.00]
   4744 ; HASWELL-NEXT:    adcl $0, %eax # sched: [2:0.50]
   4745 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
   4746 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4747 ;
   4748 ; BROADWELL-LABEL: test_testpd_ymm:
   4749 ; BROADWELL:       # %bb.0:
   4750 ; BROADWELL-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4751 ; BROADWELL-NEXT:    vtestpd %ymm1, %ymm0 # sched: [1:1.00]
   4752 ; BROADWELL-NEXT:    setb %al # sched: [1:0.50]
   4753 ; BROADWELL-NEXT:    vtestpd (%rdi), %ymm0 # sched: [7:1.00]
   4754 ; BROADWELL-NEXT:    adcl $0, %eax # sched: [1:0.50]
   4755 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
   4756 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4757 ;
   4758 ; SKYLAKE-LABEL: test_testpd_ymm:
   4759 ; SKYLAKE:       # %bb.0:
   4760 ; SKYLAKE-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4761 ; SKYLAKE-NEXT:    vtestpd %ymm1, %ymm0 # sched: [2:1.00]
   4762 ; SKYLAKE-NEXT:    setb %al # sched: [1:0.50]
   4763 ; SKYLAKE-NEXT:    vtestpd (%rdi), %ymm0 # sched: [9:1.00]
   4764 ; SKYLAKE-NEXT:    adcl $0, %eax # sched: [1:0.50]
   4765 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
   4766 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4767 ;
   4768 ; SKX-LABEL: test_testpd_ymm:
   4769 ; SKX:       # %bb.0:
   4770 ; SKX-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4771 ; SKX-NEXT:    vtestpd %ymm1, %ymm0 # sched: [2:1.00]
   4772 ; SKX-NEXT:    setb %al # sched: [1:0.50]
   4773 ; SKX-NEXT:    vtestpd (%rdi), %ymm0 # sched: [9:1.00]
   4774 ; SKX-NEXT:    adcl $0, %eax # sched: [1:0.50]
   4775 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   4776 ; SKX-NEXT:    retq # sched: [7:1.00]
   4777 ;
   4778 ; BTVER2-LABEL: test_testpd_ymm:
   4779 ; BTVER2:       # %bb.0:
   4780 ; BTVER2-NEXT:    xorl %eax, %eax # sched: [0:0.50]
   4781 ; BTVER2-NEXT:    vtestpd %ymm1, %ymm0 # sched: [4:2.00]
   4782 ; BTVER2-NEXT:    setb %al # sched: [1:0.50]
   4783 ; BTVER2-NEXT:    vtestpd (%rdi), %ymm0 # sched: [9:2.00]
   4784 ; BTVER2-NEXT:    adcl $0, %eax # sched: [1:1.00]
   4785 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4786 ;
   4787 ; ZNVER1-LABEL: test_testpd_ymm:
   4788 ; ZNVER1:       # %bb.0:
   4789 ; ZNVER1-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4790 ; ZNVER1-NEXT:    vtestpd %ymm1, %ymm0 # sched: [1:0.25]
   4791 ; ZNVER1-NEXT:    setb %al # sched: [1:0.25]
   4792 ; ZNVER1-NEXT:    vtestpd (%rdi), %ymm0 # sched: [8:0.50]
   4793 ; ZNVER1-NEXT:    adcl $0, %eax # sched: [1:0.25]
   4794 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
   4795 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4796   %1 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1)
          ; Second operand of the second call comes from memory; the expected
          ; assembly shows it as the (%rdi) operand of the second vtestpd.
   4797   %2 = load <4 x double>, <4 x double> *%a2, align 32
   4798   %3 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %2)
   4799   %4 = add i32 %1, %3
   4800   ret i32 %4
   4801 }
        ; Intrinsic called by test_testpd_ymm (256-bit, 4 x double operands).
   4802 declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
   4803 
   4804 define i32 @test_testps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
        ; Schedule test for 128-bit vtestps, reached through the vtestc.ps
        ; intrinsic. The intrinsic is called once with two register operands and
        ; once with its second operand loaded from %a2; the two i32 results are
        ; then added together.
        ; In the expected assembly the first result is captured with setb into a
        ; zeroed %eax and the second is folded in with `adcl $0`, so no separate
        ; add instruction appears.
   4805 ; GENERIC-LABEL: test_testps:
   4806 ; GENERIC:       # %bb.0:
   4807 ; GENERIC-NEXT:    xorl %eax, %eax # sched: [1:0.33]
   4808 ; GENERIC-NEXT:    vtestps %xmm1, %xmm0 # sched: [1:1.00]
   4809 ; GENERIC-NEXT:    setb %al # sched: [1:0.50]
   4810 ; GENERIC-NEXT:    vtestps (%rdi), %xmm0 # sched: [7:1.00]
   4811 ; GENERIC-NEXT:    adcl $0, %eax # sched: [2:0.67]
   4812 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4813 ;
   4814 ; SANDY-LABEL: test_testps:
   4815 ; SANDY:       # %bb.0:
   4816 ; SANDY-NEXT:    xorl %eax, %eax # sched: [1:0.33]
   4817 ; SANDY-NEXT:    vtestps %xmm1, %xmm0 # sched: [1:1.00]
   4818 ; SANDY-NEXT:    setb %al # sched: [1:0.50]
   4819 ; SANDY-NEXT:    vtestps (%rdi), %xmm0 # sched: [7:1.00]
   4820 ; SANDY-NEXT:    adcl $0, %eax # sched: [2:0.67]
   4821 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4822 ;
   4823 ; HASWELL-LABEL: test_testps:
   4824 ; HASWELL:       # %bb.0:
   4825 ; HASWELL-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4826 ; HASWELL-NEXT:    vtestps %xmm1, %xmm0 # sched: [1:1.00]
   4827 ; HASWELL-NEXT:    setb %al # sched: [1:0.50]
   4828 ; HASWELL-NEXT:    vtestps (%rdi), %xmm0 # sched: [7:1.00]
   4829 ; HASWELL-NEXT:    adcl $0, %eax # sched: [2:0.50]
   4830 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4831 ;
   4832 ; BROADWELL-LABEL: test_testps:
   4833 ; BROADWELL:       # %bb.0:
   4834 ; BROADWELL-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4835 ; BROADWELL-NEXT:    vtestps %xmm1, %xmm0 # sched: [1:1.00]
   4836 ; BROADWELL-NEXT:    setb %al # sched: [1:0.50]
   4837 ; BROADWELL-NEXT:    vtestps (%rdi), %xmm0 # sched: [6:1.00]
   4838 ; BROADWELL-NEXT:    adcl $0, %eax # sched: [1:0.50]
   4839 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4840 ;
   4841 ; SKYLAKE-LABEL: test_testps:
   4842 ; SKYLAKE:       # %bb.0:
   4843 ; SKYLAKE-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4844 ; SKYLAKE-NEXT:    vtestps %xmm1, %xmm0 # sched: [2:1.00]
   4845 ; SKYLAKE-NEXT:    setb %al # sched: [1:0.50]
   4846 ; SKYLAKE-NEXT:    vtestps (%rdi), %xmm0 # sched: [8:1.00]
   4847 ; SKYLAKE-NEXT:    adcl $0, %eax # sched: [1:0.50]
   4848 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4849 ;
   4850 ; SKX-LABEL: test_testps:
   4851 ; SKX:       # %bb.0:
   4852 ; SKX-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4853 ; SKX-NEXT:    vtestps %xmm1, %xmm0 # sched: [2:1.00]
   4854 ; SKX-NEXT:    setb %al # sched: [1:0.50]
   4855 ; SKX-NEXT:    vtestps (%rdi), %xmm0 # sched: [8:1.00]
   4856 ; SKX-NEXT:    adcl $0, %eax # sched: [1:0.50]
   4857 ; SKX-NEXT:    retq # sched: [7:1.00]
   4858 ;
   4859 ; BTVER2-LABEL: test_testps:
   4860 ; BTVER2:       # %bb.0:
   4861 ; BTVER2-NEXT:    xorl %eax, %eax # sched: [0:0.50]
   4862 ; BTVER2-NEXT:    vtestps %xmm1, %xmm0 # sched: [3:1.00]
   4863 ; BTVER2-NEXT:    setb %al # sched: [1:0.50]
   4864 ; BTVER2-NEXT:    vtestps (%rdi), %xmm0 # sched: [8:1.00]
   4865 ; BTVER2-NEXT:    adcl $0, %eax # sched: [1:1.00]
   4866 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4867 ;
   4868 ; ZNVER1-LABEL: test_testps:
   4869 ; ZNVER1:       # %bb.0:
   4870 ; ZNVER1-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4871 ; ZNVER1-NEXT:    vtestps %xmm1, %xmm0 # sched: [1:0.25]
   4872 ; ZNVER1-NEXT:    setb %al # sched: [1:0.25]
   4873 ; ZNVER1-NEXT:    vtestps (%rdi), %xmm0 # sched: [8:0.50]
   4874 ; ZNVER1-NEXT:    adcl $0, %eax # sched: [1:0.25]
   4875 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4876   %1 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1)
          ; Second operand of the second call comes from memory; the expected
          ; assembly shows it as the (%rdi) operand of the second vtestps.
   4877   %2 = load <4 x float>, <4 x float> *%a2, align 16
   4878   %3 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %2)
   4879   %4 = add i32 %1, %3
   4880   ret i32 %4
   4881 }
        ; Intrinsic called by test_testps (128-bit, 4 x float operands).
   4882 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
   4883 
   4884 define i32 @test_testps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
        ; Schedule test for 256-bit vtestps, reached through the vtestc.ps.256
        ; intrinsic. The intrinsic is called once with two register operands and
        ; once with its second operand loaded from %a2; the two i32 results are
        ; then added together.
        ; In the expected assembly the first result is captured with setb into a
        ; zeroed %eax and the second is folded in with `adcl $0`.
        ; Every prefix except BTVER2 also expects a vzeroupper before retq
        ; (presumably because ymm registers are used but the return value is a
        ; scalar - confirm against the vzeroupper insertion pass).
   4885 ; GENERIC-LABEL: test_testps_ymm:
   4886 ; GENERIC:       # %bb.0:
   4887 ; GENERIC-NEXT:    xorl %eax, %eax # sched: [1:0.33]
   4888 ; GENERIC-NEXT:    vtestps %ymm1, %ymm0 # sched: [1:1.00]
   4889 ; GENERIC-NEXT:    setb %al # sched: [1:0.50]
   4890 ; GENERIC-NEXT:    vtestps (%rdi), %ymm0 # sched: [8:1.00]
   4891 ; GENERIC-NEXT:    adcl $0, %eax # sched: [2:0.67]
   4892 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   4893 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4894 ;
   4895 ; SANDY-LABEL: test_testps_ymm:
   4896 ; SANDY:       # %bb.0:
   4897 ; SANDY-NEXT:    xorl %eax, %eax # sched: [1:0.33]
   4898 ; SANDY-NEXT:    vtestps %ymm1, %ymm0 # sched: [1:1.00]
   4899 ; SANDY-NEXT:    setb %al # sched: [1:0.50]
   4900 ; SANDY-NEXT:    vtestps (%rdi), %ymm0 # sched: [8:1.00]
   4901 ; SANDY-NEXT:    adcl $0, %eax # sched: [2:0.67]
   4902 ; SANDY-NEXT:    vzeroupper # sched: [100:0.33]
   4903 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4904 ;
   4905 ; HASWELL-LABEL: test_testps_ymm:
   4906 ; HASWELL:       # %bb.0:
   4907 ; HASWELL-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4908 ; HASWELL-NEXT:    vtestps %ymm1, %ymm0 # sched: [1:1.00]
   4909 ; HASWELL-NEXT:    setb %al # sched: [1:0.50]
   4910 ; HASWELL-NEXT:    vtestps (%rdi), %ymm0 # sched: [8:1.00]
   4911 ; HASWELL-NEXT:    adcl $0, %eax # sched: [2:0.50]
   4912 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
   4913 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4914 ;
   4915 ; BROADWELL-LABEL: test_testps_ymm:
   4916 ; BROADWELL:       # %bb.0:
   4917 ; BROADWELL-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4918 ; BROADWELL-NEXT:    vtestps %ymm1, %ymm0 # sched: [1:1.00]
   4919 ; BROADWELL-NEXT:    setb %al # sched: [1:0.50]
   4920 ; BROADWELL-NEXT:    vtestps (%rdi), %ymm0 # sched: [7:1.00]
   4921 ; BROADWELL-NEXT:    adcl $0, %eax # sched: [1:0.50]
   4922 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
   4923 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4924 ;
   4925 ; SKYLAKE-LABEL: test_testps_ymm:
   4926 ; SKYLAKE:       # %bb.0:
   4927 ; SKYLAKE-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4928 ; SKYLAKE-NEXT:    vtestps %ymm1, %ymm0 # sched: [2:1.00]
   4929 ; SKYLAKE-NEXT:    setb %al # sched: [1:0.50]
   4930 ; SKYLAKE-NEXT:    vtestps (%rdi), %ymm0 # sched: [9:1.00]
   4931 ; SKYLAKE-NEXT:    adcl $0, %eax # sched: [1:0.50]
   4932 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
   4933 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   4934 ;
   4935 ; SKX-LABEL: test_testps_ymm:
   4936 ; SKX:       # %bb.0:
   4937 ; SKX-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4938 ; SKX-NEXT:    vtestps %ymm1, %ymm0 # sched: [2:1.00]
   4939 ; SKX-NEXT:    setb %al # sched: [1:0.50]
   4940 ; SKX-NEXT:    vtestps (%rdi), %ymm0 # sched: [9:1.00]
   4941 ; SKX-NEXT:    adcl $0, %eax # sched: [1:0.50]
   4942 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   4943 ; SKX-NEXT:    retq # sched: [7:1.00]
   4944 ;
   4945 ; BTVER2-LABEL: test_testps_ymm:
   4946 ; BTVER2:       # %bb.0:
   4947 ; BTVER2-NEXT:    xorl %eax, %eax # sched: [0:0.50]
   4948 ; BTVER2-NEXT:    vtestps %ymm1, %ymm0 # sched: [4:2.00]
   4949 ; BTVER2-NEXT:    setb %al # sched: [1:0.50]
   4950 ; BTVER2-NEXT:    vtestps (%rdi), %ymm0 # sched: [9:2.00]
   4951 ; BTVER2-NEXT:    adcl $0, %eax # sched: [1:1.00]
   4952 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   4953 ;
   4954 ; ZNVER1-LABEL: test_testps_ymm:
   4955 ; ZNVER1:       # %bb.0:
   4956 ; ZNVER1-NEXT:    xorl %eax, %eax # sched: [1:0.25]
   4957 ; ZNVER1-NEXT:    vtestps %ymm1, %ymm0 # sched: [1:0.25]
   4958 ; ZNVER1-NEXT:    setb %al # sched: [1:0.25]
   4959 ; ZNVER1-NEXT:    vtestps (%rdi), %ymm0 # sched: [8:0.50]
   4960 ; ZNVER1-NEXT:    adcl $0, %eax # sched: [1:0.25]
   4961 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
   4962 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   4963   %1 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1)
          ; Second operand of the second call comes from memory; the expected
          ; assembly shows it as the (%rdi) operand of the second vtestps.
   4964   %2 = load <8 x float>, <8 x float> *%a2, align 32
   4965   %3 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %2)
   4966   %4 = add i32 %1, %3
   4967   ret i32 %4
   4968 }
        ; Intrinsic called by test_testps_ymm (256-bit, 8 x float operands).
   4969 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
   4970 
   4971 define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
        ; Schedule test for 256-bit vunpckhpd. Both shufflevectors use the mask
        ; <1, 5, 3, 7>. The first mixes %a0 with %a1; the second mixes %a1 with a
        ; vector loaded from %a2, which is the mem[...] form in the expected
        ; assembly. The fadd combines the two shuffle results, which is why a
        ; vaddpd follows the two vunpckhpd instructions.
   4972 ; GENERIC-LABEL: test_unpckhpd:
   4973 ; GENERIC:       # %bb.0:
   4974 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
   4975 ; GENERIC-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
   4976 ; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4977 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   4978 ;
   4979 ; SANDY-LABEL: test_unpckhpd:
   4980 ; SANDY:       # %bb.0:
   4981 ; SANDY-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
   4982 ; SANDY-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
   4983 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4984 ; SANDY-NEXT:    retq # sched: [1:1.00]
   4985 ;
   4986 ; HASWELL-LABEL: test_unpckhpd:
   4987 ; HASWELL:       # %bb.0:
   4988 ; HASWELL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
   4989 ; HASWELL-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
   4990 ; HASWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4991 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   4992 ;
   4993 ; BROADWELL-LABEL: test_unpckhpd:
   4994 ; BROADWELL:       # %bb.0:
   4995 ; BROADWELL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
   4996 ; BROADWELL-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [7:1.00]
   4997 ; BROADWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   4998 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   4999 ;
   5000 ; SKYLAKE-LABEL: test_unpckhpd:
   5001 ; SKYLAKE:       # %bb.0:
   5002 ; SKYLAKE-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
   5003 ; SKYLAKE-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
   5004 ; SKYLAKE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   5005 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5006 ;
   5007 ; SKX-LABEL: test_unpckhpd:
   5008 ; SKX:       # %bb.0:
   5009 ; SKX-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
   5010 ; SKX-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
   5011 ; SKX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   5012 ; SKX-NEXT:    retq # sched: [7:1.00]
   5013 ;
   5014 ; BTVER2-LABEL: test_unpckhpd:
   5015 ; BTVER2:       # %bb.0:
   5016 ; BTVER2-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
   5017 ; BTVER2-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [6:2.00]
   5018 ; BTVER2-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   5019 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5020 ;
   5021 ; ZNVER1-LABEL: test_unpckhpd:
   5022 ; ZNVER1:       # %bb.0:
   5023 ; ZNVER1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.50]
   5024 ; ZNVER1-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:0.50]
   5025 ; ZNVER1-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   5026 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5027   %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
          ; The loaded vector is the second shuffle's second source, matching the
          ; mem[1] / mem[3] elements in the expected output.
   5028   %2 = load <4 x double>, <4 x double> *%a2, align 32
   5029   %3 = shufflevector <4 x double> %a1, <4 x double> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   5030   %4 = fadd <4 x double> %1, %3
   5031   ret <4 x double> %4
   5032 }
   5033 
   5034 define <8 x float> @test_unpckhps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind {
        ; Schedule test for 256-bit vunpckhps. Both shufflevectors use the mask
        ; <2, 10, 3, 11, 6, 14, 7, 15>. The second shuffle takes the first
        ; shuffle's result (%1) and a vector loaded from %a2 as its sources, so
        ; the two expected vunpckhps are chained through ymm0 and no add
        ; instruction is needed to join them.
        ; This function carries an explicit nounwind attribute.
   5035 ; GENERIC-LABEL: test_unpckhps:
   5036 ; GENERIC:       # %bb.0:
   5037 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
   5038 ; GENERIC-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
   5039 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5040 ;
   5041 ; SANDY-LABEL: test_unpckhps:
   5042 ; SANDY:       # %bb.0:
   5043 ; SANDY-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
   5044 ; SANDY-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
   5045 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5046 ;
   5047 ; HASWELL-LABEL: test_unpckhps:
   5048 ; HASWELL:       # %bb.0:
   5049 ; HASWELL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
   5050 ; HASWELL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
   5051 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5052 ;
   5053 ; BROADWELL-LABEL: test_unpckhps:
   5054 ; BROADWELL:       # %bb.0:
   5055 ; BROADWELL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
   5056 ; BROADWELL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
   5057 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5058 ;
   5059 ; SKYLAKE-LABEL: test_unpckhps:
   5060 ; SKYLAKE:       # %bb.0:
   5061 ; SKYLAKE-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
   5062 ; SKYLAKE-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
   5063 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5064 ;
   5065 ; SKX-LABEL: test_unpckhps:
   5066 ; SKX:       # %bb.0:
   5067 ; SKX-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
   5068 ; SKX-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
   5069 ; SKX-NEXT:    retq # sched: [7:1.00]
   5070 ;
   5071 ; BTVER2-LABEL: test_unpckhps:
   5072 ; BTVER2:       # %bb.0:
   5073 ; BTVER2-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
   5074 ; BTVER2-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:2.00]
   5075 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5076 ;
   5077 ; ZNVER1-LABEL: test_unpckhps:
   5078 ; ZNVER1:       # %bb.0:
   5079 ; ZNVER1-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.50]
   5080 ; ZNVER1-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:0.50]
   5081 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5082   %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
          ; The loaded vector is the second shuffle's second source, matching the
          ; mem[...] elements in the expected output.
   5083   %2 = load <8 x float>, <8 x float> *%a2, align 32
   5084   %3 = shufflevector <8 x float> %1, <8 x float> %2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
   5085   ret <8 x float> %3
   5086 }
   5087 
   5088 define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
        ; Schedule-annotation test for the 256-bit vunpcklpd.
        ; The IR interleaves elements 0 and 2 of two <4 x double> vectors twice:
        ; once from %a0/%a1, and once from %a1 and a vector loaded from %a2 (the
        ; expected output shows that load as a `mem` operand of the second
        ; vunpcklpd). The two shuffle results are then added. The autogenerated
        ; assertions below pin, for each CPU prefix set up by the RUN lines, the
        ; emitted instruction sequence and its `sched: [..]` annotations.
   5089 ; GENERIC-LABEL: test_unpcklpd:
   5090 ; GENERIC:       # %bb.0:
   5091 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
   5092 ; GENERIC-NEXT:    vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
   5093 ; GENERIC-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   5094 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5095 ;
   5096 ; SANDY-LABEL: test_unpcklpd:
   5097 ; SANDY:       # %bb.0:
   5098 ; SANDY-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
   5099 ; SANDY-NEXT:    vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
   5100 ; SANDY-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   5101 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5102 ;
   5103 ; HASWELL-LABEL: test_unpcklpd:
   5104 ; HASWELL:       # %bb.0:
   5105 ; HASWELL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
   5106 ; HASWELL-NEXT:    vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
   5107 ; HASWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   5108 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5109 ;
   5110 ; BROADWELL-LABEL: test_unpcklpd:
   5111 ; BROADWELL:       # %bb.0:
   5112 ; BROADWELL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
   5113 ; BROADWELL-NEXT:    vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [7:1.00]
   5114 ; BROADWELL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   5115 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5116 ;
   5117 ; SKYLAKE-LABEL: test_unpcklpd:
   5118 ; SKYLAKE:       # %bb.0:
   5119 ; SKYLAKE-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
   5120 ; SKYLAKE-NEXT:    vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
   5121 ; SKYLAKE-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   5122 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5123 ;
   5124 ; SKX-LABEL: test_unpcklpd:
   5125 ; SKX:       # %bb.0:
   5126 ; SKX-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
   5127 ; SKX-NEXT:    vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
   5128 ; SKX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
   5129 ; SKX-NEXT:    retq # sched: [7:1.00]
   5130 ;
   5131 ; BTVER2-LABEL: test_unpcklpd:
   5132 ; BTVER2:       # %bb.0:
   5133 ; BTVER2-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
   5134 ; BTVER2-NEXT:    vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [6:2.00]
   5135 ; BTVER2-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
   5136 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5137 ;
   5138 ; ZNVER1-LABEL: test_unpcklpd:
   5139 ; ZNVER1:       # %bb.0:
   5140 ; ZNVER1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.50]
   5141 ; ZNVER1-NEXT:    vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:0.50]
   5142 ; ZNVER1-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
   5143 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Mask <0,4,2,6> selects a0[0], a1[0], a0[2], a1[2].
   5144   %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   5145   %2 = load <4 x double>, <4 x double> *%a2, align 32
          ; Same mask, but the operands are %a1 and the vector loaded from %a2.
   5146   %3 = shufflevector <4 x double> %a1, <4 x double> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
          ; NOTE(review): the fadd presumably keeps both shuffles live and in the
          ; floating-point domain so that vunpcklpd is what gets selected - confirm.
   5147   %4 = fadd <4 x double> %1, %3
   5148   ret <4 x double> %4
   5149 }
   5150 
   5151 define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind {
        ; Schedule-annotation test for the 256-bit vunpcklps.
        ; The IR applies the same interleaving shuffle twice: first to %a0/%a1,
        ; then to that result and a vector loaded from %a2 (the expected output
        ; shows the load as a `mem` operand of the second vunpcklps). The second
        ; shuffle is returned directly. The autogenerated assertions below pin,
        ; for each CPU prefix set up by the RUN lines, the emitted instructions
        ; and their `sched: [..]` annotations.
   5152 ; GENERIC-LABEL: test_unpcklps:
   5153 ; GENERIC:       # %bb.0:
   5154 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
   5155 ; GENERIC-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
   5156 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5157 ;
   5158 ; SANDY-LABEL: test_unpcklps:
   5159 ; SANDY:       # %bb.0:
   5160 ; SANDY-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
   5161 ; SANDY-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
   5162 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5163 ;
   5164 ; HASWELL-LABEL: test_unpcklps:
   5165 ; HASWELL:       # %bb.0:
   5166 ; HASWELL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
   5167 ; HASWELL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
   5168 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5169 ;
   5170 ; BROADWELL-LABEL: test_unpcklps:
   5171 ; BROADWELL:       # %bb.0:
   5172 ; BROADWELL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
   5173 ; BROADWELL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
   5174 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5175 ;
   5176 ; SKYLAKE-LABEL: test_unpcklps:
   5177 ; SKYLAKE:       # %bb.0:
   5178 ; SKYLAKE-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
   5179 ; SKYLAKE-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
   5180 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5181 ;
   5182 ; SKX-LABEL: test_unpcklps:
   5183 ; SKX:       # %bb.0:
   5184 ; SKX-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
   5185 ; SKX-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
   5186 ; SKX-NEXT:    retq # sched: [7:1.00]
   5187 ;
   5188 ; BTVER2-LABEL: test_unpcklps:
   5189 ; BTVER2:       # %bb.0:
   5190 ; BTVER2-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
   5191 ; BTVER2-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:2.00]
   5192 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5193 ;
   5194 ; ZNVER1-LABEL: test_unpcklps:
   5195 ; ZNVER1:       # %bb.0:
   5196 ; ZNVER1-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.50]
   5197 ; ZNVER1-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:0.50]
   5198 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; Mask <0,8,1,9,4,12,5,13> selects a0[0], a1[0], a0[1], a1[1],
          ; a0[4], a1[4], a0[5], a1[5].
   5199   %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
   5200   %2 = load <8 x float>, <8 x float> *%a2, align 32
          ; Same mask, applied to the first shuffle's result and the loaded vector.
   5201   %3 = shufflevector <8 x float> %1, <8 x float> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
   5202   ret <8 x float> %3
   5203 }
   5204 
   5205 define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
        ; Schedule-annotation test for the 256-bit vxorpd.
        ; The IR bitcasts %a0 and %a1 to <4 x i64>, xors them, xors the result
        ; with a vector loaded from %a2, bitcasts back to <4 x double> and adds
        ; %a1. The expected output is a register-register vxorpd, a vxorpd with a
        ; (%rdi) memory operand, and a vaddpd. The autogenerated assertions below
        ; pin, for each CPU prefix set up by the RUN lines, those instructions
        ; and their `sched: [..]` annotations.
   5206 ; GENERIC-LABEL: test_xorpd:
   5207 ; GENERIC:       # %bb.0:
   5208 ; GENERIC-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   5209 ; GENERIC-NEXT:    vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   5210 ; GENERIC-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   5211 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5212 ;
   5213 ; SANDY-LABEL: test_xorpd:
   5214 ; SANDY:       # %bb.0:
   5215 ; SANDY-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   5216 ; SANDY-NEXT:    vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   5217 ; SANDY-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   5218 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5219 ;
   5220 ; HASWELL-LABEL: test_xorpd:
   5221 ; HASWELL:       # %bb.0:
   5222 ; HASWELL-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   5223 ; HASWELL-NEXT:    vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   5224 ; HASWELL-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   5225 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5226 ;
   5227 ; BROADWELL-LABEL: test_xorpd:
   5228 ; BROADWELL:       # %bb.0:
   5229 ; BROADWELL-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   5230 ; BROADWELL-NEXT:    vxorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
   5231 ; BROADWELL-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   5232 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5233 ;
   5234 ; SKYLAKE-LABEL: test_xorpd:
   5235 ; SKYLAKE:       # %bb.0:
   5236 ; SKYLAKE-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
   5237 ; SKYLAKE-NEXT:    vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
   5238 ; SKYLAKE-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
   5239 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5240 ;
   5241 ; SKX-LABEL: test_xorpd:
   5242 ; SKX:       # %bb.0:
   5243 ; SKX-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
   5244 ; SKX-NEXT:    vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
   5245 ; SKX-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
   5246 ; SKX-NEXT:    retq # sched: [7:1.00]
   5247 ;
   5248 ; BTVER2-LABEL: test_xorpd:
   5249 ; BTVER2:       # %bb.0:
   5250 ; BTVER2-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   5251 ; BTVER2-NEXT:    vxorpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
   5252 ; BTVER2-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
   5253 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5254 ;
   5255 ; ZNVER1-LABEL: test_xorpd:
   5256 ; ZNVER1:       # %bb.0:
   5257 ; ZNVER1-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
   5258 ; ZNVER1-NEXT:    vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
   5259 ; ZNVER1-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   5260 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; xor is an integer operation in IR, so the operands are bitcast to
          ; <4 x i64> first.
   5261   %1 = bitcast <4 x double> %a0 to <4 x i64>
   5262   %2 = bitcast <4 x double> %a1 to <4 x i64>
   5263   %3 = xor <4 x i64> %1, %2
   5264   %4 = load <4 x double>, <4 x double> *%a2, align 32
   5265   %5 = bitcast <4 x double> %4 to <4 x i64>
   5266   %6 = xor <4 x i64> %3, %5
   5267   %7 = bitcast <4 x i64> %6 to <4 x double>
          ; NOTE(review): the trailing fadd presumably forces the double-precision
          ; FP domain so that vxorpd (not another xor form) is selected - confirm.
   5268   %8 = fadd <4 x double> %a1, %7
   5269   ret <4 x double> %8
   5270 }
   5271 
   5272 define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
        ; Schedule-annotation test for the 256-bit vxorps.
        ; The IR bitcasts %a0 and %a1 to <4 x i64>, xors them, xors the result
        ; with a vector loaded from %a2, bitcasts back to <8 x float> and adds
        ; %a1. The expected output is a register-register vxorps, a vxorps with a
        ; (%rdi) memory operand, and a vaddps. The autogenerated assertions below
        ; pin, for each CPU prefix set up by the RUN lines, those instructions
        ; and their `sched: [..]` annotations.
   5273 ; GENERIC-LABEL: test_xorps:
   5274 ; GENERIC:       # %bb.0:
   5275 ; GENERIC-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   5276 ; GENERIC-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   5277 ; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   5278 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5279 ;
   5280 ; SANDY-LABEL: test_xorps:
   5281 ; SANDY:       # %bb.0:
   5282 ; SANDY-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   5283 ; SANDY-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   5284 ; SANDY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   5285 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5286 ;
   5287 ; HASWELL-LABEL: test_xorps:
   5288 ; HASWELL:       # %bb.0:
   5289 ; HASWELL-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   5290 ; HASWELL-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
   5291 ; HASWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   5292 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5293 ;
   5294 ; BROADWELL-LABEL: test_xorps:
   5295 ; BROADWELL:       # %bb.0:
   5296 ; BROADWELL-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   5297 ; BROADWELL-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
   5298 ; BROADWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   5299 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5300 ;
   5301 ; SKYLAKE-LABEL: test_xorps:
   5302 ; SKYLAKE:       # %bb.0:
   5303 ; SKYLAKE-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
   5304 ; SKYLAKE-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
   5305 ; SKYLAKE-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
   5306 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5307 ;
   5308 ; SKX-LABEL: test_xorps:
   5309 ; SKX:       # %bb.0:
   5310 ; SKX-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
   5311 ; SKX-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
   5312 ; SKX-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
   5313 ; SKX-NEXT:    retq # sched: [7:1.00]
   5314 ;
   5315 ; BTVER2-LABEL: test_xorps:
   5316 ; BTVER2:       # %bb.0:
   5317 ; BTVER2-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
   5318 ; BTVER2-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
   5319 ; BTVER2-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
   5320 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5321 ;
   5322 ; ZNVER1-LABEL: test_xorps:
   5323 ; ZNVER1:       # %bb.0:
   5324 ; ZNVER1-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
   5325 ; ZNVER1-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
   5326 ; ZNVER1-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
   5327 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
          ; xor is an integer operation in IR, so the operands are bitcast to
          ; <4 x i64> first (same total width as <8 x float>).
   5328   %1 = bitcast <8 x float> %a0 to <4 x i64>
   5329   %2 = bitcast <8 x float> %a1 to <4 x i64>
   5330   %3 = xor <4 x i64> %1, %2
   5331   %4 = load <8 x float>, <8 x float> *%a2, align 32
   5332   %5 = bitcast <8 x float> %4 to <4 x i64>
   5333   %6 = xor <4 x i64> %3, %5
   5334   %7 = bitcast <4 x i64> %6 to <8 x float>
          ; NOTE(review): the trailing fadd presumably forces the single-precision
          ; FP domain so that vxorps (not another xor form) is selected - confirm.
   5335   %8 = fadd <8 x float> %a1, %7
   5336   ret <8 x float> %8
   5337 }
   5338 
   5339 define void @test_zeroall() {
        ; Schedule-annotation test for vzeroall.
        ; The body only calls the @llvm.x86.avx.vzeroall intrinsic (declared
        ; right after this function) and returns. The autogenerated assertions
        ; below pin, for each CPU prefix set up by the RUN lines, a single
        ; vzeroall followed by retq, together with their `sched: [..]`
        ; annotations.
   5340 ; GENERIC-LABEL: test_zeroall:
   5341 ; GENERIC:       # %bb.0:
   5342 ; GENERIC-NEXT:    vzeroall # sched: [100:0.33]
   5343 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5344 ;
   5345 ; SANDY-LABEL: test_zeroall:
   5346 ; SANDY:       # %bb.0:
   5347 ; SANDY-NEXT:    vzeroall # sched: [100:0.33]
   5348 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5349 ;
   5350 ; HASWELL-LABEL: test_zeroall:
   5351 ; HASWELL:       # %bb.0:
   5352 ; HASWELL-NEXT:    vzeroall # sched: [16:16.00]
   5353 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5354 ;
   5355 ; BROADWELL-LABEL: test_zeroall:
   5356 ; BROADWELL:       # %bb.0:
   5357 ; BROADWELL-NEXT:    vzeroall # sched: [16:16.00]
   5358 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5359 ;
   5360 ; SKYLAKE-LABEL: test_zeroall:
   5361 ; SKYLAKE:       # %bb.0:
   5362 ; SKYLAKE-NEXT:    vzeroall # sched: [16:4.00]
   5363 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5364 ;
   5365 ; SKX-LABEL: test_zeroall:
   5366 ; SKX:       # %bb.0:
   5367 ; SKX-NEXT:    vzeroall # sched: [16:4.00]
   5368 ; SKX-NEXT:    retq # sched: [7:1.00]
   5369 ;
   5370 ; BTVER2-LABEL: test_zeroall:
   5371 ; BTVER2:       # %bb.0:
   5372 ; BTVER2-NEXT:    vzeroall # sched: [90:36.50]
   5373 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5374 ;
   5375 ; ZNVER1-LABEL: test_zeroall:
   5376 ; ZNVER1:       # %bb.0:
   5377 ; ZNVER1-NEXT:    vzeroall # sched: [100:0.25]
   5378 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5379   call void @llvm.x86.avx.vzeroall()
   5380   ret void
   5381 }
        ; Intrinsic called by test_zeroall above; takes no arguments, returns void.
   5382 declare void @llvm.x86.avx.vzeroall() nounwind
   5383 
   5384 define void @test_zeroupper() {
        ; Schedule-annotation test for vzeroupper.
        ; The body only calls the @llvm.x86.avx.vzeroupper intrinsic (declared
        ; right after this function) and returns. The autogenerated assertions
        ; below pin, for each CPU prefix set up by the RUN lines, a single
        ; vzeroupper followed by retq, together with their `sched: [..]`
        ; annotations.
   5385 ; GENERIC-LABEL: test_zeroupper:
   5386 ; GENERIC:       # %bb.0:
   5387 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
   5388 ; GENERIC-NEXT:    retq # sched: [1:1.00]
   5389 ;
   5390 ; SANDY-LABEL: test_zeroupper:
   5391 ; SANDY:       # %bb.0:
   5392 ; SANDY-NEXT:    vzeroupper # sched: [100:0.33]
   5393 ; SANDY-NEXT:    retq # sched: [1:1.00]
   5394 ;
   5395 ; HASWELL-LABEL: test_zeroupper:
   5396 ; HASWELL:       # %bb.0:
   5397 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
   5398 ; HASWELL-NEXT:    retq # sched: [7:1.00]
   5399 ;
   5400 ; BROADWELL-LABEL: test_zeroupper:
   5401 ; BROADWELL:       # %bb.0:
   5402 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
   5403 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
   5404 ;
   5405 ; SKYLAKE-LABEL: test_zeroupper:
   5406 ; SKYLAKE:       # %bb.0:
   5407 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
   5408 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
   5409 ;
   5410 ; SKX-LABEL: test_zeroupper:
   5411 ; SKX:       # %bb.0:
   5412 ; SKX-NEXT:    vzeroupper # sched: [4:1.00]
   5413 ; SKX-NEXT:    retq # sched: [7:1.00]
   5414 ;
   5415 ; BTVER2-LABEL: test_zeroupper:
   5416 ; BTVER2:       # %bb.0:
   5417 ; BTVER2-NEXT:    vzeroupper # sched: [46:18.50]
   5418 ; BTVER2-NEXT:    retq # sched: [4:1.00]
   5419 ;
   5420 ; ZNVER1-LABEL: test_zeroupper:
   5421 ; ZNVER1:       # %bb.0:
   5422 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
   5423 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
   5424   call void @llvm.x86.avx.vzeroupper()
   5425   ret void
   5426 }
        ; Intrinsic called by test_zeroupper above; takes no arguments, returns void.
   5427 declare void @llvm.x86.avx.vzeroupper() nounwind
   5428 
   5429 !0 = !{i32 1}
   5430