; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1

; Each invocation above runs llc with -print-schedule for one -mcpu value and
; verifies the output against the shared CHECK prefix plus one CPU-specific
; prefix. The sandybridge and ivybridge invocations share the SANDY prefix.
; The generic x86-64 invocation passes -mattr=+avx; every other invocation
; passes -mattr=-avx2.
;
; The expected "# sched: [A:B]" annotations are presumably latency and
; reciprocal throughput -- NOTE(review): confirm against llc's -print-schedule
; documentation. The assertions are autogenerated (see the NOTE on the first
; line), so regenerate them with that script instead of editing them by hand.

; 256-bit packed-double add. The IR adds the two register arguments, then adds
; a <4 x double> loaded from %a2. The assertions expect a register-register
; vaddpd followed by a vaddpd whose source operand is the (%rdi) memory
; reference, i.e. the load is expected to be folded into the second add.
define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
; GENERIC-LABEL: test_addpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_addpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_addpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_addpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; BROADWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_addpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addpd:
; SKX: # %bb.0:
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_addpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; BTVER2-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_addpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = fadd <4 x double> %a0, %a1
  %2 = load <4 x double>, <4 x double> *%a2, align 32
  %3 = fadd <4 x double> %1, %2
  ret <4 x double> %3
}

; 256-bit packed-single add. Same shape as test_addpd but on <8 x float>: one
; add of the two register arguments, then one add with a vector loaded from
; %a2. The assertions expect a register-register vaddps followed by a vaddps
; with the (%rdi) memory operand.
define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
; GENERIC-LABEL: test_addps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_addps:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_addps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_addps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; BROADWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_addps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addps:
; SKX: # %bb.0:
; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_addps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; BTVER2-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_addps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = fadd <8 x float> %a0, %a1
  %2 = load <8 x float>, <8 x float> *%a2, align 32
  %3 = fadd <8 x float> %1, %2
  ret <8 x float> %3
}

; 256-bit packed-double add/subtract. The IR calls the
; llvm.x86.avx.addsub.pd.256 intrinsic twice: first on the two register
; arguments, then on that result and a <4 x double> loaded from %a2. The
; assertions expect a register-register vaddsubpd followed by a vaddsubpd with
; the (%rdi) memory operand.
define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
; GENERIC-LABEL: test_addsubpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_addsubpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_addsubpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_addsubpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; BROADWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_addsubpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_addsubpd:
; SKX: # %bb.0:
; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_addsubpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; BTVER2-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_addsubpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1)
  %2 = load <4 x double>, <4 x double> *%a2, align 32
  %3 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %1, <4 x double> %2)
  ret <4 x double> %3
}
; Intrinsic used by test_addsubpd above.
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone

175 define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { 176 ; GENERIC-LABEL: test_addsubps: 177 ; GENERIC: # %bb.0: 178 ; GENERIC-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 179 ; GENERIC-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 180 ; GENERIC-NEXT: retq # sched: [1:1.00] 181 ; 182 ; SANDY-LABEL: test_addsubps: 183 ; SANDY: # %bb.0: 184 ; SANDY-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 185 ; SANDY-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 186 ; SANDY-NEXT: retq # sched: [1:1.00] 187 ; 188 ; HASWELL-LABEL: test_addsubps: 189 ; HASWELL: # %bb.0: 190 ; HASWELL-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 191 ; HASWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 192 ; HASWELL-NEXT: retq # sched: [7:1.00] 193 ; 194 ; BROADWELL-LABEL: test_addsubps: 195 ; BROADWELL: # %bb.0: 196 ; BROADWELL-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 197 ; BROADWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 198 ; BROADWELL-NEXT: retq # sched: [7:1.00] 199 ; 200 ; SKYLAKE-LABEL: test_addsubps: 201 ; SKYLAKE: # %bb.0: 202 ; SKYLAKE-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 203 ; SKYLAKE-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 204 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 205 ; 206 ; SKX-LABEL: test_addsubps: 207 ; SKX: # %bb.0: 208 ; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 209 ; SKX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 210 ; SKX-NEXT: retq # sched: [7:1.00] 211 ; 212 ; BTVER2-LABEL: test_addsubps: 213 ; BTVER2: # %bb.0: 214 ; BTVER2-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 215 ; BTVER2-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] 216 ; BTVER2-NEXT: retq # sched: [4:1.00] 217 ; 218 ; ZNVER1-LABEL: test_addsubps: 219 ; ZNVER1: # %bb.0: 220 ; ZNVER1-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 221 ; ZNVER1-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 222 ; 
ZNVER1-NEXT: retq # sched: [1:0.50] 223 %1 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) 224 %2 = load <8 x float>, <8 x float> *%a2, align 32 225 %3 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %1, <8 x float> %2) 226 ret <8 x float> %3 227 } 228 declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone 229 230 define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 231 ; GENERIC-LABEL: test_andnotpd: 232 ; GENERIC: # %bb.0: 233 ; GENERIC-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 234 ; GENERIC-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 235 ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 236 ; GENERIC-NEXT: retq # sched: [1:1.00] 237 ; 238 ; SANDY-LABEL: test_andnotpd: 239 ; SANDY: # %bb.0: 240 ; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 241 ; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 242 ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 243 ; SANDY-NEXT: retq # sched: [1:1.00] 244 ; 245 ; HASWELL-LABEL: test_andnotpd: 246 ; HASWELL: # %bb.0: 247 ; HASWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 248 ; HASWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 249 ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 250 ; HASWELL-NEXT: retq # sched: [7:1.00] 251 ; 252 ; BROADWELL-LABEL: test_andnotpd: 253 ; BROADWELL: # %bb.0: 254 ; BROADWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 255 ; BROADWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 256 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 257 ; BROADWELL-NEXT: retq # sched: [7:1.00] 258 ; 259 ; SKYLAKE-LABEL: test_andnotpd: 260 ; SKYLAKE: # %bb.0: 261 ; SKYLAKE-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 262 ; SKYLAKE-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 263 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 264 ; SKYLAKE-NEXT: retq 
# sched: [7:1.00] 265 ; 266 ; SKX-LABEL: test_andnotpd: 267 ; SKX: # %bb.0: 268 ; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 269 ; SKX-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 270 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 271 ; SKX-NEXT: retq # sched: [7:1.00] 272 ; 273 ; BTVER2-LABEL: test_andnotpd: 274 ; BTVER2: # %bb.0: 275 ; BTVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 276 ; BTVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] 277 ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] 278 ; BTVER2-NEXT: retq # sched: [4:1.00] 279 ; 280 ; ZNVER1-LABEL: test_andnotpd: 281 ; ZNVER1: # %bb.0: 282 ; ZNVER1-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 283 ; ZNVER1-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 284 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 285 ; ZNVER1-NEXT: retq # sched: [1:0.50] 286 %1 = bitcast <4 x double> %a0 to <4 x i64> 287 %2 = bitcast <4 x double> %a1 to <4 x i64> 288 %3 = xor <4 x i64> %1, <i64 -1, i64 -1, i64 -1, i64 -1> 289 %4 = and <4 x i64> %3, %2 290 %5 = load <4 x double>, <4 x double> *%a2, align 32 291 %6 = bitcast <4 x double> %5 to <4 x i64> 292 %7 = xor <4 x i64> %4, <i64 -1, i64 -1, i64 -1, i64 -1> 293 %8 = and <4 x i64> %6, %7 294 %9 = bitcast <4 x i64> %8 to <4 x double> 295 %10 = fadd <4 x double> %a1, %9 296 ret <4 x double> %10 297 } 298 299 define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { 300 ; GENERIC-LABEL: test_andnotps: 301 ; GENERIC: # %bb.0: 302 ; GENERIC-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 303 ; GENERIC-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 304 ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 305 ; GENERIC-NEXT: retq # sched: [1:1.00] 306 ; 307 ; SANDY-LABEL: test_andnotps: 308 ; SANDY: # %bb.0: 309 ; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 310 ; SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 311 ; SANDY-NEXT: 
vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 312 ; SANDY-NEXT: retq # sched: [1:1.00] 313 ; 314 ; HASWELL-LABEL: test_andnotps: 315 ; HASWELL: # %bb.0: 316 ; HASWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 317 ; HASWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 318 ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 319 ; HASWELL-NEXT: retq # sched: [7:1.00] 320 ; 321 ; BROADWELL-LABEL: test_andnotps: 322 ; BROADWELL: # %bb.0: 323 ; BROADWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 324 ; BROADWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 325 ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 326 ; BROADWELL-NEXT: retq # sched: [7:1.00] 327 ; 328 ; SKYLAKE-LABEL: test_andnotps: 329 ; SKYLAKE: # %bb.0: 330 ; SKYLAKE-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 331 ; SKYLAKE-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 332 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 333 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 334 ; 335 ; SKX-LABEL: test_andnotps: 336 ; SKX: # %bb.0: 337 ; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 338 ; SKX-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 339 ; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 340 ; SKX-NEXT: retq # sched: [7:1.00] 341 ; 342 ; BTVER2-LABEL: test_andnotps: 343 ; BTVER2: # %bb.0: 344 ; BTVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 345 ; BTVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] 346 ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] 347 ; BTVER2-NEXT: retq # sched: [4:1.00] 348 ; 349 ; ZNVER1-LABEL: test_andnotps: 350 ; ZNVER1: # %bb.0: 351 ; ZNVER1-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 352 ; ZNVER1-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 353 ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 354 ; ZNVER1-NEXT: retq # sched: [1:0.50] 355 %1 = bitcast <8 x float> %a0 to <4 x i64> 356 %2 = bitcast <8 x float> %a1 to <4 x i64> 357 
%3 = xor <4 x i64> %1, <i64 -1, i64 -1, i64 -1, i64 -1> 358 %4 = and <4 x i64> %3, %2 359 %5 = load <8 x float>, <8 x float> *%a2, align 32 360 %6 = bitcast <8 x float> %5 to <4 x i64> 361 %7 = xor <4 x i64> %4, <i64 -1, i64 -1, i64 -1, i64 -1> 362 %8 = and <4 x i64> %6, %7 363 %9 = bitcast <4 x i64> %8 to <8 x float> 364 %10 = fadd <8 x float> %a1, %9 365 ret <8 x float> %10 366 } 367 368 define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 369 ; GENERIC-LABEL: test_andpd: 370 ; GENERIC: # %bb.0: 371 ; GENERIC-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 372 ; GENERIC-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 373 ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 374 ; GENERIC-NEXT: retq # sched: [1:1.00] 375 ; 376 ; SANDY-LABEL: test_andpd: 377 ; SANDY: # %bb.0: 378 ; SANDY-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 379 ; SANDY-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 380 ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 381 ; SANDY-NEXT: retq # sched: [1:1.00] 382 ; 383 ; HASWELL-LABEL: test_andpd: 384 ; HASWELL: # %bb.0: 385 ; HASWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 386 ; HASWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 387 ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 388 ; HASWELL-NEXT: retq # sched: [7:1.00] 389 ; 390 ; BROADWELL-LABEL: test_andpd: 391 ; BROADWELL: # %bb.0: 392 ; BROADWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 393 ; BROADWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 394 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 395 ; BROADWELL-NEXT: retq # sched: [7:1.00] 396 ; 397 ; SKYLAKE-LABEL: test_andpd: 398 ; SKYLAKE: # %bb.0: 399 ; SKYLAKE-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 400 ; SKYLAKE-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 401 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 402 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 403 ; 404 ; 
SKX-LABEL: test_andpd: 405 ; SKX: # %bb.0: 406 ; SKX-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 407 ; SKX-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 408 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 409 ; SKX-NEXT: retq # sched: [7:1.00] 410 ; 411 ; BTVER2-LABEL: test_andpd: 412 ; BTVER2: # %bb.0: 413 ; BTVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 414 ; BTVER2-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] 415 ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] 416 ; BTVER2-NEXT: retq # sched: [4:1.00] 417 ; 418 ; ZNVER1-LABEL: test_andpd: 419 ; ZNVER1: # %bb.0: 420 ; ZNVER1-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 421 ; ZNVER1-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 422 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 423 ; ZNVER1-NEXT: retq # sched: [1:0.50] 424 %1 = bitcast <4 x double> %a0 to <4 x i64> 425 %2 = bitcast <4 x double> %a1 to <4 x i64> 426 %3 = and <4 x i64> %1, %2 427 %4 = load <4 x double>, <4 x double> *%a2, align 32 428 %5 = bitcast <4 x double> %4 to <4 x i64> 429 %6 = and <4 x i64> %3, %5 430 %7 = bitcast <4 x i64> %6 to <4 x double> 431 %8 = fadd <4 x double> %a1, %7 432 ret <4 x double> %8 433 } 434 435 define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { 436 ; GENERIC-LABEL: test_andps: 437 ; GENERIC: # %bb.0: 438 ; GENERIC-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 439 ; GENERIC-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 440 ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 441 ; GENERIC-NEXT: retq # sched: [1:1.00] 442 ; 443 ; SANDY-LABEL: test_andps: 444 ; SANDY: # %bb.0: 445 ; SANDY-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 446 ; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 447 ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 448 ; SANDY-NEXT: retq # sched: [1:1.00] 449 ; 450 ; HASWELL-LABEL: test_andps: 451 ; HASWELL: # %bb.0: 452 ; HASWELL-NEXT: vandps 
%ymm1, %ymm0, %ymm0 # sched: [1:1.00] 453 ; HASWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 454 ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 455 ; HASWELL-NEXT: retq # sched: [7:1.00] 456 ; 457 ; BROADWELL-LABEL: test_andps: 458 ; BROADWELL: # %bb.0: 459 ; BROADWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 460 ; BROADWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 461 ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 462 ; BROADWELL-NEXT: retq # sched: [7:1.00] 463 ; 464 ; SKYLAKE-LABEL: test_andps: 465 ; SKYLAKE: # %bb.0: 466 ; SKYLAKE-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 467 ; SKYLAKE-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 468 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 469 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 470 ; 471 ; SKX-LABEL: test_andps: 472 ; SKX: # %bb.0: 473 ; SKX-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 474 ; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 475 ; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 476 ; SKX-NEXT: retq # sched: [7:1.00] 477 ; 478 ; BTVER2-LABEL: test_andps: 479 ; BTVER2: # %bb.0: 480 ; BTVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 481 ; BTVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] 482 ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] 483 ; BTVER2-NEXT: retq # sched: [4:1.00] 484 ; 485 ; ZNVER1-LABEL: test_andps: 486 ; ZNVER1: # %bb.0: 487 ; ZNVER1-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 488 ; ZNVER1-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 489 ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 490 ; ZNVER1-NEXT: retq # sched: [1:0.50] 491 %1 = bitcast <8 x float> %a0 to <4 x i64> 492 %2 = bitcast <8 x float> %a1 to <4 x i64> 493 %3 = and <4 x i64> %1, %2 494 %4 = load <8 x float>, <8 x float> *%a2, align 32 495 %5 = bitcast <8 x float> %4 to <4 x i64> 496 %6 = and <4 x i64> %3, %5 497 %7 = bitcast <4 x i64> %6 to <8 x float> 498 
%8 = fadd <8 x float> %a1, %7 499 ret <8 x float> %8 500 } 501 502 define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 503 ; GENERIC-LABEL: test_blendpd: 504 ; GENERIC: # %bb.0: 505 ; GENERIC-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] 506 ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 507 ; GENERIC-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50] 508 ; GENERIC-NEXT: retq # sched: [1:1.00] 509 ; 510 ; SANDY-LABEL: test_blendpd: 511 ; SANDY: # %bb.0: 512 ; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] 513 ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 514 ; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50] 515 ; SANDY-NEXT: retq # sched: [1:1.00] 516 ; 517 ; HASWELL-LABEL: test_blendpd: 518 ; HASWELL: # %bb.0: 519 ; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] 520 ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 521 ; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50] 522 ; HASWELL-NEXT: retq # sched: [7:1.00] 523 ; 524 ; BROADWELL-LABEL: test_blendpd: 525 ; BROADWELL: # %bb.0: 526 ; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] 527 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 528 ; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [7:0.50] 529 ; BROADWELL-NEXT: retq # sched: [7:1.00] 530 ; 531 ; SKYLAKE-LABEL: test_blendpd: 532 ; SKYLAKE: # %bb.0: 533 ; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] 534 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 535 ; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50] 536 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 537 ; 538 ; SKX-LABEL: test_blendpd: 539 ; SKX: # %bb.0: 540 ; SKX-NEXT: vblendpd {{.*#+}} ymm0 = 
ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] 541 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 542 ; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50] 543 ; SKX-NEXT: retq # sched: [7:1.00] 544 ; 545 ; BTVER2-LABEL: test_blendpd: 546 ; BTVER2: # %bb.0: 547 ; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00] 548 ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] 549 ; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [6:2.00] 550 ; BTVER2-NEXT: retq # sched: [4:1.00] 551 ; 552 ; ZNVER1-LABEL: test_blendpd: 553 ; ZNVER1: # %bb.0: 554 ; ZNVER1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] 555 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 556 ; ZNVER1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50] 557 ; ZNVER1-NEXT: retq # sched: [1:0.50] 558 %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3> 559 %2 = load <4 x double>, <4 x double> *%a2, align 32 560 %3 = fadd <4 x double> %a1, %1 561 %4 = shufflevector <4 x double> %3, <4 x double> %2, <4 x i32> <i32 0, i32 5, i32 6, i32 3> 562 ret <4 x double> %4 563 } 564 565 define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { 566 ; GENERIC-LABEL: test_blendps: 567 ; GENERIC: # %bb.0: 568 ; GENERIC-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50] 569 ; GENERIC-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50] 570 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 571 ; GENERIC-NEXT: retq # sched: [1:1.00] 572 ; 573 ; SANDY-LABEL: test_blendps: 574 ; SANDY: # %bb.0: 575 ; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50] 576 ; SANDY-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50] 577 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: 
[3:1.00] 578 ; SANDY-NEXT: retq # sched: [1:1.00] 579 ; 580 ; HASWELL-LABEL: test_blendps: 581 ; HASWELL: # %bb.0: 582 ; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] 583 ; HASWELL-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50] 584 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 585 ; HASWELL-NEXT: retq # sched: [7:1.00] 586 ; 587 ; BROADWELL-LABEL: test_blendps: 588 ; BROADWELL: # %bb.0: 589 ; BROADWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] 590 ; BROADWELL-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [7:0.50] 591 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 592 ; BROADWELL-NEXT: retq # sched: [7:1.00] 593 ; 594 ; SKYLAKE-LABEL: test_blendps: 595 ; SKYLAKE: # %bb.0: 596 ; SKYLAKE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] 597 ; SKYLAKE-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50] 598 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 599 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 600 ; 601 ; SKX-LABEL: test_blendps: 602 ; SKX: # %bb.0: 603 ; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] 604 ; SKX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50] 605 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 606 ; SKX-NEXT: retq # sched: [7:1.00] 607 ; 608 ; BTVER2-LABEL: test_blendps: 609 ; BTVER2: # %bb.0: 610 ; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00] 611 ; BTVER2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [6:2.00] 612 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 613 ; BTVER2-NEXT: retq # sched: [4:1.00] 614 ; 615 ; ZNVER1-LABEL: test_blendps: 616 ; ZNVER1: # %bb.0: 617 ; ZNVER1-NEXT: vblendps {{.*#+}} 
ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50] 618 ; ZNVER1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50] 619 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 620 ; ZNVER1-NEXT: retq # sched: [1:0.50] 621 %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7> 622 %2 = load <8 x float>, <8 x float> *%a2, align 32 623 %3 = shufflevector <8 x float> %a1, <8 x float> %2, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 13, i32 14, i32 7> 624 %4 = fadd <8 x float> %1, %3 625 ret <8 x float> %4 626 } 627 628 define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) { 629 ; GENERIC-LABEL: test_blendvpd: 630 ; GENERIC: # %bb.0: 631 ; GENERIC-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] 632 ; GENERIC-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 633 ; GENERIC-NEXT: retq # sched: [1:1.00] 634 ; 635 ; SANDY-LABEL: test_blendvpd: 636 ; SANDY: # %bb.0: 637 ; SANDY-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] 638 ; SANDY-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 639 ; SANDY-NEXT: retq # sched: [1:1.00] 640 ; 641 ; HASWELL-LABEL: test_blendvpd: 642 ; HASWELL: # %bb.0: 643 ; HASWELL-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] 644 ; HASWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 645 ; HASWELL-NEXT: retq # sched: [7:1.00] 646 ; 647 ; BROADWELL-LABEL: test_blendvpd: 648 ; BROADWELL: # %bb.0: 649 ; BROADWELL-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] 650 ; BROADWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:2.00] 651 ; BROADWELL-NEXT: retq # sched: [7:1.00] 652 ; 653 ; SKYLAKE-LABEL: test_blendvpd: 654 ; SKYLAKE: # %bb.0: 655 ; SKYLAKE-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] 656 ; SKYLAKE-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: 
[9:0.67] 657 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 658 ; 659 ; SKX-LABEL: test_blendvpd: 660 ; SKX: # %bb.0: 661 ; SKX-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] 662 ; SKX-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67] 663 ; SKX-NEXT: retq # sched: [7:1.00] 664 ; 665 ; BTVER2-LABEL: test_blendvpd: 666 ; BTVER2: # %bb.0: 667 ; BTVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00] 668 ; BTVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00] 669 ; BTVER2-NEXT: retq # sched: [4:1.00] 670 ; 671 ; ZNVER1-LABEL: test_blendvpd: 672 ; ZNVER1: # %bb.0: 673 ; ZNVER1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 674 ; ZNVER1-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 675 ; ZNVER1-NEXT: retq # sched: [1:0.50] 676 %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) 677 %2 = load <4 x double>, <4 x double> *%a3, align 32 678 %3 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %1, <4 x double> %2, <4 x double> %a2) 679 ret <4 x double> %3 680 } 681 declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone 682 683 define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) { 684 ; GENERIC-LABEL: test_blendvps: 685 ; GENERIC: # %bb.0: 686 ; GENERIC-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] 687 ; GENERIC-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 688 ; GENERIC-NEXT: retq # sched: [1:1.00] 689 ; 690 ; SANDY-LABEL: test_blendvps: 691 ; SANDY: # %bb.0: 692 ; SANDY-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] 693 ; SANDY-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 694 ; SANDY-NEXT: retq # sched: [1:1.00] 695 ; 696 ; HASWELL-LABEL: test_blendvps: 697 ; HASWELL: # %bb.0: 698 ; HASWELL-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] 699 ; HASWELL-NEXT: 
vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 700 ; HASWELL-NEXT: retq # sched: [7:1.00] 701 ; 702 ; BROADWELL-LABEL: test_blendvps: 703 ; BROADWELL: # %bb.0: 704 ; BROADWELL-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] 705 ; BROADWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:2.00] 706 ; BROADWELL-NEXT: retq # sched: [7:1.00] 707 ; 708 ; SKYLAKE-LABEL: test_blendvps: 709 ; SKYLAKE: # %bb.0: 710 ; SKYLAKE-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] 711 ; SKYLAKE-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67] 712 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 713 ; 714 ; SKX-LABEL: test_blendvps: 715 ; SKX: # %bb.0: 716 ; SKX-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] 717 ; SKX-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67] 718 ; SKX-NEXT: retq # sched: [7:1.00] 719 ; 720 ; BTVER2-LABEL: test_blendvps: 721 ; BTVER2: # %bb.0: 722 ; BTVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00] 723 ; BTVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00] 724 ; BTVER2-NEXT: retq # sched: [4:1.00] 725 ; 726 ; ZNVER1-LABEL: test_blendvps: 727 ; ZNVER1: # %bb.0: 728 ; ZNVER1-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 729 ; ZNVER1-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 730 ; ZNVER1-NEXT: retq # sched: [1:0.50] 731 %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) 732 %2 = load <8 x float>, <8 x float> *%a3, align 32 733 %3 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %1, <8 x float> %2, <8 x float> %a2) 734 ret <8 x float> %3 735 } 736 declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone 737 738 define <8 x float> @test_broadcastf128(<4 x float> *%a0) { 739 ; GENERIC-LABEL: test_broadcastf128: 740 ; GENERIC: # %bb.0: 741 ; GENERIC-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00] 742 ; 
GENERIC-NEXT: retq # sched: [1:1.00] 743 ; 744 ; SANDY-LABEL: test_broadcastf128: 745 ; SANDY: # %bb.0: 746 ; SANDY-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00] 747 ; SANDY-NEXT: retq # sched: [1:1.00] 748 ; 749 ; HASWELL-LABEL: test_broadcastf128: 750 ; HASWELL: # %bb.0: 751 ; HASWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50] 752 ; HASWELL-NEXT: retq # sched: [7:1.00] 753 ; 754 ; BROADWELL-LABEL: test_broadcastf128: 755 ; BROADWELL: # %bb.0: 756 ; BROADWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:0.50] 757 ; BROADWELL-NEXT: retq # sched: [7:1.00] 758 ; 759 ; SKYLAKE-LABEL: test_broadcastf128: 760 ; SKYLAKE: # %bb.0: 761 ; SKYLAKE-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50] 762 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 763 ; 764 ; SKX-LABEL: test_broadcastf128: 765 ; SKX: # %bb.0: 766 ; SKX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50] 767 ; SKX-NEXT: retq # sched: [7:1.00] 768 ; 769 ; BTVER2-LABEL: test_broadcastf128: 770 ; BTVER2: # %bb.0: 771 ; BTVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:1.00] 772 ; BTVER2-NEXT: retq # sched: [4:1.00] 773 ; 774 ; ZNVER1-LABEL: test_broadcastf128: 775 ; ZNVER1: # %bb.0: 776 ; ZNVER1-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [8:0.50] 777 ; ZNVER1-NEXT: retq # sched: [1:0.50] 778 %1 = load <4 x float>, <4 x float> *%a0, align 32 779 %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 780 ret <8 x float> %2 781 } 782 783 define <4 x double> @test_broadcastsd_ymm(double *%a0) { 784 ; GENERIC-LABEL: test_broadcastsd_ymm: 785 ; GENERIC: # %bb.0: 786 ; GENERIC-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] 787 ; GENERIC-NEXT: retq # sched: [1:1.00] 788 ; 789 ; SANDY-LABEL: test_broadcastsd_ymm: 790 ; SANDY: # %bb.0: 791 ; SANDY-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] 792 ; SANDY-NEXT: retq # sched: [1:1.00] 793 ; 
794 ; HASWELL-LABEL: test_broadcastsd_ymm: 795 ; HASWELL: # %bb.0: 796 ; HASWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] 797 ; HASWELL-NEXT: retq # sched: [7:1.00] 798 ; 799 ; BROADWELL-LABEL: test_broadcastsd_ymm: 800 ; BROADWELL: # %bb.0: 801 ; BROADWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:0.50] 802 ; BROADWELL-NEXT: retq # sched: [7:1.00] 803 ; 804 ; SKYLAKE-LABEL: test_broadcastsd_ymm: 805 ; SKYLAKE: # %bb.0: 806 ; SKYLAKE-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] 807 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 808 ; 809 ; SKX-LABEL: test_broadcastsd_ymm: 810 ; SKX: # %bb.0: 811 ; SKX-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] 812 ; SKX-NEXT: retq # sched: [7:1.00] 813 ; 814 ; BTVER2-LABEL: test_broadcastsd_ymm: 815 ; BTVER2: # %bb.0: 816 ; BTVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:2.00] 817 ; BTVER2-NEXT: retq # sched: [4:1.00] 818 ; 819 ; ZNVER1-LABEL: test_broadcastsd_ymm: 820 ; ZNVER1: # %bb.0: 821 ; ZNVER1-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [8:0.50] 822 ; ZNVER1-NEXT: retq # sched: [1:0.50] 823 %1 = load double, double *%a0, align 8 824 %2 = insertelement <4 x double> undef, double %1, i32 0 825 %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> zeroinitializer 826 ret <4 x double> %3 827 } 828 829 define <4 x float> @test_broadcastss(float *%a0) { 830 ; GENERIC-LABEL: test_broadcastss: 831 ; GENERIC: # %bb.0: 832 ; GENERIC-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] 833 ; GENERIC-NEXT: retq # sched: [1:1.00] 834 ; 835 ; SANDY-LABEL: test_broadcastss: 836 ; SANDY: # %bb.0: 837 ; SANDY-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] 838 ; SANDY-NEXT: retq # sched: [1:1.00] 839 ; 840 ; HASWELL-LABEL: test_broadcastss: 841 ; HASWELL: # %bb.0: 842 ; HASWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] 843 ; HASWELL-NEXT: retq # sched: [7:1.00] 844 ; 845 ; BROADWELL-LABEL: test_broadcastss: 846 ; BROADWELL: # %bb.0: 847 ; BROADWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [5:0.50] 
848 ; BROADWELL-NEXT: retq # sched: [7:1.00] 849 ; 850 ; SKYLAKE-LABEL: test_broadcastss: 851 ; SKYLAKE: # %bb.0: 852 ; SKYLAKE-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] 853 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 854 ; 855 ; SKX-LABEL: test_broadcastss: 856 ; SKX: # %bb.0: 857 ; SKX-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] 858 ; SKX-NEXT: retq # sched: [7:1.00] 859 ; 860 ; BTVER2-LABEL: test_broadcastss: 861 ; BTVER2: # %bb.0: 862 ; BTVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:1.00] 863 ; BTVER2-NEXT: retq # sched: [4:1.00] 864 ; 865 ; ZNVER1-LABEL: test_broadcastss: 866 ; ZNVER1: # %bb.0: 867 ; ZNVER1-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [8:0.50] 868 ; ZNVER1-NEXT: retq # sched: [1:0.50] 869 %1 = load float, float *%a0, align 4 870 %2 = insertelement <4 x float> undef, float %1, i32 0 871 %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer 872 ret <4 x float> %3 873 } 874 875 define <8 x float> @test_broadcastss_ymm(float *%a0) { 876 ; GENERIC-LABEL: test_broadcastss_ymm: 877 ; GENERIC: # %bb.0: 878 ; GENERIC-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50] 879 ; GENERIC-NEXT: retq # sched: [1:1.00] 880 ; 881 ; SANDY-LABEL: test_broadcastss_ymm: 882 ; SANDY: # %bb.0: 883 ; SANDY-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50] 884 ; SANDY-NEXT: retq # sched: [1:1.00] 885 ; 886 ; HASWELL-LABEL: test_broadcastss_ymm: 887 ; HASWELL: # %bb.0: 888 ; HASWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50] 889 ; HASWELL-NEXT: retq # sched: [7:1.00] 890 ; 891 ; BROADWELL-LABEL: test_broadcastss_ymm: 892 ; BROADWELL: # %bb.0: 893 ; BROADWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:0.50] 894 ; BROADWELL-NEXT: retq # sched: [7:1.00] 895 ; 896 ; SKYLAKE-LABEL: test_broadcastss_ymm: 897 ; SKYLAKE: # %bb.0: 898 ; SKYLAKE-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50] 899 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 900 ; 901 ; SKX-LABEL: test_broadcastss_ymm: 902 ; SKX: # %bb.0: 903 ; SKX-NEXT: vbroadcastss 
(%rdi), %ymm0 # sched: [7:0.50] 904 ; SKX-NEXT: retq # sched: [7:1.00] 905 ; 906 ; BTVER2-LABEL: test_broadcastss_ymm: 907 ; BTVER2: # %bb.0: 908 ; BTVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:2.00] 909 ; BTVER2-NEXT: retq # sched: [4:1.00] 910 ; 911 ; ZNVER1-LABEL: test_broadcastss_ymm: 912 ; ZNVER1: # %bb.0: 913 ; ZNVER1-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [8:0.50] 914 ; ZNVER1-NEXT: retq # sched: [1:0.50] 915 %1 = load float, float *%a0, align 4 916 %2 = insertelement <8 x float> undef, float %1, i32 0 917 %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> zeroinitializer 918 ret <8 x float> %3 919 } 920 921 define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 922 ; GENERIC-LABEL: test_cmppd: 923 ; GENERIC: # %bb.0: 924 ; GENERIC-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 925 ; GENERIC-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 926 ; GENERIC-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 927 ; GENERIC-NEXT: retq # sched: [1:1.00] 928 ; 929 ; SANDY-LABEL: test_cmppd: 930 ; SANDY: # %bb.0: 931 ; SANDY-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 932 ; SANDY-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 933 ; SANDY-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 934 ; SANDY-NEXT: retq # sched: [1:1.00] 935 ; 936 ; HASWELL-LABEL: test_cmppd: 937 ; HASWELL: # %bb.0: 938 ; HASWELL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 939 ; HASWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 940 ; HASWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 941 ; HASWELL-NEXT: retq # sched: [7:1.00] 942 ; 943 ; BROADWELL-LABEL: test_cmppd: 944 ; BROADWELL: # %bb.0: 945 ; BROADWELL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 946 ; BROADWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 947 ; BROADWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 948 ; BROADWELL-NEXT: retq # sched: [7:1.00] 949 ; 950 ; SKYLAKE-LABEL: test_cmppd: 951 ; 
SKYLAKE: # %bb.0: 952 ; SKYLAKE-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.50] 953 ; SKYLAKE-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 954 ; SKYLAKE-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 955 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 956 ; 957 ; SKX-LABEL: test_cmppd: 958 ; SKX: # %bb.0: 959 ; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.50] 960 ; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 961 ; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 962 ; SKX-NEXT: retq # sched: [7:1.00] 963 ; 964 ; BTVER2-LABEL: test_cmppd: 965 ; BTVER2: # %bb.0: 966 ; BTVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00] 967 ; BTVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00] 968 ; BTVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 969 ; BTVER2-NEXT: retq # sched: [4:1.00] 970 ; 971 ; ZNVER1-LABEL: test_cmppd: 972 ; ZNVER1: # %bb.0: 973 ; ZNVER1-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 974 ; ZNVER1-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 975 ; ZNVER1-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.25] 976 ; ZNVER1-NEXT: retq # sched: [1:0.50] 977 %1 = fcmp oeq <4 x double> %a0, %a1 978 %2 = load <4 x double>, <4 x double> *%a2, align 32 979 %3 = fcmp oeq <4 x double> %a0, %2 980 %4 = sext <4 x i1> %1 to <4 x i64> 981 %5 = sext <4 x i1> %3 to <4 x i64> 982 %6 = or <4 x i64> %4, %5 983 %7 = bitcast <4 x i64> %6 to <4 x double> 984 ret <4 x double> %7 985 } 986 987 define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { 988 ; GENERIC-LABEL: test_cmpps: 989 ; GENERIC: # %bb.0: 990 ; GENERIC-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 991 ; GENERIC-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 992 ; GENERIC-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 993 ; GENERIC-NEXT: retq # sched: [1:1.00] 994 ; 995 ; SANDY-LABEL: test_cmpps: 996 ; SANDY: # %bb.0: 997 ; SANDY-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 998 ; SANDY-NEXT: 
vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 999 ; SANDY-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 1000 ; SANDY-NEXT: retq # sched: [1:1.00] 1001 ; 1002 ; HASWELL-LABEL: test_cmpps: 1003 ; HASWELL: # %bb.0: 1004 ; HASWELL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 1005 ; HASWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 1006 ; HASWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 1007 ; HASWELL-NEXT: retq # sched: [7:1.00] 1008 ; 1009 ; BROADWELL-LABEL: test_cmpps: 1010 ; BROADWELL: # %bb.0: 1011 ; BROADWELL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 1012 ; BROADWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 1013 ; BROADWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 1014 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1015 ; 1016 ; SKYLAKE-LABEL: test_cmpps: 1017 ; SKYLAKE: # %bb.0: 1018 ; SKYLAKE-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.50] 1019 ; SKYLAKE-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 1020 ; SKYLAKE-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 1021 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1022 ; 1023 ; SKX-LABEL: test_cmpps: 1024 ; SKX: # %bb.0: 1025 ; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.50] 1026 ; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 1027 ; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 1028 ; SKX-NEXT: retq # sched: [7:1.00] 1029 ; 1030 ; BTVER2-LABEL: test_cmpps: 1031 ; BTVER2: # %bb.0: 1032 ; BTVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00] 1033 ; BTVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] 1034 ; BTVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] 1035 ; BTVER2-NEXT: retq # sched: [4:1.00] 1036 ; 1037 ; ZNVER1-LABEL: test_cmpps: 1038 ; ZNVER1: # %bb.0: 1039 ; ZNVER1-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] 1040 ; ZNVER1-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 1041 ; ZNVER1-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.25] 1042 ; ZNVER1-NEXT: retq # sched: 
[1:0.50] 1043 %1 = fcmp oeq <8 x float> %a0, %a1 1044 %2 = load <8 x float>, <8 x float> *%a2, align 32 1045 %3 = fcmp oeq <8 x float> %a0, %2 1046 %4 = sext <8 x i1> %1 to <8 x i32> 1047 %5 = sext <8 x i1> %3 to <8 x i32> 1048 %6 = or <8 x i32> %4, %5 1049 %7 = bitcast <8 x i32> %6 to <8 x float> 1050 ret <8 x float> %7 1051 } 1052 1053 define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { 1054 ; GENERIC-LABEL: test_cvtdq2pd: 1055 ; GENERIC: # %bb.0: 1056 ; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] 1057 ; GENERIC-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00] 1058 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 1059 ; GENERIC-NEXT: retq # sched: [1:1.00] 1060 ; 1061 ; SANDY-LABEL: test_cvtdq2pd: 1062 ; SANDY: # %bb.0: 1063 ; SANDY-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] 1064 ; SANDY-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00] 1065 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 1066 ; SANDY-NEXT: retq # sched: [1:1.00] 1067 ; 1068 ; HASWELL-LABEL: test_cvtdq2pd: 1069 ; HASWELL: # %bb.0: 1070 ; HASWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00] 1071 ; HASWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [12:1.00] 1072 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 1073 ; HASWELL-NEXT: retq # sched: [7:1.00] 1074 ; 1075 ; BROADWELL-LABEL: test_cvtdq2pd: 1076 ; BROADWELL: # %bb.0: 1077 ; BROADWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [11:1.00] 1078 ; BROADWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00] 1079 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 1080 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1081 ; 1082 ; SKYLAKE-LABEL: test_cvtdq2pd: 1083 ; SKYLAKE: # %bb.0: 1084 ; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] 1085 ; SKYLAKE-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00] 1086 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 1087 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1088 ; 1089 ; SKX-LABEL: test_cvtdq2pd: 1090 ; SKX: # %bb.0: 1091 ; 
SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] 1092 ; SKX-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00] 1093 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 1094 ; SKX-NEXT: retq # sched: [7:1.00] 1095 ; 1096 ; BTVER2-LABEL: test_cvtdq2pd: 1097 ; BTVER2: # %bb.0: 1098 ; BTVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:2.00] 1099 ; BTVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [3:2.00] 1100 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 1101 ; BTVER2-NEXT: retq # sched: [4:1.00] 1102 ; 1103 ; ZNVER1-LABEL: test_cvtdq2pd: 1104 ; ZNVER1: # %bb.0: 1105 ; ZNVER1-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [12:1.00] 1106 ; ZNVER1-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [5:1.00] 1107 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 1108 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1109 %1 = sitofp <4 x i32> %a0 to <4 x double> 1110 %2 = load <4 x i32>, <4 x i32> *%a1, align 16 1111 %3 = sitofp <4 x i32> %2 to <4 x double> 1112 %4 = fadd <4 x double> %1, %3 1113 ret <4 x double> %4 1114 } 1115 1116 define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) { 1117 ; GENERIC-LABEL: test_cvtdq2ps: 1118 ; GENERIC: # %bb.0: 1119 ; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] 1120 ; GENERIC-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00] 1121 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 1122 ; GENERIC-NEXT: retq # sched: [1:1.00] 1123 ; 1124 ; SANDY-LABEL: test_cvtdq2ps: 1125 ; SANDY: # %bb.0: 1126 ; SANDY-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] 1127 ; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] 1128 ; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:0.50] 1129 ; SANDY-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [3:1.00] 1130 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 1131 ; SANDY-NEXT: retq # sched: [1:1.00] 1132 ; 1133 ; HASWELL-LABEL: test_cvtdq2ps: 1134 ; HASWELL: # %bb.0: 1135 ; HASWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] 1136 ; HASWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # 
sched: [10:1.00] 1137 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 1138 ; HASWELL-NEXT: retq # sched: [7:1.00] 1139 ; 1140 ; BROADWELL-LABEL: test_cvtdq2ps: 1141 ; BROADWELL: # %bb.0: 1142 ; BROADWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] 1143 ; BROADWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [9:1.00] 1144 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 1145 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1146 ; 1147 ; SKYLAKE-LABEL: test_cvtdq2ps: 1148 ; SKYLAKE: # %bb.0: 1149 ; SKYLAKE-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50] 1150 ; SKYLAKE-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50] 1151 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 1152 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1153 ; 1154 ; SKX-LABEL: test_cvtdq2ps: 1155 ; SKX: # %bb.0: 1156 ; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50] 1157 ; SKX-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50] 1158 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 1159 ; SKX-NEXT: retq # sched: [7:1.00] 1160 ; 1161 ; BTVER2-LABEL: test_cvtdq2ps: 1162 ; BTVER2: # %bb.0: 1163 ; BTVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:2.00] 1164 ; BTVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:2.00] 1165 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 1166 ; BTVER2-NEXT: retq # sched: [4:1.00] 1167 ; 1168 ; ZNVER1-LABEL: test_cvtdq2ps: 1169 ; ZNVER1: # %bb.0: 1170 ; ZNVER1-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [12:1.00] 1171 ; ZNVER1-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [5:1.00] 1172 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 1173 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1174 %1 = sitofp <8 x i32> %a0 to <8 x float> 1175 %2 = load <8 x i32>, <8 x i32> *%a1, align 16 1176 %3 = sitofp <8 x i32> %2 to <8 x float> 1177 %4 = fadd <8 x float> %1, %3 1178 ret <8 x float> %4 1179 } 1180 1181 define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) { 1182 ; GENERIC-LABEL: test_cvtpd2dq: 1183 ; GENERIC: # %bb.0: 1184 ; GENERIC-NEXT: 
vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00] 1185 ; GENERIC-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00] 1186 ; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] 1187 ; GENERIC-NEXT: retq # sched: [1:1.00] 1188 ; 1189 ; SANDY-LABEL: test_cvtpd2dq: 1190 ; SANDY: # %bb.0: 1191 ; SANDY-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00] 1192 ; SANDY-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00] 1193 ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] 1194 ; SANDY-NEXT: retq # sched: [1:1.00] 1195 ; 1196 ; HASWELL-LABEL: test_cvtpd2dq: 1197 ; HASWELL: # %bb.0: 1198 ; HASWELL-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:1.00] 1199 ; HASWELL-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00] 1200 ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] 1201 ; HASWELL-NEXT: retq # sched: [7:1.00] 1202 ; 1203 ; BROADWELL-LABEL: test_cvtpd2dq: 1204 ; BROADWELL: # %bb.0: 1205 ; BROADWELL-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:1.00] 1206 ; BROADWELL-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00] 1207 ; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] 1208 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1209 ; 1210 ; SKYLAKE-LABEL: test_cvtpd2dq: 1211 ; SKYLAKE: # %bb.0: 1212 ; SKYLAKE-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00] 1213 ; SKYLAKE-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00] 1214 ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] 1215 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1216 ; 1217 ; SKX-LABEL: test_cvtpd2dq: 1218 ; SKX: # %bb.0: 1219 ; SKX-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00] 1220 ; SKX-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:0.50] 1221 ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] 1222 ; SKX-NEXT: retq # sched: [7:1.00] 1223 ; 1224 ; BTVER2-LABEL: test_cvtpd2dq: 1225 ; BTVER2: # %bb.0: 1226 ; BTVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:2.00] 1227 ; BTVER2-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:2.00] 1228 ; BTVER2-NEXT: vinsertf128 $1, 
%xmm1, %ymm0, %ymm0 # sched: [1:0.50] 1229 ; BTVER2-NEXT: retq # sched: [4:1.00] 1230 ; 1231 ; ZNVER1-LABEL: test_cvtpd2dq: 1232 ; ZNVER1: # %bb.0: 1233 ; ZNVER1-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [12:1.00] 1234 ; ZNVER1-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [5:1.00] 1235 ; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67] 1236 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1237 %1 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) 1238 %2 = load <4 x double>, <4 x double> *%a1, align 32 1239 %3 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %2) 1240 %4 = shufflevector <4 x i32> %1, <4 x i32> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1241 ret <8 x i32> %4 1242 } 1243 declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone 1244 1245 define <8 x i32> @test_cvttpd2dq(<4 x double> %a0, <4 x double> *%a1) { 1246 ; GENERIC-LABEL: test_cvttpd2dq: 1247 ; GENERIC: # %bb.0: 1248 ; GENERIC-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00] 1249 ; GENERIC-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00] 1250 ; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] 1251 ; GENERIC-NEXT: retq # sched: [1:1.00] 1252 ; 1253 ; SANDY-LABEL: test_cvttpd2dq: 1254 ; SANDY: # %bb.0: 1255 ; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00] 1256 ; SANDY-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00] 1257 ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] 1258 ; SANDY-NEXT: retq # sched: [1:1.00] 1259 ; 1260 ; HASWELL-LABEL: test_cvttpd2dq: 1261 ; HASWELL: # %bb.0: 1262 ; HASWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00] 1263 ; HASWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00] 1264 ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] 1265 ; HASWELL-NEXT: retq # sched: [7:1.00] 1266 ; 1267 ; BROADWELL-LABEL: test_cvttpd2dq: 1268 ; BROADWELL: # %bb.0: 1269 ; BROADWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00] 1270 ; BROADWELL-NEXT: 
vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00] 1271 ; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] 1272 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1273 ; 1274 ; SKYLAKE-LABEL: test_cvttpd2dq: 1275 ; SKYLAKE: # %bb.0: 1276 ; SKYLAKE-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00] 1277 ; SKYLAKE-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00] 1278 ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] 1279 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1280 ; 1281 ; SKX-LABEL: test_cvttpd2dq: 1282 ; SKX: # %bb.0: 1283 ; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00] 1284 ; SKX-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:0.50] 1285 ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] 1286 ; SKX-NEXT: retq # sched: [7:1.00] 1287 ; 1288 ; BTVER2-LABEL: test_cvttpd2dq: 1289 ; BTVER2: # %bb.0: 1290 ; BTVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:2.00] 1291 ; BTVER2-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:2.00] 1292 ; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50] 1293 ; BTVER2-NEXT: retq # sched: [4:1.00] 1294 ; 1295 ; ZNVER1-LABEL: test_cvttpd2dq: 1296 ; ZNVER1: # %bb.0: 1297 ; ZNVER1-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [12:1.00] 1298 ; ZNVER1-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [5:1.00] 1299 ; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67] 1300 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1301 %1 = fptosi <4 x double> %a0 to <4 x i32> 1302 %2 = load <4 x double>, <4 x double> *%a1, align 32 1303 %3 = fptosi <4 x double> %2 to <4 x i32> 1304 %4 = shufflevector <4 x i32> %1, <4 x i32> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1305 ret <8 x i32> %4 1306 } 1307 1308 define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) { 1309 ; GENERIC-LABEL: test_cvtpd2ps: 1310 ; GENERIC: # %bb.0: 1311 ; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00] 1312 ; GENERIC-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00] 1313 ; GENERIC-NEXT: 
vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] 1314 ; GENERIC-NEXT: retq # sched: [1:1.00] 1315 ; 1316 ; SANDY-LABEL: test_cvtpd2ps: 1317 ; SANDY: # %bb.0: 1318 ; SANDY-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00] 1319 ; SANDY-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00] 1320 ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] 1321 ; SANDY-NEXT: retq # sched: [1:1.00] 1322 ; 1323 ; HASWELL-LABEL: test_cvtpd2ps: 1324 ; HASWELL: # %bb.0: 1325 ; HASWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00] 1326 ; HASWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00] 1327 ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] 1328 ; HASWELL-NEXT: retq # sched: [7:1.00] 1329 ; 1330 ; BROADWELL-LABEL: test_cvtpd2ps: 1331 ; BROADWELL: # %bb.0: 1332 ; BROADWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00] 1333 ; BROADWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00] 1334 ; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] 1335 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1336 ; 1337 ; SKYLAKE-LABEL: test_cvtpd2ps: 1338 ; SKYLAKE: # %bb.0: 1339 ; SKYLAKE-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00] 1340 ; SKYLAKE-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00] 1341 ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] 1342 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1343 ; 1344 ; SKX-LABEL: test_cvtpd2ps: 1345 ; SKX: # %bb.0: 1346 ; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00] 1347 ; SKX-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00] 1348 ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] 1349 ; SKX-NEXT: retq # sched: [7:1.00] 1350 ; 1351 ; BTVER2-LABEL: test_cvtpd2ps: 1352 ; BTVER2: # %bb.0: 1353 ; BTVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:2.00] 1354 ; BTVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:2.00] 1355 ; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50] 1356 ; BTVER2-NEXT: retq # sched: [4:1.00] 1357 ; 1358 ; ZNVER1-LABEL: test_cvtpd2ps: 
1359 ; ZNVER1: # %bb.0: 1360 ; ZNVER1-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00] 1361 ; ZNVER1-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [5:1.00] 1362 ; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67] 1363 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1364 %1 = fptrunc <4 x double> %a0 to <4 x float> 1365 %2 = load <4 x double>, <4 x double> *%a1, align 32 1366 %3 = fptrunc <4 x double> %2 to <4 x float> 1367 %4 = shufflevector <4 x float> %1, <4 x float> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1368 ret <8 x float> %4 1369 } 1370 1371 define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { 1372 ; GENERIC-LABEL: test_cvtps2dq: 1373 ; GENERIC: # %bb.0: 1374 ; GENERIC-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00] 1375 ; GENERIC-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00] 1376 ; GENERIC-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 1377 ; GENERIC-NEXT: retq # sched: [1:1.00] 1378 ; 1379 ; SANDY-LABEL: test_cvtps2dq: 1380 ; SANDY: # %bb.0: 1381 ; SANDY-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00] 1382 ; SANDY-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00] 1383 ; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 1384 ; SANDY-NEXT: retq # sched: [1:1.00] 1385 ; 1386 ; HASWELL-LABEL: test_cvtps2dq: 1387 ; HASWELL: # %bb.0: 1388 ; HASWELL-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00] 1389 ; HASWELL-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00] 1390 ; HASWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 1391 ; HASWELL-NEXT: retq # sched: [7:1.00] 1392 ; 1393 ; BROADWELL-LABEL: test_cvtps2dq: 1394 ; BROADWELL: # %bb.0: 1395 ; BROADWELL-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00] 1396 ; BROADWELL-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [9:1.00] 1397 ; BROADWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 1398 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1399 ; 1400 ; SKYLAKE-LABEL: test_cvtps2dq: 1401 ; SKYLAKE: # %bb.0: 1402 ; SKYLAKE-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.50] 1403 
; SKYLAKE-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50] 1404 ; SKYLAKE-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1405 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1406 ; 1407 ; SKX-LABEL: test_cvtps2dq: 1408 ; SKX: # %bb.0: 1409 ; SKX-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.50] 1410 ; SKX-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50] 1411 ; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1412 ; SKX-NEXT: retq # sched: [7:1.00] 1413 ; 1414 ; BTVER2-LABEL: test_cvtps2dq: 1415 ; BTVER2: # %bb.0: 1416 ; BTVER2-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [8:2.00] 1417 ; BTVER2-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:2.00] 1418 ; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 1419 ; BTVER2-NEXT: retq # sched: [4:1.00] 1420 ; 1421 ; ZNVER1-LABEL: test_cvtps2dq: 1422 ; ZNVER1: # %bb.0: 1423 ; ZNVER1-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [12:1.00] 1424 ; ZNVER1-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [5:1.00] 1425 ; ZNVER1-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1426 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1427 %1 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) 1428 %2 = load <8 x float>, <8 x float> *%a1, align 32 1429 %3 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %2) 1430 %4 = or <8 x i32> %1, %3 1431 ret <8 x i32> %4 1432 } 1433 declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone 1434 1435 define <8 x i32> @test_cvttps2dq(<8 x float> %a0, <8 x float> *%a1) { 1436 ; GENERIC-LABEL: test_cvttps2dq: 1437 ; GENERIC: # %bb.0: 1438 ; GENERIC-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] 1439 ; GENERIC-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00] 1440 ; GENERIC-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 1441 ; GENERIC-NEXT: retq # sched: [1:1.00] 1442 ; 1443 ; SANDY-LABEL: test_cvttps2dq: 1444 ; SANDY: # %bb.0: 1445 ; SANDY-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] 1446 ; SANDY-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00] 1447 ; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # 
sched: [1:1.00] 1448 ; SANDY-NEXT: retq # sched: [1:1.00] 1449 ; 1450 ; HASWELL-LABEL: test_cvttps2dq: 1451 ; HASWELL: # %bb.0: 1452 ; HASWELL-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] 1453 ; HASWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00] 1454 ; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 1455 ; HASWELL-NEXT: retq # sched: [7:1.00] 1456 ; 1457 ; BROADWELL-LABEL: test_cvttps2dq: 1458 ; BROADWELL: # %bb.0: 1459 ; BROADWELL-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] 1460 ; BROADWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [9:1.00] 1461 ; BROADWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 1462 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1463 ; 1464 ; SKYLAKE-LABEL: test_cvttps2dq: 1465 ; SKYLAKE: # %bb.0: 1466 ; SKYLAKE-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.50] 1467 ; SKYLAKE-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50] 1468 ; SKYLAKE-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1469 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1470 ; 1471 ; SKX-LABEL: test_cvttps2dq: 1472 ; SKX: # %bb.0: 1473 ; SKX-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:0.50] 1474 ; SKX-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50] 1475 ; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1476 ; SKX-NEXT: retq # sched: [7:1.00] 1477 ; 1478 ; BTVER2-LABEL: test_cvttps2dq: 1479 ; BTVER2: # %bb.0: 1480 ; BTVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [8:2.00] 1481 ; BTVER2-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:2.00] 1482 ; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 1483 ; BTVER2-NEXT: retq # sched: [4:1.00] 1484 ; 1485 ; ZNVER1-LABEL: test_cvttps2dq: 1486 ; ZNVER1: # %bb.0: 1487 ; ZNVER1-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [12:1.00] 1488 ; ZNVER1-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [5:1.00] 1489 ; ZNVER1-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1490 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1491 %1 = fptosi <8 x float> %a0 to <8 x i32> 1492 %2 = load <8 x float>, <8 x float> *%a1, align 32 1493 %3 = fptosi <8 x 
float> %2 to <8 x i32> 1494 %4 = or <8 x i32> %1, %3 1495 ret <8 x i32> %4 1496 } 1497 1498 define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 1499 ; GENERIC-LABEL: test_divpd: 1500 ; GENERIC: # %bb.0: 1501 ; GENERIC-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:44.00] 1502 ; GENERIC-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:44.00] 1503 ; GENERIC-NEXT: retq # sched: [1:1.00] 1504 ; 1505 ; SANDY-LABEL: test_divpd: 1506 ; SANDY: # %bb.0: 1507 ; SANDY-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:44.00] 1508 ; SANDY-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:44.00] 1509 ; SANDY-NEXT: retq # sched: [1:1.00] 1510 ; 1511 ; HASWELL-LABEL: test_divpd: 1512 ; HASWELL: # %bb.0: 1513 ; HASWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [35:28.00] 1514 ; HASWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [42:28.00] 1515 ; HASWELL-NEXT: retq # sched: [7:1.00] 1516 ; 1517 ; BROADWELL-LABEL: test_divpd: 1518 ; BROADWELL: # %bb.0: 1519 ; BROADWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [23:16.00] 1520 ; BROADWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [29:16.00] 1521 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1522 ; 1523 ; SKYLAKE-LABEL: test_divpd: 1524 ; SKYLAKE: # %bb.0: 1525 ; SKYLAKE-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [14:5.00] 1526 ; SKYLAKE-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [21:8.00] 1527 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1528 ; 1529 ; SKX-LABEL: test_divpd: 1530 ; SKX: # %bb.0: 1531 ; SKX-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [14:5.00] 1532 ; SKX-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [21:8.00] 1533 ; SKX-NEXT: retq # sched: [7:1.00] 1534 ; 1535 ; BTVER2-LABEL: test_divpd: 1536 ; BTVER2: # %bb.0: 1537 ; BTVER2-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [38:38.00] 1538 ; BTVER2-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [43:38.00] 1539 ; BTVER2-NEXT: retq # sched: [4:1.00] 1540 ; 1541 ; ZNVER1-LABEL: test_divpd: 1542 ; ZNVER1: # %bb.0: 1543 ; ZNVER1-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: 
[15:15.00] 1544 ; ZNVER1-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [22:22.00] 1545 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1546 %1 = fdiv <4 x double> %a0, %a1 1547 %2 = load <4 x double>, <4 x double> *%a2, align 32 1548 %3 = fdiv <4 x double> %1, %2 1549 ret <4 x double> %3 1550 } 1551 1552 define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { 1553 ; GENERIC-LABEL: test_divps: 1554 ; GENERIC: # %bb.0: 1555 ; GENERIC-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:28.00] 1556 ; GENERIC-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:28.00] 1557 ; GENERIC-NEXT: retq # sched: [1:1.00] 1558 ; 1559 ; SANDY-LABEL: test_divps: 1560 ; SANDY: # %bb.0: 1561 ; SANDY-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:28.00] 1562 ; SANDY-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:28.00] 1563 ; SANDY-NEXT: retq # sched: [1:1.00] 1564 ; 1565 ; HASWELL-LABEL: test_divps: 1566 ; HASWELL: # %bb.0: 1567 ; HASWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [21:14.00] 1568 ; HASWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [28:14.00] 1569 ; HASWELL-NEXT: retq # sched: [7:1.00] 1570 ; 1571 ; BROADWELL-LABEL: test_divps: 1572 ; BROADWELL: # %bb.0: 1573 ; BROADWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [17:10.00] 1574 ; BROADWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [23:10.00] 1575 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1576 ; 1577 ; SKYLAKE-LABEL: test_divps: 1578 ; SKYLAKE: # %bb.0: 1579 ; SKYLAKE-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [11:5.00] 1580 ; SKYLAKE-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [18:5.00] 1581 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1582 ; 1583 ; SKX-LABEL: test_divps: 1584 ; SKX: # %bb.0: 1585 ; SKX-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [11:5.00] 1586 ; SKX-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [18:5.00] 1587 ; SKX-NEXT: retq # sched: [7:1.00] 1588 ; 1589 ; BTVER2-LABEL: test_divps: 1590 ; BTVER2: # %bb.0: 1591 ; BTVER2-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [38:38.00] 1592 ; BTVER2-NEXT: vdivps (%rdi), 
%ymm0, %ymm0 # sched: [43:38.00] 1593 ; BTVER2-NEXT: retq # sched: [4:1.00] 1594 ; 1595 ; ZNVER1-LABEL: test_divps: 1596 ; ZNVER1: # %bb.0: 1597 ; ZNVER1-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [12:12.00] 1598 ; ZNVER1-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [19:19.00] 1599 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1600 %1 = fdiv <8 x float> %a0, %a1 1601 %2 = load <8 x float>, <8 x float> *%a2, align 32 1602 %3 = fdiv <8 x float> %1, %2 1603 ret <8 x float> %3 1604 } 1605 1606 define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { 1607 ; GENERIC-LABEL: test_dpps: 1608 ; GENERIC: # %bb.0: 1609 ; GENERIC-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00] 1610 ; GENERIC-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [19:2.00] 1611 ; GENERIC-NEXT: retq # sched: [1:1.00] 1612 ; 1613 ; SANDY-LABEL: test_dpps: 1614 ; SANDY: # %bb.0: 1615 ; SANDY-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00] 1616 ; SANDY-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [19:2.00] 1617 ; SANDY-NEXT: retq # sched: [1:1.00] 1618 ; 1619 ; HASWELL-LABEL: test_dpps: 1620 ; HASWELL: # %bb.0: 1621 ; HASWELL-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [14:2.00] 1622 ; HASWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [21:2.00] 1623 ; HASWELL-NEXT: retq # sched: [7:1.00] 1624 ; 1625 ; BROADWELL-LABEL: test_dpps: 1626 ; BROADWELL: # %bb.0: 1627 ; BROADWELL-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [14:2.00] 1628 ; BROADWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:2.00] 1629 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1630 ; 1631 ; SKYLAKE-LABEL: test_dpps: 1632 ; SKYLAKE: # %bb.0: 1633 ; SKYLAKE-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [13:1.50] 1634 ; SKYLAKE-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:1.50] 1635 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1636 ; 1637 ; SKX-LABEL: test_dpps: 1638 ; SKX: # %bb.0: 1639 ; SKX-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [13:1.33] 1640 ; SKX-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: 
[20:1.33] 1641 ; SKX-NEXT: retq # sched: [7:1.00] 1642 ; 1643 ; BTVER2-LABEL: test_dpps: 1644 ; BTVER2: # %bb.0: 1645 ; BTVER2-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:6.00] 1646 ; BTVER2-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [17:6.00] 1647 ; BTVER2-NEXT: retq # sched: [4:1.00] 1648 ; 1649 ; ZNVER1-LABEL: test_dpps: 1650 ; ZNVER1: # %bb.0: 1651 ; ZNVER1-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 1652 ; ZNVER1-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 1653 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1654 %1 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) 1655 %2 = load <8 x float>, <8 x float> *%a2, align 32 1656 %3 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %1, <8 x float> %2, i8 7) 1657 ret <8 x float> %3 1658 } 1659 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone 1660 1661 define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x float> *%a2) { 1662 ; GENERIC-LABEL: test_extractf128: 1663 ; GENERIC: # %bb.0: 1664 ; GENERIC-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00] 1665 ; GENERIC-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] 1666 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 1667 ; GENERIC-NEXT: retq # sched: [1:1.00] 1668 ; 1669 ; SANDY-LABEL: test_extractf128: 1670 ; SANDY: # %bb.0: 1671 ; SANDY-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00] 1672 ; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] 1673 ; SANDY-NEXT: vzeroupper # sched: [100:0.33] 1674 ; SANDY-NEXT: retq # sched: [1:1.00] 1675 ; 1676 ; HASWELL-LABEL: test_extractf128: 1677 ; HASWELL: # %bb.0: 1678 ; HASWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] 1679 ; HASWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] 1680 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 1681 ; HASWELL-NEXT: retq # sched: [7:1.00] 1682 ; 1683 ; BROADWELL-LABEL: test_extractf128: 1684 ; BROADWELL: # %bb.0: 1685 ; 
BROADWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] 1686 ; BROADWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] 1687 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 1688 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1689 ; 1690 ; SKYLAKE-LABEL: test_extractf128: 1691 ; SKYLAKE: # %bb.0: 1692 ; SKYLAKE-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] 1693 ; SKYLAKE-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] 1694 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 1695 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1696 ; 1697 ; SKX-LABEL: test_extractf128: 1698 ; SKX: # %bb.0: 1699 ; SKX-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] 1700 ; SKX-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] 1701 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 1702 ; SKX-NEXT: retq # sched: [7:1.00] 1703 ; 1704 ; BTVER2-LABEL: test_extractf128: 1705 ; BTVER2: # %bb.0: 1706 ; BTVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:0.50] 1707 ; BTVER2-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] 1708 ; BTVER2-NEXT: retq # sched: [4:1.00] 1709 ; 1710 ; ZNVER1-LABEL: test_extractf128: 1711 ; ZNVER1: # %bb.0: 1712 ; ZNVER1-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:0.33] 1713 ; ZNVER1-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [8:0.50] 1714 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 1715 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1716 %1 = shufflevector <8 x float> %a0, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1717 %2 = shufflevector <8 x float> %a1, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1718 store <4 x float> %2, <4 x float> *%a2 1719 ret <4 x float> %1 1720 } 1721 1722 define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 1723 ; GENERIC-LABEL: test_haddpd: 1724 ; GENERIC: # %bb.0: 1725 ; GENERIC-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 1726 ; GENERIC-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] 1727 ; GENERIC-NEXT: retq # sched: [1:1.00] 1728 ; 1729 ; 
SANDY-LABEL: test_haddpd: 1730 ; SANDY: # %bb.0: 1731 ; SANDY-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 1732 ; SANDY-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] 1733 ; SANDY-NEXT: retq # sched: [1:1.00] 1734 ; 1735 ; HASWELL-LABEL: test_haddpd: 1736 ; HASWELL: # %bb.0: 1737 ; HASWELL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 1738 ; HASWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] 1739 ; HASWELL-NEXT: retq # sched: [7:1.00] 1740 ; 1741 ; BROADWELL-LABEL: test_haddpd: 1742 ; BROADWELL: # %bb.0: 1743 ; BROADWELL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 1744 ; BROADWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [11:2.00] 1745 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1746 ; 1747 ; SKYLAKE-LABEL: test_haddpd: 1748 ; SKYLAKE: # %bb.0: 1749 ; SKYLAKE-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00] 1750 ; SKYLAKE-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00] 1751 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1752 ; 1753 ; SKX-LABEL: test_haddpd: 1754 ; SKX: # %bb.0: 1755 ; SKX-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00] 1756 ; SKX-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00] 1757 ; SKX-NEXT: retq # sched: [7:1.00] 1758 ; 1759 ; BTVER2-LABEL: test_haddpd: 1760 ; BTVER2: # %bb.0: 1761 ; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 1762 ; BTVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] 1763 ; BTVER2-NEXT: retq # sched: [4:1.00] 1764 ; 1765 ; ZNVER1-LABEL: test_haddpd: 1766 ; ZNVER1: # %bb.0: 1767 ; ZNVER1-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 1768 ; ZNVER1-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 1769 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1770 %1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) 1771 %2 = load <4 x double>, <4 x double> *%a2, align 32 1772 %3 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %1, <4 x double> %2) 1773 ret <4 x double> %3 1774 } 1775 declare <4 x double> 
@llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone 1776 1777 define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { 1778 ; GENERIC-LABEL: test_haddps: 1779 ; GENERIC: # %bb.0: 1780 ; GENERIC-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 1781 ; GENERIC-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] 1782 ; GENERIC-NEXT: retq # sched: [1:1.00] 1783 ; 1784 ; SANDY-LABEL: test_haddps: 1785 ; SANDY: # %bb.0: 1786 ; SANDY-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 1787 ; SANDY-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] 1788 ; SANDY-NEXT: retq # sched: [1:1.00] 1789 ; 1790 ; HASWELL-LABEL: test_haddps: 1791 ; HASWELL: # %bb.0: 1792 ; HASWELL-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 1793 ; HASWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] 1794 ; HASWELL-NEXT: retq # sched: [7:1.00] 1795 ; 1796 ; BROADWELL-LABEL: test_haddps: 1797 ; BROADWELL: # %bb.0: 1798 ; BROADWELL-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 1799 ; BROADWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [11:2.00] 1800 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1801 ; 1802 ; SKYLAKE-LABEL: test_haddps: 1803 ; SKYLAKE: # %bb.0: 1804 ; SKYLAKE-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [6:2.00] 1805 ; SKYLAKE-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [13:2.00] 1806 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1807 ; 1808 ; SKX-LABEL: test_haddps: 1809 ; SKX: # %bb.0: 1810 ; SKX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [6:2.00] 1811 ; SKX-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [13:2.00] 1812 ; SKX-NEXT: retq # sched: [7:1.00] 1813 ; 1814 ; BTVER2-LABEL: test_haddps: 1815 ; BTVER2: # %bb.0: 1816 ; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 1817 ; BTVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] 1818 ; BTVER2-NEXT: retq # sched: [4:1.00] 1819 ; 1820 ; ZNVER1-LABEL: test_haddps: 1821 ; ZNVER1: # %bb.0: 1822 ; ZNVER1-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 
1823 ; ZNVER1-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 1824 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1825 %1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) 1826 %2 = load <8 x float>, <8 x float> *%a2, align 32 1827 %3 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %1, <8 x float> %2) 1828 ret <8 x float> %3 1829 } 1830 declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone 1831 1832 define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 1833 ; GENERIC-LABEL: test_hsubpd: 1834 ; GENERIC: # %bb.0: 1835 ; GENERIC-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 1836 ; GENERIC-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] 1837 ; GENERIC-NEXT: retq # sched: [1:1.00] 1838 ; 1839 ; SANDY-LABEL: test_hsubpd: 1840 ; SANDY: # %bb.0: 1841 ; SANDY-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 1842 ; SANDY-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] 1843 ; SANDY-NEXT: retq # sched: [1:1.00] 1844 ; 1845 ; HASWELL-LABEL: test_hsubpd: 1846 ; HASWELL: # %bb.0: 1847 ; HASWELL-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 1848 ; HASWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] 1849 ; HASWELL-NEXT: retq # sched: [7:1.00] 1850 ; 1851 ; BROADWELL-LABEL: test_hsubpd: 1852 ; BROADWELL: # %bb.0: 1853 ; BROADWELL-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 1854 ; BROADWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [11:2.00] 1855 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1856 ; 1857 ; SKYLAKE-LABEL: test_hsubpd: 1858 ; SKYLAKE: # %bb.0: 1859 ; SKYLAKE-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00] 1860 ; SKYLAKE-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00] 1861 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1862 ; 1863 ; SKX-LABEL: test_hsubpd: 1864 ; SKX: # %bb.0: 1865 ; SKX-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00] 1866 ; SKX-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00] 1867 ; SKX-NEXT: 
retq # sched: [7:1.00] 1868 ; 1869 ; BTVER2-LABEL: test_hsubpd: 1870 ; BTVER2: # %bb.0: 1871 ; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 1872 ; BTVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] 1873 ; BTVER2-NEXT: retq # sched: [4:1.00] 1874 ; 1875 ; ZNVER1-LABEL: test_hsubpd: 1876 ; ZNVER1: # %bb.0: 1877 ; ZNVER1-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 1878 ; ZNVER1-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 1879 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1880 %1 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) 1881 %2 = load <4 x double>, <4 x double> *%a2, align 32 1882 %3 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %1, <4 x double> %2) 1883 ret <4 x double> %3 1884 } 1885 declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone 1886 1887 define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { 1888 ; GENERIC-LABEL: test_hsubps: 1889 ; GENERIC: # %bb.0: 1890 ; GENERIC-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 1891 ; GENERIC-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] 1892 ; GENERIC-NEXT: retq # sched: [1:1.00] 1893 ; 1894 ; SANDY-LABEL: test_hsubps: 1895 ; SANDY: # %bb.0: 1896 ; SANDY-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 1897 ; SANDY-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] 1898 ; SANDY-NEXT: retq # sched: [1:1.00] 1899 ; 1900 ; HASWELL-LABEL: test_hsubps: 1901 ; HASWELL: # %bb.0: 1902 ; HASWELL-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 1903 ; HASWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] 1904 ; HASWELL-NEXT: retq # sched: [7:1.00] 1905 ; 1906 ; BROADWELL-LABEL: test_hsubps: 1907 ; BROADWELL: # %bb.0: 1908 ; BROADWELL-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 1909 ; BROADWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [11:2.00] 1910 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1911 ; 1912 ; SKYLAKE-LABEL: test_hsubps: 1913 ; 
SKYLAKE: # %bb.0: 1914 ; SKYLAKE-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [6:2.00] 1915 ; SKYLAKE-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [13:2.00] 1916 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1917 ; 1918 ; SKX-LABEL: test_hsubps: 1919 ; SKX: # %bb.0: 1920 ; SKX-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [6:2.00] 1921 ; SKX-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [13:2.00] 1922 ; SKX-NEXT: retq # sched: [7:1.00] 1923 ; 1924 ; BTVER2-LABEL: test_hsubps: 1925 ; BTVER2: # %bb.0: 1926 ; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 1927 ; BTVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] 1928 ; BTVER2-NEXT: retq # sched: [4:1.00] 1929 ; 1930 ; ZNVER1-LABEL: test_hsubps: 1931 ; ZNVER1: # %bb.0: 1932 ; ZNVER1-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 1933 ; ZNVER1-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 1934 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1935 %1 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) 1936 %2 = load <8 x float>, <8 x float> *%a2, align 32 1937 %3 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %1, <8 x float> %2) 1938 ret <8 x float> %3 1939 } 1940 declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone 1941 1942 define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { 1943 ; GENERIC-LABEL: test_insertf128: 1944 ; GENERIC: # %bb.0: 1945 ; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] 1946 ; GENERIC-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1947 ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 1948 ; GENERIC-NEXT: retq # sched: [1:1.00] 1949 ; 1950 ; SANDY-LABEL: test_insertf128: 1951 ; SANDY: # %bb.0: 1952 ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] 1953 ; SANDY-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1954 ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 1955 ; SANDY-NEXT: retq # sched: 
[1:1.00] 1956 ; 1957 ; HASWELL-LABEL: test_insertf128: 1958 ; HASWELL: # %bb.0: 1959 ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] 1960 ; HASWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1961 ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 1962 ; HASWELL-NEXT: retq # sched: [7:1.00] 1963 ; 1964 ; BROADWELL-LABEL: test_insertf128: 1965 ; BROADWELL: # %bb.0: 1966 ; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] 1967 ; BROADWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:0.50] 1968 ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 1969 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1970 ; 1971 ; SKYLAKE-LABEL: test_insertf128: 1972 ; SKYLAKE: # %bb.0: 1973 ; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] 1974 ; SKYLAKE-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1975 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 1976 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1977 ; 1978 ; SKX-LABEL: test_insertf128: 1979 ; SKX: # %bb.0: 1980 ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] 1981 ; SKX-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1982 ; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 1983 ; SKX-NEXT: retq # sched: [7:1.00] 1984 ; 1985 ; BTVER2-LABEL: test_insertf128: 1986 ; BTVER2: # %bb.0: 1987 ; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:0.50] 1988 ; BTVER2-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] 1989 ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] 1990 ; BTVER2-NEXT: retq # sched: [4:1.00] 1991 ; 1992 ; ZNVER1-LABEL: test_insertf128: 1993 ; ZNVER1: # %bb.0: 1994 ; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.67] 1995 ; ZNVER1-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [9:0.67] 1996 ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 1997 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1998 %1 = 
shufflevector <4 x float> %a1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 1999 %2 = shufflevector <8 x float> %a0, <8 x float> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 2000 %3 = load <4 x float>, <4 x float> *%a2, align 16 2001 %4 = shufflevector <4 x float> %3, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 2002 %5 = shufflevector <8 x float> %a0, <8 x float> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 2003 %6 = fadd <8 x float> %2, %5 2004 ret <8 x float> %6 2005 } 2006 2007 define <32 x i8> @test_lddqu(i8* %a0) { 2008 ; GENERIC-LABEL: test_lddqu: 2009 ; GENERIC: # %bb.0: 2010 ; GENERIC-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50] 2011 ; GENERIC-NEXT: retq # sched: [1:1.00] 2012 ; 2013 ; SANDY-LABEL: test_lddqu: 2014 ; SANDY: # %bb.0: 2015 ; SANDY-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50] 2016 ; SANDY-NEXT: retq # sched: [1:1.00] 2017 ; 2018 ; HASWELL-LABEL: test_lddqu: 2019 ; HASWELL: # %bb.0: 2020 ; HASWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50] 2021 ; HASWELL-NEXT: retq # sched: [7:1.00] 2022 ; 2023 ; BROADWELL-LABEL: test_lddqu: 2024 ; BROADWELL: # %bb.0: 2025 ; BROADWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [6:0.50] 2026 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2027 ; 2028 ; SKYLAKE-LABEL: test_lddqu: 2029 ; SKYLAKE: # %bb.0: 2030 ; SKYLAKE-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50] 2031 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2032 ; 2033 ; SKX-LABEL: test_lddqu: 2034 ; SKX: # %bb.0: 2035 ; SKX-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50] 2036 ; SKX-NEXT: retq # sched: [7:1.00] 2037 ; 2038 ; BTVER2-LABEL: test_lddqu: 2039 ; BTVER2: # %bb.0: 2040 ; BTVER2-NEXT: vlddqu (%rdi), %ymm0 # sched: [5:1.00] 2041 ; BTVER2-NEXT: retq # sched: [4:1.00] 2042 ; 2043 ; ZNVER1-LABEL: test_lddqu: 2044 ; ZNVER1: # %bb.0: 2045 ; ZNVER1-NEXT: vlddqu (%rdi), %ymm0 # sched: [8:0.50] 2046 ; 
ZNVER1-NEXT: retq # sched: [1:0.50] 2047 %1 = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) 2048 ret <32 x i8> %1 2049 } 2050 declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly 2051 2052 define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) { 2053 ; GENERIC-LABEL: test_maskmovpd: 2054 ; GENERIC: # %bb.0: 2055 ; GENERIC-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:1.00] 2056 ; GENERIC-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 2057 ; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] 2058 ; GENERIC-NEXT: retq # sched: [1:1.00] 2059 ; 2060 ; SANDY-LABEL: test_maskmovpd: 2061 ; SANDY: # %bb.0: 2062 ; SANDY-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:1.00] 2063 ; SANDY-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 2064 ; SANDY-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] 2065 ; SANDY-NEXT: retq # sched: [1:1.00] 2066 ; 2067 ; HASWELL-LABEL: test_maskmovpd: 2068 ; HASWELL: # %bb.0: 2069 ; HASWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:2.00] 2070 ; HASWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 2071 ; HASWELL-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] 2072 ; HASWELL-NEXT: retq # sched: [7:1.00] 2073 ; 2074 ; BROADWELL-LABEL: test_maskmovpd: 2075 ; BROADWELL: # %bb.0: 2076 ; BROADWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:2.00] 2077 ; BROADWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 2078 ; BROADWELL-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] 2079 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2080 ; 2081 ; SKYLAKE-LABEL: test_maskmovpd: 2082 ; SKYLAKE: # %bb.0: 2083 ; SKYLAKE-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:0.50] 2084 ; SKYLAKE-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [2:1.00] 2085 ; SKYLAKE-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33] 2086 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2087 ; 2088 ; SKX-LABEL: test_maskmovpd: 2089 ; SKX: # %bb.0: 2090 ; SKX-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:0.50] 2091 ; 
SKX-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [2:1.00] 2092 ; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33] 2093 ; SKX-NEXT: retq # sched: [7:1.00] 2094 ; 2095 ; BTVER2-LABEL: test_maskmovpd: 2096 ; BTVER2: # %bb.0: 2097 ; BTVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [6:1.00] 2098 ; BTVER2-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [6:2.00] 2099 ; BTVER2-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.50] 2100 ; BTVER2-NEXT: retq # sched: [4:1.00] 2101 ; 2102 ; ZNVER1-LABEL: test_maskmovpd: 2103 ; ZNVER1: # %bb.0: 2104 ; ZNVER1-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:0.50] 2105 ; ZNVER1-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [4:0.50] 2106 ; ZNVER1-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.25] 2107 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2108 %1 = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %a1) 2109 call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) 2110 ret <2 x double> %1 2111 } 2112 declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly 2113 declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind 2114 2115 define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2) { 2116 ; GENERIC-LABEL: test_maskmovpd_ymm: 2117 ; GENERIC: # %bb.0: 2118 ; GENERIC-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:1.00] 2119 ; GENERIC-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 2120 ; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 2121 ; GENERIC-NEXT: retq # sched: [1:1.00] 2122 ; 2123 ; SANDY-LABEL: test_maskmovpd_ymm: 2124 ; SANDY: # %bb.0: 2125 ; SANDY-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:1.00] 2126 ; SANDY-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 2127 ; SANDY-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 2128 ; SANDY-NEXT: retq # sched: [1:1.00] 2129 ; 2130 ; HASWELL-LABEL: test_maskmovpd_ymm: 2131 ; HASWELL: # %bb.0: 2132 ; HASWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:2.00] 
2133 ; HASWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 2134 ; HASWELL-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 2135 ; HASWELL-NEXT: retq # sched: [7:1.00] 2136 ; 2137 ; BROADWELL-LABEL: test_maskmovpd_ymm: 2138 ; BROADWELL: # %bb.0: 2139 ; BROADWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:2.00] 2140 ; BROADWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 2141 ; BROADWELL-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 2142 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2143 ; 2144 ; SKYLAKE-LABEL: test_maskmovpd_ymm: 2145 ; SKYLAKE: # %bb.0: 2146 ; SKYLAKE-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:0.50] 2147 ; SKYLAKE-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [2:1.00] 2148 ; SKYLAKE-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 2149 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2150 ; 2151 ; SKX-LABEL: test_maskmovpd_ymm: 2152 ; SKX: # %bb.0: 2153 ; SKX-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:0.50] 2154 ; SKX-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [2:1.00] 2155 ; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33] 2156 ; SKX-NEXT: retq # sched: [7:1.00] 2157 ; 2158 ; BTVER2-LABEL: test_maskmovpd_ymm: 2159 ; BTVER2: # %bb.0: 2160 ; BTVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [6:2.00] 2161 ; BTVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [6:2.00] 2162 ; BTVER2-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] 2163 ; BTVER2-NEXT: retq # sched: [4:1.00] 2164 ; 2165 ; ZNVER1-LABEL: test_maskmovpd_ymm: 2166 ; ZNVER1: # %bb.0: 2167 ; ZNVER1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:1.00] 2168 ; ZNVER1-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 2169 ; ZNVER1-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.25] 2170 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2171 %1 = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %a1) 2172 call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %a1, <4 x double> %a2) 2173 ret <4 x double> %1 2174 } 2175 declare <4 x double> 
@llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly 2176 declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind 2177 2178 define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) { 2179 ; GENERIC-LABEL: test_maskmovps: 2180 ; GENERIC: # %bb.0: 2181 ; GENERIC-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:1.00] 2182 ; GENERIC-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 2183 ; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 2184 ; GENERIC-NEXT: retq # sched: [1:1.00] 2185 ; 2186 ; SANDY-LABEL: test_maskmovps: 2187 ; SANDY: # %bb.0: 2188 ; SANDY-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:1.00] 2189 ; SANDY-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 2190 ; SANDY-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 2191 ; SANDY-NEXT: retq # sched: [1:1.00] 2192 ; 2193 ; HASWELL-LABEL: test_maskmovps: 2194 ; HASWELL: # %bb.0: 2195 ; HASWELL-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:2.00] 2196 ; HASWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 2197 ; HASWELL-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 2198 ; HASWELL-NEXT: retq # sched: [7:1.00] 2199 ; 2200 ; BROADWELL-LABEL: test_maskmovps: 2201 ; BROADWELL: # %bb.0: 2202 ; BROADWELL-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:2.00] 2203 ; BROADWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 2204 ; BROADWELL-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] 2205 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2206 ; 2207 ; SKYLAKE-LABEL: test_maskmovps: 2208 ; SKYLAKE: # %bb.0: 2209 ; SKYLAKE-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:0.50] 2210 ; SKYLAKE-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [2:1.00] 2211 ; SKYLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 2212 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2213 ; 2214 ; SKX-LABEL: test_maskmovps: 2215 ; SKX: # %bb.0: 2216 ; SKX-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:0.50] 2217 ; SKX-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: 
[2:1.00] 2218 ; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] 2219 ; SKX-NEXT: retq # sched: [7:1.00] 2220 ; 2221 ; BTVER2-LABEL: test_maskmovps: 2222 ; BTVER2: # %bb.0: 2223 ; BTVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [6:1.00] 2224 ; BTVER2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [6:2.00] 2225 ; BTVER2-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.50] 2226 ; BTVER2-NEXT: retq # sched: [4:1.00] 2227 ; 2228 ; ZNVER1-LABEL: test_maskmovps: 2229 ; ZNVER1: # %bb.0: 2230 ; ZNVER1-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:0.50] 2231 ; ZNVER1-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [4:0.50] 2232 ; ZNVER1-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.25] 2233 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2234 %1 = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %a1) 2235 call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) 2236 ret <4 x float> %1 2237 } 2238 declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly 2239 declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind 2240 2241 define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2) { 2242 ; GENERIC-LABEL: test_maskmovps_ymm: 2243 ; GENERIC: # %bb.0: 2244 ; GENERIC-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:1.00] 2245 ; GENERIC-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 2246 ; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 2247 ; GENERIC-NEXT: retq # sched: [1:1.00] 2248 ; 2249 ; SANDY-LABEL: test_maskmovps_ymm: 2250 ; SANDY: # %bb.0: 2251 ; SANDY-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:1.00] 2252 ; SANDY-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 2253 ; SANDY-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 2254 ; SANDY-NEXT: retq # sched: [1:1.00] 2255 ; 2256 ; HASWELL-LABEL: test_maskmovps_ymm: 2257 ; HASWELL: # %bb.0: 2258 ; HASWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:2.00] 2259 ; HASWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # 
sched: [5:1.00] 2260 ; HASWELL-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 2261 ; HASWELL-NEXT: retq # sched: [7:1.00] 2262 ; 2263 ; BROADWELL-LABEL: test_maskmovps_ymm: 2264 ; BROADWELL: # %bb.0: 2265 ; BROADWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:2.00] 2266 ; BROADWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 2267 ; BROADWELL-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 2268 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2269 ; 2270 ; SKYLAKE-LABEL: test_maskmovps_ymm: 2271 ; SKYLAKE: # %bb.0: 2272 ; SKYLAKE-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:0.50] 2273 ; SKYLAKE-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [2:1.00] 2274 ; SKYLAKE-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 2275 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2276 ; 2277 ; SKX-LABEL: test_maskmovps_ymm: 2278 ; SKX: # %bb.0: 2279 ; SKX-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:0.50] 2280 ; SKX-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [2:1.00] 2281 ; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33] 2282 ; SKX-NEXT: retq # sched: [7:1.00] 2283 ; 2284 ; BTVER2-LABEL: test_maskmovps_ymm: 2285 ; BTVER2: # %bb.0: 2286 ; BTVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [6:2.00] 2287 ; BTVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [6:2.00] 2288 ; BTVER2-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] 2289 ; BTVER2-NEXT: retq # sched: [4:1.00] 2290 ; 2291 ; ZNVER1-LABEL: test_maskmovps_ymm: 2292 ; ZNVER1: # %bb.0: 2293 ; ZNVER1-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:1.00] 2294 ; ZNVER1-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 2295 ; ZNVER1-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.25] 2296 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2297 %1 = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %a1) 2298 call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %a1, <8 x float> %a2) 2299 ret <8 x float> %1 2300 } 2301 declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly 2302 declare 
void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind 2303 2304 define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 2305 ; GENERIC-LABEL: test_maxpd: 2306 ; GENERIC: # %bb.0: 2307 ; GENERIC-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2308 ; GENERIC-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2309 ; GENERIC-NEXT: retq # sched: [1:1.00] 2310 ; 2311 ; SANDY-LABEL: test_maxpd: 2312 ; SANDY: # %bb.0: 2313 ; SANDY-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2314 ; SANDY-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2315 ; SANDY-NEXT: retq # sched: [1:1.00] 2316 ; 2317 ; HASWELL-LABEL: test_maxpd: 2318 ; HASWELL: # %bb.0: 2319 ; HASWELL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2320 ; HASWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2321 ; HASWELL-NEXT: retq # sched: [7:1.00] 2322 ; 2323 ; BROADWELL-LABEL: test_maxpd: 2324 ; BROADWELL: # %bb.0: 2325 ; BROADWELL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2326 ; BROADWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 2327 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2328 ; 2329 ; SKYLAKE-LABEL: test_maxpd: 2330 ; SKYLAKE: # %bb.0: 2331 ; SKYLAKE-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 2332 ; SKYLAKE-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 2333 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2334 ; 2335 ; SKX-LABEL: test_maxpd: 2336 ; SKX: # %bb.0: 2337 ; SKX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 2338 ; SKX-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 2339 ; SKX-NEXT: retq # sched: [7:1.00] 2340 ; 2341 ; BTVER2-LABEL: test_maxpd: 2342 ; BTVER2: # %bb.0: 2343 ; BTVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00] 2344 ; BTVER2-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00] 2345 ; BTVER2-NEXT: retq # sched: [4:1.00] 2346 ; 2347 ; ZNVER1-LABEL: test_maxpd: 2348 ; ZNVER1: # %bb.0: 2349 ; ZNVER1-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2350 ; ZNVER1-NEXT: 
vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2351 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2352 %1 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) 2353 %2 = load <4 x double>, <4 x double> *%a2, align 32 2354 %3 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %1, <4 x double> %2) 2355 ret <4 x double> %3 2356 } 2357 declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone 2358 2359 define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { 2360 ; GENERIC-LABEL: test_maxps: 2361 ; GENERIC: # %bb.0: 2362 ; GENERIC-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2363 ; GENERIC-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2364 ; GENERIC-NEXT: retq # sched: [1:1.00] 2365 ; 2366 ; SANDY-LABEL: test_maxps: 2367 ; SANDY: # %bb.0: 2368 ; SANDY-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2369 ; SANDY-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2370 ; SANDY-NEXT: retq # sched: [1:1.00] 2371 ; 2372 ; HASWELL-LABEL: test_maxps: 2373 ; HASWELL: # %bb.0: 2374 ; HASWELL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2375 ; HASWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2376 ; HASWELL-NEXT: retq # sched: [7:1.00] 2377 ; 2378 ; BROADWELL-LABEL: test_maxps: 2379 ; BROADWELL: # %bb.0: 2380 ; BROADWELL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2381 ; BROADWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 2382 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2383 ; 2384 ; SKYLAKE-LABEL: test_maxps: 2385 ; SKYLAKE: # %bb.0: 2386 ; SKYLAKE-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 2387 ; SKYLAKE-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 2388 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2389 ; 2390 ; SKX-LABEL: test_maxps: 2391 ; SKX: # %bb.0: 2392 ; SKX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 2393 ; SKX-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 2394 ; SKX-NEXT: retq # sched: [7:1.00] 2395 ; 2396 ; 
BTVER2-LABEL: test_maxps: 2397 ; BTVER2: # %bb.0: 2398 ; BTVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] 2399 ; BTVER2-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] 2400 ; BTVER2-NEXT: retq # sched: [4:1.00] 2401 ; 2402 ; ZNVER1-LABEL: test_maxps: 2403 ; ZNVER1: # %bb.0: 2404 ; ZNVER1-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2405 ; ZNVER1-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2406 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2407 %1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) 2408 %2 = load <8 x float>, <8 x float> *%a2, align 32 2409 %3 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %1, <8 x float> %2) 2410 ret <8 x float> %3 2411 } 2412 declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone 2413 2414 define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 2415 ; GENERIC-LABEL: test_minpd: 2416 ; GENERIC: # %bb.0: 2417 ; GENERIC-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2418 ; GENERIC-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2419 ; GENERIC-NEXT: retq # sched: [1:1.00] 2420 ; 2421 ; SANDY-LABEL: test_minpd: 2422 ; SANDY: # %bb.0: 2423 ; SANDY-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2424 ; SANDY-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2425 ; SANDY-NEXT: retq # sched: [1:1.00] 2426 ; 2427 ; HASWELL-LABEL: test_minpd: 2428 ; HASWELL: # %bb.0: 2429 ; HASWELL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2430 ; HASWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2431 ; HASWELL-NEXT: retq # sched: [7:1.00] 2432 ; 2433 ; BROADWELL-LABEL: test_minpd: 2434 ; BROADWELL: # %bb.0: 2435 ; BROADWELL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2436 ; BROADWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 2437 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2438 ; 2439 ; SKYLAKE-LABEL: test_minpd: 2440 ; SKYLAKE: # %bb.0: 2441 ; SKYLAKE-NEXT: vminpd %ymm1, %ymm0, %ymm0 # 
sched: [4:0.50] 2442 ; SKYLAKE-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 2443 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2444 ; 2445 ; SKX-LABEL: test_minpd: 2446 ; SKX: # %bb.0: 2447 ; SKX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 2448 ; SKX-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 2449 ; SKX-NEXT: retq # sched: [7:1.00] 2450 ; 2451 ; BTVER2-LABEL: test_minpd: 2452 ; BTVER2: # %bb.0: 2453 ; BTVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00] 2454 ; BTVER2-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00] 2455 ; BTVER2-NEXT: retq # sched: [4:1.00] 2456 ; 2457 ; ZNVER1-LABEL: test_minpd: 2458 ; ZNVER1: # %bb.0: 2459 ; ZNVER1-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2460 ; ZNVER1-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2461 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2462 %1 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) 2463 %2 = load <4 x double>, <4 x double> *%a2, align 32 2464 %3 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %1, <4 x double> %2) 2465 ret <4 x double> %3 2466 } 2467 declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone 2468 2469 define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { 2470 ; GENERIC-LABEL: test_minps: 2471 ; GENERIC: # %bb.0: 2472 ; GENERIC-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2473 ; GENERIC-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2474 ; GENERIC-NEXT: retq # sched: [1:1.00] 2475 ; 2476 ; SANDY-LABEL: test_minps: 2477 ; SANDY: # %bb.0: 2478 ; SANDY-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2479 ; SANDY-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2480 ; SANDY-NEXT: retq # sched: [1:1.00] 2481 ; 2482 ; HASWELL-LABEL: test_minps: 2483 ; HASWELL: # %bb.0: 2484 ; HASWELL-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2485 ; HASWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2486 ; HASWELL-NEXT: retq # sched: 
[7:1.00] 2487 ; 2488 ; BROADWELL-LABEL: test_minps: 2489 ; BROADWELL: # %bb.0: 2490 ; BROADWELL-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2491 ; BROADWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 2492 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2493 ; 2494 ; SKYLAKE-LABEL: test_minps: 2495 ; SKYLAKE: # %bb.0: 2496 ; SKYLAKE-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 2497 ; SKYLAKE-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 2498 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2499 ; 2500 ; SKX-LABEL: test_minps: 2501 ; SKX: # %bb.0: 2502 ; SKX-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 2503 ; SKX-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 2504 ; SKX-NEXT: retq # sched: [7:1.00] 2505 ; 2506 ; BTVER2-LABEL: test_minps: 2507 ; BTVER2: # %bb.0: 2508 ; BTVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] 2509 ; BTVER2-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:2.00] 2510 ; BTVER2-NEXT: retq # sched: [4:1.00] 2511 ; 2512 ; ZNVER1-LABEL: test_minps: 2513 ; ZNVER1: # %bb.0: 2514 ; ZNVER1-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2515 ; ZNVER1-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 2516 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2517 %1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) 2518 %2 = load <8 x float>, <8 x float> *%a2, align 32 2519 %3 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %1, <8 x float> %2) 2520 ret <8 x float> %3 2521 } 2522 declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone 2523 2524 define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) { 2525 ; GENERIC-LABEL: test_movapd: 2526 ; GENERIC: # %bb.0: 2527 ; GENERIC-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] 2528 ; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2529 ; GENERIC-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] 2530 ; GENERIC-NEXT: retq # sched: [1:1.00] 2531 ; 2532 ; SANDY-LABEL: test_movapd: 2533 ; SANDY: # %bb.0: 2534 
; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] 2535 ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2536 ; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] 2537 ; SANDY-NEXT: retq # sched: [1:1.00] 2538 ; 2539 ; HASWELL-LABEL: test_movapd: 2540 ; HASWELL: # %bb.0: 2541 ; HASWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] 2542 ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2543 ; HASWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] 2544 ; HASWELL-NEXT: retq # sched: [7:1.00] 2545 ; 2546 ; BROADWELL-LABEL: test_movapd: 2547 ; BROADWELL: # %bb.0: 2548 ; BROADWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [6:0.50] 2549 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2550 ; BROADWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] 2551 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2552 ; 2553 ; SKYLAKE-LABEL: test_movapd: 2554 ; SKYLAKE: # %bb.0: 2555 ; SKYLAKE-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] 2556 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 2557 ; SKYLAKE-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] 2558 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2559 ; 2560 ; SKX-LABEL: test_movapd: 2561 ; SKX: # %bb.0: 2562 ; SKX-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] 2563 ; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 2564 ; SKX-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] 2565 ; SKX-NEXT: retq # sched: [7:1.00] 2566 ; 2567 ; BTVER2-LABEL: test_movapd: 2568 ; BTVER2: # %bb.0: 2569 ; BTVER2-NEXT: vmovapd (%rdi), %ymm0 # sched: [5:1.00] 2570 ; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00] 2571 ; BTVER2-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] 2572 ; BTVER2-NEXT: retq # sched: [4:1.00] 2573 ; 2574 ; ZNVER1-LABEL: test_movapd: 2575 ; ZNVER1: # %bb.0: 2576 ; ZNVER1-NEXT: vmovapd (%rdi), %ymm0 # sched: [8:0.50] 2577 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2578 ; ZNVER1-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:0.50] 2579 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2580 %1 = 
load <4 x double>, <4 x double> *%a0, align 32 2581 %2 = fadd <4 x double> %1, %1 2582 store <4 x double> %2, <4 x double> *%a1, align 32 2583 ret <4 x double> %2 2584 } 2585 2586 define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) { 2587 ; GENERIC-LABEL: test_movaps: 2588 ; GENERIC: # %bb.0: 2589 ; GENERIC-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] 2590 ; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2591 ; GENERIC-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] 2592 ; GENERIC-NEXT: retq # sched: [1:1.00] 2593 ; 2594 ; SANDY-LABEL: test_movaps: 2595 ; SANDY: # %bb.0: 2596 ; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] 2597 ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2598 ; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] 2599 ; SANDY-NEXT: retq # sched: [1:1.00] 2600 ; 2601 ; HASWELL-LABEL: test_movaps: 2602 ; HASWELL: # %bb.0: 2603 ; HASWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] 2604 ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2605 ; HASWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] 2606 ; HASWELL-NEXT: retq # sched: [7:1.00] 2607 ; 2608 ; BROADWELL-LABEL: test_movaps: 2609 ; BROADWELL: # %bb.0: 2610 ; BROADWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [6:0.50] 2611 ; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2612 ; BROADWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] 2613 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2614 ; 2615 ; SKYLAKE-LABEL: test_movaps: 2616 ; SKYLAKE: # %bb.0: 2617 ; SKYLAKE-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] 2618 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 2619 ; SKYLAKE-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] 2620 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2621 ; 2622 ; SKX-LABEL: test_movaps: 2623 ; SKX: # %bb.0: 2624 ; SKX-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] 2625 ; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 2626 ; SKX-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] 2627 ; SKX-NEXT: retq # 
sched: [7:1.00] 2628 ; 2629 ; BTVER2-LABEL: test_movaps: 2630 ; BTVER2: # %bb.0: 2631 ; BTVER2-NEXT: vmovaps (%rdi), %ymm0 # sched: [5:1.00] 2632 ; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00] 2633 ; BTVER2-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] 2634 ; BTVER2-NEXT: retq # sched: [4:1.00] 2635 ; 2636 ; ZNVER1-LABEL: test_movaps: 2637 ; ZNVER1: # %bb.0: 2638 ; ZNVER1-NEXT: vmovaps (%rdi), %ymm0 # sched: [8:0.50] 2639 ; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2640 ; ZNVER1-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:0.50] 2641 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2642 %1 = load <8 x float>, <8 x float> *%a0, align 32 2643 %2 = fadd <8 x float> %1, %1 2644 store <8 x float> %2, <8 x float> *%a1, align 32 2645 ret <8 x float> %2 2646 } 2647 2648 define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) { 2649 ; GENERIC-LABEL: test_movddup: 2650 ; GENERIC: # %bb.0: 2651 ; GENERIC-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] 2652 ; GENERIC-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] 2653 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2654 ; GENERIC-NEXT: retq # sched: [1:1.00] 2655 ; 2656 ; SANDY-LABEL: test_movddup: 2657 ; SANDY: # %bb.0: 2658 ; SANDY-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] 2659 ; SANDY-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] 2660 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2661 ; SANDY-NEXT: retq # sched: [1:1.00] 2662 ; 2663 ; HASWELL-LABEL: test_movddup: 2664 ; HASWELL: # %bb.0: 2665 ; HASWELL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] 2666 ; HASWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] 2667 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2668 ; HASWELL-NEXT: retq # sched: [7:1.00] 2669 ; 2670 ; BROADWELL-LABEL: test_movddup: 2671 ; BROADWELL: # %bb.0: 2672 ; BROADWELL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] 2673 ; BROADWELL-NEXT: 
vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:0.50] 2674 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2675 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2676 ; 2677 ; SKYLAKE-LABEL: test_movddup: 2678 ; SKYLAKE: # %bb.0: 2679 ; SKYLAKE-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] 2680 ; SKYLAKE-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] 2681 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 2682 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2683 ; 2684 ; SKX-LABEL: test_movddup: 2685 ; SKX: # %bb.0: 2686 ; SKX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] 2687 ; SKX-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] 2688 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 2689 ; SKX-NEXT: retq # sched: [7:1.00] 2690 ; 2691 ; BTVER2-LABEL: test_movddup: 2692 ; BTVER2: # %bb.0: 2693 ; BTVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:2.00] 2694 ; BTVER2-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] 2695 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 2696 ; BTVER2-NEXT: retq # sched: [4:1.00] 2697 ; 2698 ; ZNVER1-LABEL: test_movddup: 2699 ; ZNVER1: # %bb.0: 2700 ; ZNVER1-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [8:0.50] 2701 ; ZNVER1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:0.50] 2702 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2703 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2704 %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 2705 %2 = load <4 x double>, <4 x double> *%a1, align 32 2706 %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 2707 %4 = fadd <4 x double> %1, %3 2708 ret <4 x double> %4 2709 } 2710 2711 define i32 @test_movmskpd(<4 x double> %a0) { 2712 ; GENERIC-LABEL: test_movmskpd: 2713 ; GENERIC: # %bb.0: 2714 ; GENERIC-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] 2715 ; GENERIC-NEXT: vzeroupper # sched: 
[100:0.33] 2716 ; GENERIC-NEXT: retq # sched: [1:1.00] 2717 ; 2718 ; SANDY-LABEL: test_movmskpd: 2719 ; SANDY: # %bb.0: 2720 ; SANDY-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] 2721 ; SANDY-NEXT: vzeroupper # sched: [100:0.33] 2722 ; SANDY-NEXT: retq # sched: [1:1.00] 2723 ; 2724 ; HASWELL-LABEL: test_movmskpd: 2725 ; HASWELL: # %bb.0: 2726 ; HASWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00] 2727 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 2728 ; HASWELL-NEXT: retq # sched: [7:1.00] 2729 ; 2730 ; BROADWELL-LABEL: test_movmskpd: 2731 ; BROADWELL: # %bb.0: 2732 ; BROADWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00] 2733 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 2734 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2735 ; 2736 ; SKYLAKE-LABEL: test_movmskpd: 2737 ; SKYLAKE: # %bb.0: 2738 ; SKYLAKE-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] 2739 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 2740 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2741 ; 2742 ; SKX-LABEL: test_movmskpd: 2743 ; SKX: # %bb.0: 2744 ; SKX-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] 2745 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 2746 ; SKX-NEXT: retq # sched: [7:1.00] 2747 ; 2748 ; BTVER2-LABEL: test_movmskpd: 2749 ; BTVER2: # %bb.0: 2750 ; BTVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00] 2751 ; BTVER2-NEXT: retq # sched: [4:1.00] 2752 ; 2753 ; ZNVER1-LABEL: test_movmskpd: 2754 ; ZNVER1: # %bb.0: 2755 ; ZNVER1-NEXT: vmovmskpd %ymm0, %eax # sched: [1:1.00] 2756 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 2757 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2758 %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) 2759 ret i32 %1 2760 } 2761 declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone 2762 2763 define i32 @test_movmskps(<8 x float> %a0) { 2764 ; GENERIC-LABEL: test_movmskps: 2765 ; GENERIC: # %bb.0: 2766 ; GENERIC-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] 2767 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 2768 ; GENERIC-NEXT: retq # sched: [1:1.00] 2769 ; 2770 
; SANDY-LABEL: test_movmskps: 2771 ; SANDY: # %bb.0: 2772 ; SANDY-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] 2773 ; SANDY-NEXT: vzeroupper # sched: [100:0.33] 2774 ; SANDY-NEXT: retq # sched: [1:1.00] 2775 ; 2776 ; HASWELL-LABEL: test_movmskps: 2777 ; HASWELL: # %bb.0: 2778 ; HASWELL-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] 2779 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 2780 ; HASWELL-NEXT: retq # sched: [7:1.00] 2781 ; 2782 ; BROADWELL-LABEL: test_movmskps: 2783 ; BROADWELL: # %bb.0: 2784 ; BROADWELL-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] 2785 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 2786 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2787 ; 2788 ; SKYLAKE-LABEL: test_movmskps: 2789 ; SKYLAKE: # %bb.0: 2790 ; SKYLAKE-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] 2791 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 2792 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2793 ; 2794 ; SKX-LABEL: test_movmskps: 2795 ; SKX: # %bb.0: 2796 ; SKX-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] 2797 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 2798 ; SKX-NEXT: retq # sched: [7:1.00] 2799 ; 2800 ; BTVER2-LABEL: test_movmskps: 2801 ; BTVER2: # %bb.0: 2802 ; BTVER2-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] 2803 ; BTVER2-NEXT: retq # sched: [4:1.00] 2804 ; 2805 ; ZNVER1-LABEL: test_movmskps: 2806 ; ZNVER1: # %bb.0: 2807 ; ZNVER1-NEXT: vmovmskps %ymm0, %eax # sched: [1:1.00] 2808 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 2809 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2810 %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) 2811 ret i32 %1 2812 } 2813 declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone 2814 2815 define void @test_movntdq(<4 x i64> %a0, <4 x i64> *%a1) { 2816 ; GENERIC-LABEL: test_movntdq: 2817 ; GENERIC: # %bb.0: 2818 ; GENERIC-NEXT: #APP 2819 ; GENERIC-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] 2820 ; GENERIC-NEXT: #NO_APP 2821 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 2822 ; GENERIC-NEXT: retq # sched: [1:1.00] 2823 ; 2824 
; SANDY-LABEL: test_movntdq: 2825 ; SANDY: # %bb.0: 2826 ; SANDY-NEXT: #APP 2827 ; SANDY-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] 2828 ; SANDY-NEXT: #NO_APP 2829 ; SANDY-NEXT: vzeroupper # sched: [100:0.33] 2830 ; SANDY-NEXT: retq # sched: [1:1.00] 2831 ; 2832 ; HASWELL-LABEL: test_movntdq: 2833 ; HASWELL: # %bb.0: 2834 ; HASWELL-NEXT: #APP 2835 ; HASWELL-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] 2836 ; HASWELL-NEXT: #NO_APP 2837 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 2838 ; HASWELL-NEXT: retq # sched: [7:1.00] 2839 ; 2840 ; BROADWELL-LABEL: test_movntdq: 2841 ; BROADWELL: # %bb.0: 2842 ; BROADWELL-NEXT: #APP 2843 ; BROADWELL-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] 2844 ; BROADWELL-NEXT: #NO_APP 2845 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 2846 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2847 ; 2848 ; SKYLAKE-LABEL: test_movntdq: 2849 ; SKYLAKE: # %bb.0: 2850 ; SKYLAKE-NEXT: #APP 2851 ; SKYLAKE-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] 2852 ; SKYLAKE-NEXT: #NO_APP 2853 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 2854 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2855 ; 2856 ; SKX-LABEL: test_movntdq: 2857 ; SKX: # %bb.0: 2858 ; SKX-NEXT: #APP 2859 ; SKX-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] 2860 ; SKX-NEXT: #NO_APP 2861 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 2862 ; SKX-NEXT: retq # sched: [7:1.00] 2863 ; 2864 ; BTVER2-LABEL: test_movntdq: 2865 ; BTVER2: # %bb.0: 2866 ; BTVER2-NEXT: #APP 2867 ; BTVER2-NEXT: vmovntdq %ymm0, (%rdi) # sched: [2:2.00] 2868 ; BTVER2-NEXT: #NO_APP 2869 ; BTVER2-NEXT: retq # sched: [4:1.00] 2870 ; 2871 ; ZNVER1-LABEL: test_movntdq: 2872 ; ZNVER1: # %bb.0: 2873 ; ZNVER1-NEXT: #APP 2874 ; ZNVER1-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:0.50] 2875 ; ZNVER1-NEXT: #NO_APP 2876 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 2877 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2878 call void asm sideeffect "vmovntdq $0, $1", "x,*m"(<4 x i64> %a0, <4 x i64> *%a1) 2879 ret void 2880 } 2881 2882 define <4 x double> 
@test_movntpd(<4 x double> %a0, <4 x double> *%a1) { 2883 ; GENERIC-LABEL: test_movntpd: 2884 ; GENERIC: # %bb.0: 2885 ; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2886 ; GENERIC-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] 2887 ; GENERIC-NEXT: retq # sched: [1:1.00] 2888 ; 2889 ; SANDY-LABEL: test_movntpd: 2890 ; SANDY: # %bb.0: 2891 ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2892 ; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] 2893 ; SANDY-NEXT: retq # sched: [1:1.00] 2894 ; 2895 ; HASWELL-LABEL: test_movntpd: 2896 ; HASWELL: # %bb.0: 2897 ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2898 ; HASWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] 2899 ; HASWELL-NEXT: retq # sched: [7:1.00] 2900 ; 2901 ; BROADWELL-LABEL: test_movntpd: 2902 ; BROADWELL: # %bb.0: 2903 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2904 ; BROADWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] 2905 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2906 ; 2907 ; SKYLAKE-LABEL: test_movntpd: 2908 ; SKYLAKE: # %bb.0: 2909 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 2910 ; SKYLAKE-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] 2911 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2912 ; 2913 ; SKX-LABEL: test_movntpd: 2914 ; SKX: # %bb.0: 2915 ; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 2916 ; SKX-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] 2917 ; SKX-NEXT: retq # sched: [7:1.00] 2918 ; 2919 ; BTVER2-LABEL: test_movntpd: 2920 ; BTVER2: # %bb.0: 2921 ; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00] 2922 ; BTVER2-NEXT: vmovntpd %ymm0, (%rdi) # sched: [3:2.00] 2923 ; BTVER2-NEXT: retq # sched: [4:1.00] 2924 ; 2925 ; ZNVER1-LABEL: test_movntpd: 2926 ; ZNVER1: # %bb.0: 2927 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2928 ; ZNVER1-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:0.50] 2929 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2930 %1 = fadd <4 x double> %a0, %a0 2931 store <4 x double> 
%1, <4 x double> *%a1, align 32, !nontemporal !0 2932 ret <4 x double> %1 2933 } 2934 2935 define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) { 2936 ; GENERIC-LABEL: test_movntps: 2937 ; GENERIC: # %bb.0: 2938 ; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2939 ; GENERIC-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] 2940 ; GENERIC-NEXT: retq # sched: [1:1.00] 2941 ; 2942 ; SANDY-LABEL: test_movntps: 2943 ; SANDY: # %bb.0: 2944 ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2945 ; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] 2946 ; SANDY-NEXT: retq # sched: [1:1.00] 2947 ; 2948 ; HASWELL-LABEL: test_movntps: 2949 ; HASWELL: # %bb.0: 2950 ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2951 ; HASWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] 2952 ; HASWELL-NEXT: retq # sched: [7:1.00] 2953 ; 2954 ; BROADWELL-LABEL: test_movntps: 2955 ; BROADWELL: # %bb.0: 2956 ; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2957 ; BROADWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] 2958 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2959 ; 2960 ; SKYLAKE-LABEL: test_movntps: 2961 ; SKYLAKE: # %bb.0: 2962 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 2963 ; SKYLAKE-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] 2964 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2965 ; 2966 ; SKX-LABEL: test_movntps: 2967 ; SKX: # %bb.0: 2968 ; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 2969 ; SKX-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] 2970 ; SKX-NEXT: retq # sched: [7:1.00] 2971 ; 2972 ; BTVER2-LABEL: test_movntps: 2973 ; BTVER2: # %bb.0: 2974 ; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00] 2975 ; BTVER2-NEXT: vmovntps %ymm0, (%rdi) # sched: [3:2.00] 2976 ; BTVER2-NEXT: retq # sched: [4:1.00] 2977 ; 2978 ; ZNVER1-LABEL: test_movntps: 2979 ; ZNVER1: # %bb.0: 2980 ; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 2981 ; ZNVER1-NEXT: vmovntps %ymm0, (%rdi) # sched: 
[1:0.50] 2982 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2983 %1 = fadd <8 x float> %a0, %a0 2984 store <8 x float> %1, <8 x float> *%a1, align 32, !nontemporal !0 2985 ret <8 x float> %1 2986 } 2987 2988 define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) { 2989 ; GENERIC-LABEL: test_movshdup: 2990 ; GENERIC: # %bb.0: 2991 ; GENERIC-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] 2992 ; GENERIC-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] 2993 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 2994 ; GENERIC-NEXT: retq # sched: [1:1.00] 2995 ; 2996 ; SANDY-LABEL: test_movshdup: 2997 ; SANDY: # %bb.0: 2998 ; SANDY-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] 2999 ; SANDY-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] 3000 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3001 ; SANDY-NEXT: retq # sched: [1:1.00] 3002 ; 3003 ; HASWELL-LABEL: test_movshdup: 3004 ; HASWELL: # %bb.0: 3005 ; HASWELL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] 3006 ; HASWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] 3007 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3008 ; HASWELL-NEXT: retq # sched: [7:1.00] 3009 ; 3010 ; BROADWELL-LABEL: test_movshdup: 3011 ; BROADWELL: # %bb.0: 3012 ; BROADWELL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] 3013 ; BROADWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [6:0.50] 3014 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3015 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3016 ; 3017 ; SKYLAKE-LABEL: test_movshdup: 3018 ; SKYLAKE: # %bb.0: 3019 ; SKYLAKE-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] 3020 ; SKYLAKE-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] 3021 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3022 ; SKYLAKE-NEXT: retq # sched: 
[7:1.00] 3023 ; 3024 ; SKX-LABEL: test_movshdup: 3025 ; SKX: # %bb.0: 3026 ; SKX-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] 3027 ; SKX-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] 3028 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3029 ; SKX-NEXT: retq # sched: [7:1.00] 3030 ; 3031 ; BTVER2-LABEL: test_movshdup: 3032 ; BTVER2: # %bb.0: 3033 ; BTVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [6:2.00] 3034 ; BTVER2-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] 3035 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3036 ; BTVER2-NEXT: retq # sched: [4:1.00] 3037 ; 3038 ; ZNVER1-LABEL: test_movshdup: 3039 ; ZNVER1: # %bb.0: 3040 ; ZNVER1-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [8:0.50] 3041 ; ZNVER1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:0.50] 3042 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3043 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3044 %1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 3045 %2 = load <8 x float>, <8 x float> *%a1, align 32 3046 %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> 3047 %4 = fadd <8 x float> %1, %3 3048 ret <8 x float> %4 3049 } 3050 3051 define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) { 3052 ; GENERIC-LABEL: test_movsldup: 3053 ; GENERIC: # %bb.0: 3054 ; GENERIC-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] 3055 ; GENERIC-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] 3056 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3057 ; GENERIC-NEXT: retq # sched: [1:1.00] 3058 ; 3059 ; SANDY-LABEL: test_movsldup: 3060 ; SANDY: # %bb.0: 3061 ; SANDY-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] 3062 ; SANDY-NEXT: vmovsldup {{.*#+}} 
ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] 3063 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3064 ; SANDY-NEXT: retq # sched: [1:1.00] 3065 ; 3066 ; HASWELL-LABEL: test_movsldup: 3067 ; HASWELL: # %bb.0: 3068 ; HASWELL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] 3069 ; HASWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] 3070 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3071 ; HASWELL-NEXT: retq # sched: [7:1.00] 3072 ; 3073 ; BROADWELL-LABEL: test_movsldup: 3074 ; BROADWELL: # %bb.0: 3075 ; BROADWELL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] 3076 ; BROADWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [6:0.50] 3077 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3078 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3079 ; 3080 ; SKYLAKE-LABEL: test_movsldup: 3081 ; SKYLAKE: # %bb.0: 3082 ; SKYLAKE-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] 3083 ; SKYLAKE-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] 3084 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3085 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3086 ; 3087 ; SKX-LABEL: test_movsldup: 3088 ; SKX: # %bb.0: 3089 ; SKX-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] 3090 ; SKX-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] 3091 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3092 ; SKX-NEXT: retq # sched: [7:1.00] 3093 ; 3094 ; BTVER2-LABEL: test_movsldup: 3095 ; BTVER2: # %bb.0: 3096 ; BTVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [6:2.00] 3097 ; BTVER2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] 3098 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3099 ; BTVER2-NEXT: retq # sched: [4:1.00] 3100 ; 3101 ; ZNVER1-LABEL: test_movsldup: 3102 ; ZNVER1: # %bb.0: 3103 ; ZNVER1-NEXT: vmovsldup {{.*#+}} ymm1 = 
mem[0,0,2,2,4,4,6,6] sched: [8:0.50] 3104 ; ZNVER1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:0.50] 3105 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3106 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3107 %1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 3108 %2 = load <8 x float>, <8 x float> *%a1, align 32 3109 %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> 3110 %4 = fadd <8 x float> %1, %3 3111 ret <8 x float> %4 3112 } 3113 ; Unaligned (align 1) load of a <4 x double> from %a0, fadd of the value with itself, then an unaligned store of the sum to %a1. The runs using the SANDY prefix expect both memory operations to be split into 128-bit halves (vmovups + vinsertf128 for the load, vextractf128 + vmovupd for the store); every other run expects a single ymm vmovupd for each. 3114 define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) { 3115 ; GENERIC-LABEL: test_movupd: 3116 ; GENERIC: # %bb.0: 3117 ; GENERIC-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50] 3118 ; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 3119 ; GENERIC-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] 3120 ; GENERIC-NEXT: retq # sched: [1:1.00] 3121 ; 3122 ; SANDY-LABEL: test_movupd: 3123 ; SANDY: # %bb.0: 3124 ; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] 3125 ; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3126 ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 3127 ; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00] 3128 ; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] 3129 ; SANDY-NEXT: retq # sched: [1:1.00] 3130 ; 3131 ; HASWELL-LABEL: test_movupd: 3132 ; HASWELL: # %bb.0: 3133 ; HASWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50] 3134 ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 3135 ; HASWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] 3136 ; HASWELL-NEXT: retq # sched: [7:1.00] 3137 ; 3138 ; BROADWELL-LABEL: test_movupd: 3139 ; BROADWELL: # %bb.0: 3140 ; BROADWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [6:0.50] 3141 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 3142 ; BROADWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] 3143 ; BROADWELL-NEXT:
retq # sched: [7:1.00] 3144 ; 3145 ; SKYLAKE-LABEL: test_movupd: 3146 ; SKYLAKE: # %bb.0: 3147 ; SKYLAKE-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50] 3148 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 3149 ; SKYLAKE-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] 3150 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3151 ; 3152 ; SKX-LABEL: test_movupd: 3153 ; SKX: # %bb.0: 3154 ; SKX-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50] 3155 ; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 3156 ; SKX-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] 3157 ; SKX-NEXT: retq # sched: [7:1.00] 3158 ; 3159 ; BTVER2-LABEL: test_movupd: 3160 ; BTVER2: # %bb.0: 3161 ; BTVER2-NEXT: vmovupd (%rdi), %ymm0 # sched: [5:1.00] 3162 ; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00] 3163 ; BTVER2-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] 3164 ; BTVER2-NEXT: retq # sched: [4:1.00] 3165 ; 3166 ; ZNVER1-LABEL: test_movupd: 3167 ; ZNVER1: # %bb.0: 3168 ; ZNVER1-NEXT: vmovupd (%rdi), %ymm0 # sched: [8:0.50] 3169 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 3170 ; ZNVER1-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:0.50] 3171 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3172 %1 = load <4 x double>, <4 x double> *%a0, align 1 3173 %2 = fadd <4 x double> %1, %1 3174 store <4 x double> %2, <4 x double> *%a1, align 1 3175 ret <4 x double> %2 3176 } 3177 3178 define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) { 3179 ; GENERIC-LABEL: test_movups: 3180 ; GENERIC: # %bb.0: 3181 ; GENERIC-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50] 3182 ; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 3183 ; GENERIC-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] 3184 ; GENERIC-NEXT: retq # sched: [1:1.00] 3185 ; 3186 ; SANDY-LABEL: test_movups: 3187 ; SANDY: # %bb.0: 3188 ; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] 3189 ; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3190 ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 3191
; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00] 3192 ; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] 3193 ; SANDY-NEXT: retq # sched: [1:1.00] 3194 ; 3195 ; HASWELL-LABEL: test_movups: 3196 ; HASWELL: # %bb.0: 3197 ; HASWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50] 3198 ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 3199 ; HASWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] 3200 ; HASWELL-NEXT: retq # sched: [7:1.00] 3201 ; 3202 ; BROADWELL-LABEL: test_movups: 3203 ; BROADWELL: # %bb.0: 3204 ; BROADWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [6:0.50] 3205 ; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 3206 ; BROADWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] 3207 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3208 ; 3209 ; SKYLAKE-LABEL: test_movups: 3210 ; SKYLAKE: # %bb.0: 3211 ; SKYLAKE-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50] 3212 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 3213 ; SKYLAKE-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] 3214 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3215 ; 3216 ; SKX-LABEL: test_movups: 3217 ; SKX: # %bb.0: 3218 ; SKX-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50] 3219 ; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 3220 ; SKX-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] 3221 ; SKX-NEXT: retq # sched: [7:1.00] 3222 ; 3223 ; BTVER2-LABEL: test_movups: 3224 ; BTVER2: # %bb.0: 3225 ; BTVER2-NEXT: vmovups (%rdi), %ymm0 # sched: [5:1.00] 3226 ; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00] 3227 ; BTVER2-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] 3228 ; BTVER2-NEXT: retq # sched: [4:1.00] 3229 ; 3230 ; ZNVER1-LABEL: test_movups: 3231 ; ZNVER1: # %bb.0: 3232 ; ZNVER1-NEXT: vmovups (%rdi), %ymm0 # sched: [8:0.50] 3233 ; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 3234 ; ZNVER1-NEXT: vmovups %ymm0, (%rsi) # sched: [1:0.50] 3235 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3236 %1 = load <8 x float>, <8 x float> *%a0, align 1 3237 %2 = 
fadd <8 x float> %1, %1 3238 store <8 x float> %2, <8 x float> *%a1, align 1 3239 ret <8 x float> %2 3240 } 3241 3242 define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 3243 ; GENERIC-LABEL: test_mulpd: 3244 ; GENERIC: # %bb.0: 3245 ; GENERIC-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 3246 ; GENERIC-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 3247 ; GENERIC-NEXT: retq # sched: [1:1.00] 3248 ; 3249 ; SANDY-LABEL: test_mulpd: 3250 ; SANDY: # %bb.0: 3251 ; SANDY-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 3252 ; SANDY-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 3253 ; SANDY-NEXT: retq # sched: [1:1.00] 3254 ; 3255 ; HASWELL-LABEL: test_mulpd: 3256 ; HASWELL: # %bb.0: 3257 ; HASWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:0.50] 3258 ; HASWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:0.50] 3259 ; HASWELL-NEXT: retq # sched: [7:1.00] 3260 ; 3261 ; BROADWELL-LABEL: test_mulpd: 3262 ; BROADWELL: # %bb.0: 3263 ; BROADWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [3:0.50] 3264 ; BROADWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:0.50] 3265 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3266 ; 3267 ; SKYLAKE-LABEL: test_mulpd: 3268 ; SKYLAKE: # %bb.0: 3269 ; SKYLAKE-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3270 ; SKYLAKE-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 3271 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3272 ; 3273 ; SKX-LABEL: test_mulpd: 3274 ; SKX: # %bb.0: 3275 ; SKX-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3276 ; SKX-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 3277 ; SKX-NEXT: retq # sched: [7:1.00] 3278 ; 3279 ; BTVER2-LABEL: test_mulpd: 3280 ; BTVER2: # %bb.0: 3281 ; BTVER2-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:4.00] 3282 ; BTVER2-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:4.00] 3283 ; BTVER2-NEXT: retq # sched: [4:1.00] 3284 ; 3285 ; ZNVER1-LABEL: test_mulpd: 3286 ; ZNVER1: # %bb.0: 3287 ; ZNVER1-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # 
sched: [4:0.50] 3288 ; ZNVER1-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 3289 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3290 %1 = fmul <4 x double> %a0, %a1 3291 %2 = load <4 x double>, <4 x double> *%a2, align 32 3292 %3 = fmul <4 x double> %1, %2 3293 ret <4 x double> %3 3294 } 3295 3296 define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { 3297 ; GENERIC-LABEL: test_mulps: 3298 ; GENERIC: # %bb.0: 3299 ; GENERIC-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 3300 ; GENERIC-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 3301 ; GENERIC-NEXT: retq # sched: [1:1.00] 3302 ; 3303 ; SANDY-LABEL: test_mulps: 3304 ; SANDY: # %bb.0: 3305 ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 3306 ; SANDY-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 3307 ; SANDY-NEXT: retq # sched: [1:1.00] 3308 ; 3309 ; HASWELL-LABEL: test_mulps: 3310 ; HASWELL: # %bb.0: 3311 ; HASWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50] 3312 ; HASWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:0.50] 3313 ; HASWELL-NEXT: retq # sched: [7:1.00] 3314 ; 3315 ; BROADWELL-LABEL: test_mulps: 3316 ; BROADWELL: # %bb.0: 3317 ; BROADWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [3:0.50] 3318 ; BROADWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [9:0.50] 3319 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3320 ; 3321 ; SKYLAKE-LABEL: test_mulps: 3322 ; SKYLAKE: # %bb.0: 3323 ; SKYLAKE-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3324 ; SKYLAKE-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 3325 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3326 ; 3327 ; SKX-LABEL: test_mulps: 3328 ; SKX: # %bb.0: 3329 ; SKX-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3330 ; SKX-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 3331 ; SKX-NEXT: retq # sched: [7:1.00] 3332 ; 3333 ; BTVER2-LABEL: test_mulps: 3334 ; BTVER2: # %bb.0: 3335 ; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] 3336 ; BTVER2-NEXT: vmulps (%rdi), %ymm0, %ymm0 # 
sched: [7:2.00] 3337 ; BTVER2-NEXT: retq # sched: [4:1.00] 3338 ; 3339 ; ZNVER1-LABEL: test_mulps: 3340 ; ZNVER1: # %bb.0: 3341 ; ZNVER1-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3342 ; ZNVER1-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 3343 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3344 %1 = fmul <8 x float> %a0, %a1 3345 %2 = load <8 x float>, <8 x float> *%a2, align 32 3346 %3 = fmul <8 x float> %1, %2 3347 ret <8 x float> %3 3348 } 3349 ; Bitwise OR of %a0, %a1 and a value loaded from %a2, performed on <4 x i64> bitcasts, followed by an fadd of %a1 with the result; the trailing fadd presumably keeps the OR in the floating-point domain so vorpd is selected (confirm). NOTE(review): this function is named @orpd while its siblings in this file carry a test_ prefix (e.g. @test_orps); the name is left unchanged because the generated -LABEL assertions are derived from it, so a rename must be followed by rerunning utils/update_llc_test_checks.py. 3350 define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 3351 ; GENERIC-LABEL: orpd: 3352 ; GENERIC: # %bb.0: 3353 ; GENERIC-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3354 ; GENERIC-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 3355 ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 3356 ; GENERIC-NEXT: retq # sched: [1:1.00] 3357 ; 3358 ; SANDY-LABEL: orpd: 3359 ; SANDY: # %bb.0: 3360 ; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3361 ; SANDY-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 3362 ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 3363 ; SANDY-NEXT: retq # sched: [1:1.00] 3364 ; 3365 ; HASWELL-LABEL: orpd: 3366 ; HASWELL: # %bb.0: 3367 ; HASWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3368 ; HASWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 3369 ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 3370 ; HASWELL-NEXT: retq # sched: [7:1.00] 3371 ; 3372 ; BROADWELL-LABEL: orpd: 3373 ; BROADWELL: # %bb.0: 3374 ; BROADWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3375 ; BROADWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 3376 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 3377 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3378 ; 3379 ; SKYLAKE-LABEL: orpd: 3380 ; SKYLAKE: # %bb.0: 3381 ; SKYLAKE-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 3382 ; SKYLAKE-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3383 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched:
[4:0.50] 3384 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3385 ; 3386 ; SKX-LABEL: orpd: 3387 ; SKX: # %bb.0: 3388 ; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 3389 ; SKX-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3390 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 3391 ; SKX-NEXT: retq # sched: [7:1.00] 3392 ; 3393 ; BTVER2-LABEL: orpd: 3394 ; BTVER2: # %bb.0: 3395 ; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3396 ; BTVER2-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] 3397 ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] 3398 ; BTVER2-NEXT: retq # sched: [4:1.00] 3399 ; 3400 ; ZNVER1-LABEL: orpd: 3401 ; ZNVER1: # %bb.0: 3402 ; ZNVER1-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3403 ; ZNVER1-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3404 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 3405 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3406 %1 = bitcast <4 x double> %a0 to <4 x i64> 3407 %2 = bitcast <4 x double> %a1 to <4 x i64> 3408 %3 = or <4 x i64> %1, %2 3409 %4 = load <4 x double>, <4 x double> *%a2, align 32 3410 %5 = bitcast <4 x double> %4 to <4 x i64> 3411 %6 = or <4 x i64> %3, %5 3412 %7 = bitcast <4 x i64> %6 to <4 x double> 3413 %8 = fadd <4 x double> %a1, %7 3414 ret <4 x double> %8 3415 } 3416 3417 define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { 3418 ; GENERIC-LABEL: test_orps: 3419 ; GENERIC: # %bb.0: 3420 ; GENERIC-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3421 ; GENERIC-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 3422 ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 3423 ; GENERIC-NEXT: retq # sched: [1:1.00] 3424 ; 3425 ; SANDY-LABEL: test_orps: 3426 ; SANDY: # %bb.0: 3427 ; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3428 ; SANDY-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 3429 ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 3430 ; SANDY-NEXT: retq # sched: [1:1.00] 3431 ; 3432 ;
HASWELL-LABEL: test_orps: 3433 ; HASWELL: # %bb.0: 3434 ; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3435 ; HASWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 3436 ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 3437 ; HASWELL-NEXT: retq # sched: [7:1.00] 3438 ; 3439 ; BROADWELL-LABEL: test_orps: 3440 ; BROADWELL: # %bb.0: 3441 ; BROADWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3442 ; BROADWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 3443 ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 3444 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3445 ; 3446 ; SKYLAKE-LABEL: test_orps: 3447 ; SKYLAKE: # %bb.0: 3448 ; SKYLAKE-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 3449 ; SKYLAKE-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3450 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 3451 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3452 ; 3453 ; SKX-LABEL: test_orps: 3454 ; SKX: # %bb.0: 3455 ; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 3456 ; SKX-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3457 ; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 3458 ; SKX-NEXT: retq # sched: [7:1.00] 3459 ; 3460 ; BTVER2-LABEL: test_orps: 3461 ; BTVER2: # %bb.0: 3462 ; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3463 ; BTVER2-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] 3464 ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] 3465 ; BTVER2-NEXT: retq # sched: [4:1.00] 3466 ; 3467 ; ZNVER1-LABEL: test_orps: 3468 ; ZNVER1: # %bb.0: 3469 ; ZNVER1-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3470 ; ZNVER1-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3471 ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 3472 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3473 %1 = bitcast <8 x float> %a0 to <4 x i64> 3474 %2 = bitcast <8 x float> %a1 to <4 x i64> 3475 %3 = or <4 x i64> %1, %2 3476 %4 = load <8 x float>, <8 x float> *%a2, align 32 3477 %5 = bitcast <8 x 
float> %4 to <4 x i64> 3478 %6 = or <4 x i64> %3, %5 3479 %7 = bitcast <4 x i64> %6 to <8 x float> 3480 %8 = fadd <8 x float> %a1, %7 3481 ret <8 x float> %8 3482 } 3483 ; The shuffle mask <2,3,4,5> takes the upper half of %a0 followed by the lower half of the second operand, once with a register operand (%a1) and once with an operand loaded from %a2; the two results are combined with fadd. Both shuffles are expected to select vperm2f128 (register form and memory form). NOTE(review): the ZNVER1 assertions list latency 100 for both vperm2f128 forms, far above every other run - this looks like a placeholder value in that scheduler model; confirm it there rather than editing the generated assertions here. 3484 define <4 x double> @test_perm2f128(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 3485 ; GENERIC-LABEL: test_perm2f128: 3486 ; GENERIC: # %bb.0: 3487 ; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 3488 ; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00] 3489 ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 3490 ; GENERIC-NEXT: retq # sched: [1:1.00] 3491 ; 3492 ; SANDY-LABEL: test_perm2f128: 3493 ; SANDY: # %bb.0: 3494 ; SANDY-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00] 3495 ; SANDY-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00] 3496 ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 3497 ; SANDY-NEXT: retq # sched: [1:1.00] 3498 ; 3499 ; HASWELL-LABEL: test_perm2f128: 3500 ; HASWELL: # %bb.0: 3501 ; HASWELL-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 3502 ; HASWELL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] 3503 ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 3504 ; HASWELL-NEXT: retq # sched: [7:1.00] 3505 ; 3506 ; BROADWELL-LABEL: test_perm2f128: 3507 ; BROADWELL: # %bb.0: 3508 ; BROADWELL-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 3509 ; BROADWELL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:1.00] 3510 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 3511 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3512 ; 3513 ; SKYLAKE-LABEL: test_perm2f128: 3514 ; SKYLAKE: # %bb.0: 3515 ; SKYLAKE-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 3516 ; SKYLAKE-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] 3517 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 3518 ; SKYLAKE-NEXT: retq # sched: [7:1.00]
3519 ; 3520 ; SKX-LABEL: test_perm2f128: 3521 ; SKX: # %bb.0: 3522 ; SKX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] 3523 ; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] 3524 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 3525 ; SKX-NEXT: retq # sched: [7:1.00] 3526 ; 3527 ; BTVER2-LABEL: test_perm2f128: 3528 ; BTVER2: # %bb.0: 3529 ; BTVER2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:0.50] 3530 ; BTVER2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [6:1.00] 3531 ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] 3532 ; BTVER2-NEXT: retq # sched: [4:1.00] 3533 ; 3534 ; ZNVER1-LABEL: test_perm2f128: 3535 ; ZNVER1: # %bb.0: 3536 ; ZNVER1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [100:0.25] 3537 ; ZNVER1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [100:0.25] 3538 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 3539 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3540 %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 3541 %2 = load <4 x double>, <4 x double> *%a2, align 32 3542 %3 = shufflevector <4 x double> %a0, <4 x double> %2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 3543 %4 = fadd <4 x double> %1, %3 3544 ret <4 x double> %4 3545 } 3546 3547 define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) { 3548 ; GENERIC-LABEL: test_permilpd: 3549 ; GENERIC: # %bb.0: 3550 ; GENERIC-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] 3551 ; GENERIC-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] 3552 ; GENERIC-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3553 ; GENERIC-NEXT: retq # sched: [1:1.00] 3554 ; 3555 ; SANDY-LABEL: test_permilpd: 3556 ; SANDY: # %bb.0: 3557 ; SANDY-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] 3558 ; SANDY-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] 3559 ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched:
[3:1.00] 3560 ; SANDY-NEXT: retq # sched: [1:1.00] 3561 ; 3562 ; HASWELL-LABEL: test_permilpd: 3563 ; HASWELL: # %bb.0: 3564 ; HASWELL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] 3565 ; HASWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] 3566 ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3567 ; HASWELL-NEXT: retq # sched: [7:1.00] 3568 ; 3569 ; BROADWELL-LABEL: test_permilpd: 3570 ; BROADWELL: # %bb.0: 3571 ; BROADWELL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] 3572 ; BROADWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [6:1.00] 3573 ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3574 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3575 ; 3576 ; SKYLAKE-LABEL: test_permilpd: 3577 ; SKYLAKE: # %bb.0: 3578 ; SKYLAKE-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] 3579 ; SKYLAKE-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] 3580 ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 3581 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3582 ; 3583 ; SKX-LABEL: test_permilpd: 3584 ; SKX: # %bb.0: 3585 ; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] 3586 ; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] 3587 ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 3588 ; SKX-NEXT: retq # sched: [7:1.00] 3589 ; 3590 ; BTVER2-LABEL: test_permilpd: 3591 ; BTVER2: # %bb.0: 3592 ; BTVER2-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [6:1.00] 3593 ; BTVER2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:0.50] 3594 ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3595 ; BTVER2-NEXT: retq # sched: [4:1.00] 3596 ; 3597 ; ZNVER1-LABEL: test_permilpd: 3598 ; ZNVER1: # %bb.0: 3599 ; ZNVER1-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [8:0.50] 3600 ; ZNVER1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:0.50] 3601 ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3602 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3603 %1 = shufflevector <2 x 
double> %a0, <2 x double> undef, <2 x i32> <i32 1, i32 0> 3604 %2 = load <2 x double>, <2 x double> *%a1, align 16 3605 %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> <i32 1, i32 0> 3606 %4 = fadd <2 x double> %1, %3 3607 ret <2 x double> %4 3608 } 3609 3610 define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) { 3611 ; GENERIC-LABEL: test_permilpd_ymm: 3612 ; GENERIC: # %bb.0: 3613 ; GENERIC-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] 3614 ; GENERIC-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00] 3615 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3616 ; GENERIC-NEXT: retq # sched: [1:1.00] 3617 ; 3618 ; SANDY-LABEL: test_permilpd_ymm: 3619 ; SANDY: # %bb.0: 3620 ; SANDY-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] 3621 ; SANDY-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00] 3622 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3623 ; SANDY-NEXT: retq # sched: [1:1.00] 3624 ; 3625 ; HASWELL-LABEL: test_permilpd_ymm: 3626 ; HASWELL: # %bb.0: 3627 ; HASWELL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] 3628 ; HASWELL-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00] 3629 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3630 ; HASWELL-NEXT: retq # sched: [7:1.00] 3631 ; 3632 ; BROADWELL-LABEL: test_permilpd_ymm: 3633 ; BROADWELL: # %bb.0: 3634 ; BROADWELL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] 3635 ; BROADWELL-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [7:1.00] 3636 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3637 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3638 ; 3639 ; SKYLAKE-LABEL: test_permilpd_ymm: 3640 ; SKYLAKE: # %bb.0: 3641 ; SKYLAKE-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] 3642 ; SKYLAKE-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00] 3643 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3644 ; 
SKYLAKE-NEXT: retq # sched: [7:1.00] 3645 ; 3646 ; SKX-LABEL: test_permilpd_ymm: 3647 ; SKX: # %bb.0: 3648 ; SKX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] 3649 ; SKX-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00] 3650 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3651 ; SKX-NEXT: retq # sched: [7:1.00] 3652 ; 3653 ; BTVER2-LABEL: test_permilpd_ymm: 3654 ; BTVER2: # %bb.0: 3655 ; BTVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [6:2.00] 3656 ; BTVER2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] 3657 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3658 ; BTVER2-NEXT: retq # sched: [4:1.00] 3659 ; 3660 ; ZNVER1-LABEL: test_permilpd_ymm: 3661 ; ZNVER1: # %bb.0: 3662 ; ZNVER1-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:0.50] 3663 ; ZNVER1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:0.50] 3664 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3665 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3666 %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 3> 3667 %2 = load <4 x double>, <4 x double> *%a1, align 32 3668 %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 3> 3669 %4 = fadd <4 x double> %1, %3 3670 ret <4 x double> %4 3671 } 3672 3673 define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) { 3674 ; GENERIC-LABEL: test_permilps: 3675 ; GENERIC: # %bb.0: 3676 ; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] 3677 ; GENERIC-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] 3678 ; GENERIC-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3679 ; GENERIC-NEXT: retq # sched: [1:1.00] 3680 ; 3681 ; SANDY-LABEL: test_permilps: 3682 ; SANDY: # %bb.0: 3683 ; SANDY-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] 3684 ; SANDY-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] 3685 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 
# sched: [3:1.00] 3686 ; SANDY-NEXT: retq # sched: [1:1.00] 3687 ; 3688 ; HASWELL-LABEL: test_permilps: 3689 ; HASWELL: # %bb.0: 3690 ; HASWELL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] 3691 ; HASWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] 3692 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3693 ; HASWELL-NEXT: retq # sched: [7:1.00] 3694 ; 3695 ; BROADWELL-LABEL: test_permilps: 3696 ; BROADWELL: # %bb.0: 3697 ; BROADWELL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] 3698 ; BROADWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00] 3699 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3700 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3701 ; 3702 ; SKYLAKE-LABEL: test_permilps: 3703 ; SKYLAKE: # %bb.0: 3704 ; SKYLAKE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] 3705 ; SKYLAKE-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] 3706 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 3707 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3708 ; 3709 ; SKX-LABEL: test_permilps: 3710 ; SKX: # %bb.0: 3711 ; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] 3712 ; SKX-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] 3713 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 3714 ; SKX-NEXT: retq # sched: [7:1.00] 3715 ; 3716 ; BTVER2-LABEL: test_permilps: 3717 ; BTVER2: # %bb.0: 3718 ; BTVER2-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00] 3719 ; BTVER2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:0.50] 3720 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3721 ; BTVER2-NEXT: retq # sched: [4:1.00] 3722 ; 3723 ; ZNVER1-LABEL: test_permilps: 3724 ; ZNVER1: # %bb.0: 3725 ; ZNVER1-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [8:0.50] 3726 ; ZNVER1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:0.50] 3727 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3728 ; 
ZNVER1-NEXT: retq # sched: [1:0.50] 3729 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 3730 %2 = load <4 x float>, <4 x float> *%a1, align 16 3731 %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 3732 %4 = fadd <4 x float> %1, %3 3733 ret <4 x float> %4 3734 } 3735 3736 define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) { 3737 ; GENERIC-LABEL: test_permilps_ymm: 3738 ; GENERIC: # %bb.0: 3739 ; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 3740 ; GENERIC-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00] 3741 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3742 ; GENERIC-NEXT: retq # sched: [1:1.00] 3743 ; 3744 ; SANDY-LABEL: test_permilps_ymm: 3745 ; SANDY: # %bb.0: 3746 ; SANDY-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 3747 ; SANDY-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00] 3748 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3749 ; SANDY-NEXT: retq # sched: [1:1.00] 3750 ; 3751 ; HASWELL-LABEL: test_permilps_ymm: 3752 ; HASWELL: # %bb.0: 3753 ; HASWELL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 3754 ; HASWELL-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00] 3755 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3756 ; HASWELL-NEXT: retq # sched: [7:1.00] 3757 ; 3758 ; BROADWELL-LABEL: test_permilps_ymm: 3759 ; BROADWELL: # %bb.0: 3760 ; BROADWELL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 3761 ; BROADWELL-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [7:1.00] 3762 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3763 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3764 ; 3765 ; SKYLAKE-LABEL: test_permilps_ymm: 3766 ; SKYLAKE: # %bb.0: 3767 ; SKYLAKE-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 
3768 ; SKYLAKE-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00] 3769 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3770 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3771 ; 3772 ; SKX-LABEL: test_permilps_ymm: 3773 ; SKX: # %bb.0: 3774 ; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 3775 ; SKX-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00] 3776 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3777 ; SKX-NEXT: retq # sched: [7:1.00] 3778 ; 3779 ; BTVER2-LABEL: test_permilps_ymm: 3780 ; BTVER2: # %bb.0: 3781 ; BTVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [6:2.00] 3782 ; BTVER2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 3783 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3784 ; BTVER2-NEXT: retq # sched: [4:1.00] 3785 ; 3786 ; ZNVER1-LABEL: test_permilps_ymm: 3787 ; ZNVER1: # %bb.0: 3788 ; ZNVER1-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:0.50] 3789 ; ZNVER1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.50] 3790 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 3791 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3792 %1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 3793 %2 = load <8 x float>, <8 x float> *%a1, align 32 3794 %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 3795 %4 = fadd <8 x float> %1, %3 3796 ret <8 x float> %4 3797 } 3798 3799 define <2 x double> @test_permilvarpd(<2 x double> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 3800 ; GENERIC-LABEL: test_permilvarpd: 3801 ; GENERIC: # %bb.0: 3802 ; GENERIC-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 3803 ; GENERIC-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3804 ; GENERIC-NEXT: retq # sched: [1:1.00] 3805 ; 3806 ; SANDY-LABEL: test_permilvarpd: 3807 ; SANDY: # 
%bb.0: 3808 ; SANDY-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 3809 ; SANDY-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3810 ; SANDY-NEXT: retq # sched: [1:1.00] 3811 ; 3812 ; HASWELL-LABEL: test_permilvarpd: 3813 ; HASWELL: # %bb.0: 3814 ; HASWELL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 3815 ; HASWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3816 ; HASWELL-NEXT: retq # sched: [7:1.00] 3817 ; 3818 ; BROADWELL-LABEL: test_permilvarpd: 3819 ; BROADWELL: # %bb.0: 3820 ; BROADWELL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 3821 ; BROADWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 3822 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3823 ; 3824 ; SKYLAKE-LABEL: test_permilvarpd: 3825 ; SKYLAKE: # %bb.0: 3826 ; SKYLAKE-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 3827 ; SKYLAKE-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3828 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3829 ; 3830 ; SKX-LABEL: test_permilvarpd: 3831 ; SKX: # %bb.0: 3832 ; SKX-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 3833 ; SKX-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3834 ; SKX-NEXT: retq # sched: [7:1.00] 3835 ; 3836 ; BTVER2-LABEL: test_permilvarpd: 3837 ; BTVER2: # %bb.0: 3838 ; BTVER2-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [2:2.00] 3839 ; BTVER2-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:2.00] 3840 ; BTVER2-NEXT: retq # sched: [4:1.00] 3841 ; 3842 ; ZNVER1-LABEL: test_permilvarpd: 3843 ; ZNVER1: # %bb.0: 3844 ; ZNVER1-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 3845 ; ZNVER1-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 3846 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3847 %1 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) 3848 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 3849 %3 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %1, <2 x i64> %2) 3850 ret <2 x double> %3 3851 } 3852 declare <2 x double> 
@llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone 3853 3854 define <4 x double> @test_permilvarpd_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 3855 ; GENERIC-LABEL: test_permilvarpd_ymm: 3856 ; GENERIC: # %bb.0: 3857 ; GENERIC-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3858 ; GENERIC-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 3859 ; GENERIC-NEXT: retq # sched: [1:1.00] 3860 ; 3861 ; SANDY-LABEL: test_permilvarpd_ymm: 3862 ; SANDY: # %bb.0: 3863 ; SANDY-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3864 ; SANDY-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 3865 ; SANDY-NEXT: retq # sched: [1:1.00] 3866 ; 3867 ; HASWELL-LABEL: test_permilvarpd_ymm: 3868 ; HASWELL: # %bb.0: 3869 ; HASWELL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3870 ; HASWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 3871 ; HASWELL-NEXT: retq # sched: [7:1.00] 3872 ; 3873 ; BROADWELL-LABEL: test_permilvarpd_ymm: 3874 ; BROADWELL: # %bb.0: 3875 ; BROADWELL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3876 ; BROADWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 3877 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3878 ; 3879 ; SKYLAKE-LABEL: test_permilvarpd_ymm: 3880 ; SKYLAKE: # %bb.0: 3881 ; SKYLAKE-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3882 ; SKYLAKE-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 3883 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3884 ; 3885 ; SKX-LABEL: test_permilvarpd_ymm: 3886 ; SKX: # %bb.0: 3887 ; SKX-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3888 ; SKX-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 3889 ; SKX-NEXT: retq # sched: [7:1.00] 3890 ; 3891 ; BTVER2-LABEL: test_permilvarpd_ymm: 3892 ; BTVER2: # %bb.0: 3893 ; BTVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [3:3.00] 3894 ; BTVER2-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:3.00] 3895 ; BTVER2-NEXT: retq # sched: [4:1.00] 3896 ; 3897 ; ZNVER1-LABEL: 
test_permilvarpd_ymm: 3898 ; ZNVER1: # %bb.0: 3899 ; ZNVER1-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3900 ; ZNVER1-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3901 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3902 %1 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) 3903 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 3904 %3 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %1, <4 x i64> %2) 3905 ret <4 x double> %3 3906 } 3907 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone 3908 3909 define <4 x float> @test_permilvarps(<4 x float> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 3910 ; GENERIC-LABEL: test_permilvarps: 3911 ; GENERIC: # %bb.0: 3912 ; GENERIC-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 3913 ; GENERIC-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3914 ; GENERIC-NEXT: retq # sched: [1:1.00] 3915 ; 3916 ; SANDY-LABEL: test_permilvarps: 3917 ; SANDY: # %bb.0: 3918 ; SANDY-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 3919 ; SANDY-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3920 ; SANDY-NEXT: retq # sched: [1:1.00] 3921 ; 3922 ; HASWELL-LABEL: test_permilvarps: 3923 ; HASWELL: # %bb.0: 3924 ; HASWELL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 3925 ; HASWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3926 ; HASWELL-NEXT: retq # sched: [7:1.00] 3927 ; 3928 ; BROADWELL-LABEL: test_permilvarps: 3929 ; BROADWELL: # %bb.0: 3930 ; BROADWELL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 3931 ; BROADWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 3932 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3933 ; 3934 ; SKYLAKE-LABEL: test_permilvarps: 3935 ; SKYLAKE: # %bb.0: 3936 ; SKYLAKE-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 3937 ; SKYLAKE-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3938 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3939 ; 3940 ; SKX-LABEL: 
test_permilvarps: 3941 ; SKX: # %bb.0: 3942 ; SKX-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 3943 ; SKX-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3944 ; SKX-NEXT: retq # sched: [7:1.00] 3945 ; 3946 ; BTVER2-LABEL: test_permilvarps: 3947 ; BTVER2: # %bb.0: 3948 ; BTVER2-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [2:2.00] 3949 ; BTVER2-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:2.00] 3950 ; BTVER2-NEXT: retq # sched: [4:1.00] 3951 ; 3952 ; ZNVER1-LABEL: test_permilvarps: 3953 ; ZNVER1: # %bb.0: 3954 ; ZNVER1-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 3955 ; ZNVER1-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 3956 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3957 %1 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) 3958 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 3959 %3 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %1, <4 x i32> %2) 3960 ret <4 x float> %3 3961 } 3962 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone 3963 3964 define <8 x float> @test_permilvarps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 3965 ; GENERIC-LABEL: test_permilvarps_ymm: 3966 ; GENERIC: # %bb.0: 3967 ; GENERIC-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3968 ; GENERIC-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 3969 ; GENERIC-NEXT: retq # sched: [1:1.00] 3970 ; 3971 ; SANDY-LABEL: test_permilvarps_ymm: 3972 ; SANDY: # %bb.0: 3973 ; SANDY-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3974 ; SANDY-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 3975 ; SANDY-NEXT: retq # sched: [1:1.00] 3976 ; 3977 ; HASWELL-LABEL: test_permilvarps_ymm: 3978 ; HASWELL: # %bb.0: 3979 ; HASWELL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3980 ; HASWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 3981 ; HASWELL-NEXT: retq # sched: [7:1.00] 3982 ; 3983 ; BROADWELL-LABEL: test_permilvarps_ymm: 3984 ; BROADWELL: # %bb.0: 
3985 ; BROADWELL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3986 ; BROADWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 3987 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3988 ; 3989 ; SKYLAKE-LABEL: test_permilvarps_ymm: 3990 ; SKYLAKE: # %bb.0: 3991 ; SKYLAKE-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3992 ; SKYLAKE-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 3993 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3994 ; 3995 ; SKX-LABEL: test_permilvarps_ymm: 3996 ; SKX: # %bb.0: 3997 ; SKX-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 3998 ; SKX-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 3999 ; SKX-NEXT: retq # sched: [7:1.00] 4000 ; 4001 ; BTVER2-LABEL: test_permilvarps_ymm: 4002 ; BTVER2: # %bb.0: 4003 ; BTVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [3:3.00] 4004 ; BTVER2-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:3.00] 4005 ; BTVER2-NEXT: retq # sched: [4:1.00] 4006 ; 4007 ; ZNVER1-LABEL: test_permilvarps_ymm: 4008 ; ZNVER1: # %bb.0: 4009 ; ZNVER1-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4010 ; ZNVER1-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4011 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4012 %1 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) 4013 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 4014 %3 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> %2) 4015 ret <8 x float> %3 4016 } 4017 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone 4018 4019 define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) { 4020 ; GENERIC-LABEL: test_rcpps: 4021 ; GENERIC: # %bb.0: 4022 ; GENERIC-NEXT: vrcpps (%rdi), %ymm1 # sched: [14:2.00] 4023 ; GENERIC-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] 4024 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4025 ; GENERIC-NEXT: retq # sched: [1:1.00] 4026 ; 4027 ; SANDY-LABEL: test_rcpps: 4028 ; SANDY: # %bb.0: 4029 ; 
SANDY-NEXT: vrcpps (%rdi), %ymm1 # sched: [14:2.00] 4030 ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] 4031 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4032 ; SANDY-NEXT: retq # sched: [1:1.00] 4033 ; 4034 ; HASWELL-LABEL: test_rcpps: 4035 ; HASWELL: # %bb.0: 4036 ; HASWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: [18:2.00] 4037 ; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] 4038 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4039 ; HASWELL-NEXT: retq # sched: [7:1.00] 4040 ; 4041 ; BROADWELL-LABEL: test_rcpps: 4042 ; BROADWELL: # %bb.0: 4043 ; BROADWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] 4044 ; BROADWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: [17:2.00] 4045 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4046 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4047 ; 4048 ; SKYLAKE-LABEL: test_rcpps: 4049 ; SKYLAKE: # %bb.0: 4050 ; SKYLAKE-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00] 4051 ; SKYLAKE-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:1.00] 4052 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4053 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4054 ; 4055 ; SKX-LABEL: test_rcpps: 4056 ; SKX: # %bb.0: 4057 ; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00] 4058 ; SKX-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:1.00] 4059 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4060 ; SKX-NEXT: retq # sched: [7:1.00] 4061 ; 4062 ; BTVER2-LABEL: test_rcpps: 4063 ; BTVER2: # %bb.0: 4064 ; BTVER2-NEXT: vrcpps (%rdi), %ymm1 # sched: [7:2.00] 4065 ; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00] 4066 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 4067 ; BTVER2-NEXT: retq # sched: [4:1.00] 4068 ; 4069 ; ZNVER1-LABEL: test_rcpps: 4070 ; ZNVER1: # %bb.0: 4071 ; ZNVER1-NEXT: vrcpps (%rdi), %ymm1 # sched: [12:0.50] 4072 ; ZNVER1-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:0.50] 4073 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4074 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4075 %1 = call <8 x float> 
@llvm.x86.avx.rcp.ps.256(<8 x float> %a0) 4076 %2 = load <8 x float>, <8 x float> *%a1, align 32 4077 %3 = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %2) 4078 %4 = fadd <8 x float> %1, %3 4079 ret <8 x float> %4 4080 } 4081 declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone 4082 4083 define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) { 4084 ; GENERIC-LABEL: test_roundpd: 4085 ; GENERIC: # %bb.0: 4086 ; GENERIC-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00] 4087 ; GENERIC-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00] 4088 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4089 ; GENERIC-NEXT: retq # sched: [1:1.00] 4090 ; 4091 ; SANDY-LABEL: test_roundpd: 4092 ; SANDY: # %bb.0: 4093 ; SANDY-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00] 4094 ; SANDY-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00] 4095 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4096 ; SANDY-NEXT: retq # sched: [1:1.00] 4097 ; 4098 ; HASWELL-LABEL: test_roundpd: 4099 ; HASWELL: # %bb.0: 4100 ; HASWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [6:0.50] 4101 ; HASWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [13:2.00] 4102 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4103 ; HASWELL-NEXT: retq # sched: [7:1.00] 4104 ; 4105 ; BROADWELL-LABEL: test_roundpd: 4106 ; BROADWELL: # %bb.0: 4107 ; BROADWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [12:2.00] 4108 ; BROADWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [6:0.50] 4109 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4110 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4111 ; 4112 ; SKYLAKE-LABEL: test_roundpd: 4113 ; SKYLAKE: # %bb.0: 4114 ; SKYLAKE-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:1.00] 4115 ; SKYLAKE-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:1.00] 4116 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4117 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4118 ; 4119 ; SKX-LABEL: test_roundpd: 4120 ; SKX: # %bb.0: 
4121 ; SKX-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:1.00] 4122 ; SKX-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:1.00] 4123 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4124 ; SKX-NEXT: retq # sched: [7:1.00] 4125 ; 4126 ; BTVER2-LABEL: test_roundpd: 4127 ; BTVER2: # %bb.0: 4128 ; BTVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:2.00] 4129 ; BTVER2-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:2.00] 4130 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 4131 ; BTVER2-NEXT: retq # sched: [4:1.00] 4132 ; 4133 ; ZNVER1-LABEL: test_roundpd: 4134 ; ZNVER1: # %bb.0: 4135 ; ZNVER1-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [11:1.00] 4136 ; ZNVER1-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [4:1.00] 4137 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4138 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4139 %1 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) 4140 %2 = load <4 x double>, <4 x double> *%a1, align 32 4141 %3 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %2, i32 7) 4142 %4 = fadd <4 x double> %1, %3 4143 ret <4 x double> %4 4144 } 4145 declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone 4146 4147 define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) { 4148 ; GENERIC-LABEL: test_roundps: 4149 ; GENERIC: # %bb.0: 4150 ; GENERIC-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00] 4151 ; GENERIC-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [10:1.00] 4152 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4153 ; GENERIC-NEXT: retq # sched: [1:1.00] 4154 ; 4155 ; SANDY-LABEL: test_roundps: 4156 ; SANDY: # %bb.0: 4157 ; SANDY-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00] 4158 ; SANDY-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [10:1.00] 4159 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4160 ; SANDY-NEXT: retq # sched: [1:1.00] 4161 ; 4162 ; HASWELL-LABEL: test_roundps: 4163 ; HASWELL: # %bb.0: 4164 ; HASWELL-NEXT: vroundps $7, %ymm0, 
%ymm0 # sched: [6:0.50] 4165 ; HASWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [13:2.00] 4166 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4167 ; HASWELL-NEXT: retq # sched: [7:1.00] 4168 ; 4169 ; BROADWELL-LABEL: test_roundps: 4170 ; BROADWELL: # %bb.0: 4171 ; BROADWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [12:2.00] 4172 ; BROADWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [6:0.50] 4173 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4174 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4175 ; 4176 ; SKYLAKE-LABEL: test_roundps: 4177 ; SKYLAKE: # %bb.0: 4178 ; SKYLAKE-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:1.00] 4179 ; SKYLAKE-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:1.00] 4180 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4181 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4182 ; 4183 ; SKX-LABEL: test_roundps: 4184 ; SKX: # %bb.0: 4185 ; SKX-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:1.00] 4186 ; SKX-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:1.00] 4187 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4188 ; SKX-NEXT: retq # sched: [7:1.00] 4189 ; 4190 ; BTVER2-LABEL: test_roundps: 4191 ; BTVER2: # %bb.0: 4192 ; BTVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:2.00] 4193 ; BTVER2-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:2.00] 4194 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 4195 ; BTVER2-NEXT: retq # sched: [4:1.00] 4196 ; 4197 ; ZNVER1-LABEL: test_roundps: 4198 ; ZNVER1: # %bb.0: 4199 ; ZNVER1-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [11:1.00] 4200 ; ZNVER1-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [4:1.00] 4201 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4202 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4203 %1 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) 4204 %2 = load <8 x float>, <8 x float> *%a1, align 32 4205 %3 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %2, i32 7) 4206 %4 = fadd <8 x float> %1, %3 4207 ret <8 x float> %4 4208 
} 4209 declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone 4210 4211 define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) { 4212 ; GENERIC-LABEL: test_rsqrtps: 4213 ; GENERIC: # %bb.0: 4214 ; GENERIC-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:2.00] 4215 ; GENERIC-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:2.00] 4216 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4217 ; GENERIC-NEXT: retq # sched: [1:1.00] 4218 ; 4219 ; SANDY-LABEL: test_rsqrtps: 4220 ; SANDY: # %bb.0: 4221 ; SANDY-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:2.00] 4222 ; SANDY-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:2.00] 4223 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4224 ; SANDY-NEXT: retq # sched: [1:1.00] 4225 ; 4226 ; HASWELL-LABEL: test_rsqrtps: 4227 ; HASWELL: # %bb.0: 4228 ; HASWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [18:2.00] 4229 ; HASWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [11:2.00] 4230 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4231 ; HASWELL-NEXT: retq # sched: [7:1.00] 4232 ; 4233 ; BROADWELL-LABEL: test_rsqrtps: 4234 ; BROADWELL: # %bb.0: 4235 ; BROADWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [11:2.00] 4236 ; BROADWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [17:2.00] 4237 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4238 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4239 ; 4240 ; SKYLAKE-LABEL: test_rsqrtps: 4241 ; SKYLAKE: # %bb.0: 4242 ; SKYLAKE-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [4:1.00] 4243 ; SKYLAKE-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:1.00] 4244 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4245 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4246 ; 4247 ; SKX-LABEL: test_rsqrtps: 4248 ; SKX: # %bb.0: 4249 ; SKX-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [4:1.00] 4250 ; SKX-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:1.00] 4251 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4252 ; SKX-NEXT: retq # sched: [7:1.00] 4253 ; 4254 ; BTVER2-LABEL: test_rsqrtps: 4255 
; BTVER2: # %bb.0: 4256 ; BTVER2-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [7:2.00] 4257 ; BTVER2-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [2:2.00] 4258 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 4259 ; BTVER2-NEXT: retq # sched: [4:1.00] 4260 ; 4261 ; ZNVER1-LABEL: test_rsqrtps: 4262 ; ZNVER1: # %bb.0: 4263 ; ZNVER1-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [12:0.50] 4264 ; ZNVER1-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [5:1.00] 4265 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4266 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4267 %1 = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) 4268 %2 = load <8 x float>, <8 x float> *%a1, align 32 4269 %3 = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %2) 4270 %4 = fadd <8 x float> %1, %3 4271 ret <8 x float> %4 4272 } 4273 declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone 4274 4275 define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 4276 ; GENERIC-LABEL: test_shufpd: 4277 ; GENERIC: # %bb.0: 4278 ; GENERIC-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] 4279 ; GENERIC-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] 4280 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4281 ; GENERIC-NEXT: retq # sched: [1:1.00] 4282 ; 4283 ; SANDY-LABEL: test_shufpd: 4284 ; SANDY: # %bb.0: 4285 ; SANDY-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] 4286 ; SANDY-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] 4287 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4288 ; SANDY-NEXT: retq # sched: [1:1.00] 4289 ; 4290 ; HASWELL-LABEL: test_shufpd: 4291 ; HASWELL: # %bb.0: 4292 ; HASWELL-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] 4293 ; HASWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] 4294 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: 
[3:1.00] 4295 ; HASWELL-NEXT: retq # sched: [7:1.00] 4296 ; 4297 ; BROADWELL-LABEL: test_shufpd: 4298 ; BROADWELL: # %bb.0: 4299 ; BROADWELL-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] 4300 ; BROADWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [7:1.00] 4301 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4302 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4303 ; 4304 ; SKYLAKE-LABEL: test_shufpd: 4305 ; SKYLAKE: # %bb.0: 4306 ; SKYLAKE-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] 4307 ; SKYLAKE-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] 4308 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4309 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4310 ; 4311 ; SKX-LABEL: test_shufpd: 4312 ; SKX: # %bb.0: 4313 ; SKX-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] 4314 ; SKX-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] 4315 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4316 ; SKX-NEXT: retq # sched: [7:1.00] 4317 ; 4318 ; BTVER2-LABEL: test_shufpd: 4319 ; BTVER2: # %bb.0: 4320 ; BTVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] 4321 ; BTVER2-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [6:2.00] 4322 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 4323 ; BTVER2-NEXT: retq # sched: [4:1.00] 4324 ; 4325 ; ZNVER1-LABEL: test_shufpd: 4326 ; ZNVER1: # %bb.0: 4327 ; ZNVER1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:0.50] 4328 ; ZNVER1-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:0.50] 4329 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4330 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4331 %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 4, i32 2, i32 7> 4332 %2 = load <4 x double>, <4 x double> *%a2, align 32 4333 %3 = 
shufflevector <4 x double> %a1, <4 x double> %2, <4 x i32> <i32 1, i32 4, i32 2, i32 7> 4334 %4 = fadd <4 x double> %1, %3 4335 ret <4 x double> %4 4336 } 4337 4338 define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind { 4339 ; GENERIC-LABEL: test_shufps: 4340 ; GENERIC: # %bb.0: 4341 ; GENERIC-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] 4342 ; GENERIC-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00] 4343 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4344 ; GENERIC-NEXT: retq # sched: [1:1.00] 4345 ; 4346 ; SANDY-LABEL: test_shufps: 4347 ; SANDY: # %bb.0: 4348 ; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] 4349 ; SANDY-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00] 4350 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4351 ; SANDY-NEXT: retq # sched: [1:1.00] 4352 ; 4353 ; HASWELL-LABEL: test_shufps: 4354 ; HASWELL: # %bb.0: 4355 ; HASWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] 4356 ; HASWELL-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00] 4357 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4358 ; HASWELL-NEXT: retq # sched: [7:1.00] 4359 ; 4360 ; BROADWELL-LABEL: test_shufps: 4361 ; BROADWELL: # %bb.0: 4362 ; BROADWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] 4363 ; BROADWELL-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [7:1.00] 4364 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4365 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4366 ; 4367 ; SKYLAKE-LABEL: test_shufps: 4368 ; SKYLAKE: # %bb.0: 4369 ; SKYLAKE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] 4370 ; SKYLAKE-NEXT: vshufps {{.*#+}} ymm1 = 
ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00] 4371 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4372 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4373 ; 4374 ; SKX-LABEL: test_shufps: 4375 ; SKX: # %bb.0: 4376 ; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] 4377 ; SKX-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00] 4378 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4379 ; SKX-NEXT: retq # sched: [7:1.00] 4380 ; 4381 ; BTVER2-LABEL: test_shufps: 4382 ; BTVER2: # %bb.0: 4383 ; BTVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] 4384 ; BTVER2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [6:2.00] 4385 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 4386 ; BTVER2-NEXT: retq # sched: [4:1.00] 4387 ; 4388 ; ZNVER1-LABEL: test_shufps: 4389 ; ZNVER1: # %bb.0: 4390 ; ZNVER1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:0.50] 4391 ; ZNVER1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:0.50] 4392 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4393 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4394 %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 0, i32 8, i32 8, i32 4, i32 4, i32 12, i32 12> 4395 %2 = load <8 x float>, <8 x float> *%a2, align 32 4396 %3 = shufflevector <8 x float> %a1, <8 x float> %2, <8 x i32> <i32 0, i32 3, i32 8, i32 8, i32 4, i32 7, i32 12, i32 12> 4397 %4 = fadd <8 x float> %1, %3 4398 ret <8 x float> %4 4399 } 4400 4401 define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) { 4402 ; GENERIC-LABEL: test_sqrtpd: 4403 ; GENERIC: # %bb.0: 4404 ; GENERIC-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:44.00] 4405 ; GENERIC-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:44.00] 4406 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4407 ; GENERIC-NEXT: retq # sched: 
[1:1.00] 4408 ; 4409 ; SANDY-LABEL: test_sqrtpd: 4410 ; SANDY: # %bb.0: 4411 ; SANDY-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:44.00] 4412 ; SANDY-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:44.00] 4413 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4414 ; SANDY-NEXT: retq # sched: [1:1.00] 4415 ; 4416 ; HASWELL-LABEL: test_sqrtpd: 4417 ; HASWELL: # %bb.0: 4418 ; HASWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [42:28.00] 4419 ; HASWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [35:28.00] 4420 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4421 ; HASWELL-NEXT: retq # sched: [7:1.00] 4422 ; 4423 ; BROADWELL-LABEL: test_sqrtpd: 4424 ; BROADWELL: # %bb.0: 4425 ; BROADWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [29:28.00] 4426 ; BROADWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [35:28.00] 4427 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4428 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4429 ; 4430 ; SKYLAKE-LABEL: test_sqrtpd: 4431 ; SKYLAKE: # %bb.0: 4432 ; SKYLAKE-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:12.00] 4433 ; SKYLAKE-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [25:12.00] 4434 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4435 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4436 ; 4437 ; SKX-LABEL: test_sqrtpd: 4438 ; SKX: # %bb.0: 4439 ; SKX-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:12.00] 4440 ; SKX-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [25:12.00] 4441 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4442 ; SKX-NEXT: retq # sched: [7:1.00] 4443 ; 4444 ; BTVER2-LABEL: test_sqrtpd: 4445 ; BTVER2: # %bb.0: 4446 ; BTVER2-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [59:54.00] 4447 ; BTVER2-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [54:54.00] 4448 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 4449 ; BTVER2-NEXT: retq # sched: [4:1.00] 4450 ; 4451 ; ZNVER1-LABEL: test_sqrtpd: 4452 ; ZNVER1: # %bb.0: 4453 ; ZNVER1-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [47:40.00] 4454 ; ZNVER1-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [40:40.00] 4455 ; ZNVER1-NEXT: 
vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4456 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4457 %1 = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) 4458 %2 = load <4 x double>, <4 x double> *%a1, align 32 4459 %3 = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %2) 4460 %4 = fadd <4 x double> %1, %3 4461 ret <4 x double> %4 4462 } 4463 declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone 4464 4465 define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) { 4466 ; GENERIC-LABEL: test_sqrtps: 4467 ; GENERIC: # %bb.0: 4468 ; GENERIC-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:28.00] 4469 ; GENERIC-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:28.00] 4470 ; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4471 ; GENERIC-NEXT: retq # sched: [1:1.00] 4472 ; 4473 ; SANDY-LABEL: test_sqrtps: 4474 ; SANDY: # %bb.0: 4475 ; SANDY-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:28.00] 4476 ; SANDY-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:28.00] 4477 ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4478 ; SANDY-NEXT: retq # sched: [1:1.00] 4479 ; 4480 ; HASWELL-LABEL: test_sqrtps: 4481 ; HASWELL: # %bb.0: 4482 ; HASWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [28:14.00] 4483 ; HASWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [21:14.00] 4484 ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4485 ; HASWELL-NEXT: retq # sched: [7:1.00] 4486 ; 4487 ; BROADWELL-LABEL: test_sqrtps: 4488 ; BROADWELL: # %bb.0: 4489 ; BROADWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [21:14.00] 4490 ; BROADWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [27:14.00] 4491 ; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4492 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4493 ; 4494 ; SKYLAKE-LABEL: test_sqrtps: 4495 ; SKYLAKE: # %bb.0: 4496 ; SKYLAKE-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:6.00] 4497 ; SKYLAKE-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:6.00] 4498 ; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4499 ; SKYLAKE-NEXT: 
retq # sched: [7:1.00] 4500 ; 4501 ; SKX-LABEL: test_sqrtps: 4502 ; SKX: # %bb.0: 4503 ; SKX-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:6.00] 4504 ; SKX-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:6.00] 4505 ; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4506 ; SKX-NEXT: retq # sched: [7:1.00] 4507 ; 4508 ; BTVER2-LABEL: test_sqrtps: 4509 ; BTVER2: # %bb.0: 4510 ; BTVER2-NEXT: vsqrtps (%rdi), %ymm1 # sched: [47:42.00] 4511 ; BTVER2-NEXT: vsqrtps %ymm0, %ymm0 # sched: [42:42.00] 4512 ; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 4513 ; BTVER2-NEXT: retq # sched: [4:1.00] 4514 ; 4515 ; ZNVER1-LABEL: test_sqrtps: 4516 ; ZNVER1: # %bb.0: 4517 ; ZNVER1-NEXT: vsqrtps (%rdi), %ymm1 # sched: [35:28.00] 4518 ; ZNVER1-NEXT: vsqrtps %ymm0, %ymm0 # sched: [28:28.00] 4519 ; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4520 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4521 %1 = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) 4522 %2 = load <8 x float>, <8 x float> *%a1, align 32 4523 %3 = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %2) 4524 %4 = fadd <8 x float> %1, %3 4525 ret <8 x float> %4 4526 } 4527 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone 4528 4529 define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 4530 ; GENERIC-LABEL: test_subpd: 4531 ; GENERIC: # %bb.0: 4532 ; GENERIC-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4533 ; GENERIC-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 4534 ; GENERIC-NEXT: retq # sched: [1:1.00] 4535 ; 4536 ; SANDY-LABEL: test_subpd: 4537 ; SANDY: # %bb.0: 4538 ; SANDY-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4539 ; SANDY-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 4540 ; SANDY-NEXT: retq # sched: [1:1.00] 4541 ; 4542 ; HASWELL-LABEL: test_subpd: 4543 ; HASWELL: # %bb.0: 4544 ; HASWELL-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4545 ; HASWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: 
[10:1.00] 4546 ; HASWELL-NEXT: retq # sched: [7:1.00] 4547 ; 4548 ; BROADWELL-LABEL: test_subpd: 4549 ; BROADWELL: # %bb.0: 4550 ; BROADWELL-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4551 ; BROADWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 4552 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4553 ; 4554 ; SKYLAKE-LABEL: test_subpd: 4555 ; SKYLAKE: # %bb.0: 4556 ; SKYLAKE-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4557 ; SKYLAKE-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4558 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4559 ; 4560 ; SKX-LABEL: test_subpd: 4561 ; SKX: # %bb.0: 4562 ; SKX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4563 ; SKX-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4564 ; SKX-NEXT: retq # sched: [7:1.00] 4565 ; 4566 ; BTVER2-LABEL: test_subpd: 4567 ; BTVER2: # %bb.0: 4568 ; BTVER2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 4569 ; BTVER2-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00] 4570 ; BTVER2-NEXT: retq # sched: [4:1.00] 4571 ; 4572 ; ZNVER1-LABEL: test_subpd: 4573 ; ZNVER1: # %bb.0: 4574 ; ZNVER1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4575 ; ZNVER1-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 4576 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4577 %1 = fsub <4 x double> %a0, %a1 4578 %2 = load <4 x double>, <4 x double> *%a2, align 32 4579 %3 = fsub <4 x double> %1, %2 4580 ret <4 x double> %3 4581 } 4582 4583 define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { 4584 ; GENERIC-LABEL: test_subps: 4585 ; GENERIC: # %bb.0: 4586 ; GENERIC-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4587 ; GENERIC-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 4588 ; GENERIC-NEXT: retq # sched: [1:1.00] 4589 ; 4590 ; SANDY-LABEL: test_subps: 4591 ; SANDY: # %bb.0: 4592 ; SANDY-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4593 ; SANDY-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 4594 ; SANDY-NEXT: retq # sched: [1:1.00] 4595 ; 4596 ; 
HASWELL-LABEL: test_subps: 4597 ; HASWELL: # %bb.0: 4598 ; HASWELL-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4599 ; HASWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 4600 ; HASWELL-NEXT: retq # sched: [7:1.00] 4601 ; 4602 ; BROADWELL-LABEL: test_subps: 4603 ; BROADWELL: # %bb.0: 4604 ; BROADWELL-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4605 ; BROADWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 4606 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4607 ; 4608 ; SKYLAKE-LABEL: test_subps: 4609 ; SKYLAKE: # %bb.0: 4610 ; SKYLAKE-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4611 ; SKYLAKE-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4612 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4613 ; 4614 ; SKX-LABEL: test_subps: 4615 ; SKX: # %bb.0: 4616 ; SKX-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4617 ; SKX-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4618 ; SKX-NEXT: retq # sched: [7:1.00] 4619 ; 4620 ; BTVER2-LABEL: test_subps: 4621 ; BTVER2: # %bb.0: 4622 ; BTVER2-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 4623 ; BTVER2-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00] 4624 ; BTVER2-NEXT: retq # sched: [4:1.00] 4625 ; 4626 ; ZNVER1-LABEL: test_subps: 4627 ; ZNVER1: # %bb.0: 4628 ; ZNVER1-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4629 ; ZNVER1-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 4630 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4631 %1 = fsub <8 x float> %a0, %a1 4632 %2 = load <8 x float>, <8 x float> *%a2, align 32 4633 %3 = fsub <8 x float> %1, %2 4634 ret <8 x float> %3 4635 } 4636 4637 define i32 @test_testpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { 4638 ; GENERIC-LABEL: test_testpd: 4639 ; GENERIC: # %bb.0: 4640 ; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33] 4641 ; GENERIC-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] 4642 ; GENERIC-NEXT: setb %al # sched: [1:0.50] 4643 ; GENERIC-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00] 4644 ; GENERIC-NEXT: adcl $0, %eax # 
sched: [2:0.67] 4645 ; GENERIC-NEXT: retq # sched: [1:1.00] 4646 ; 4647 ; SANDY-LABEL: test_testpd: 4648 ; SANDY: # %bb.0: 4649 ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] 4650 ; SANDY-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] 4651 ; SANDY-NEXT: setb %al # sched: [1:0.50] 4652 ; SANDY-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00] 4653 ; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67] 4654 ; SANDY-NEXT: retq # sched: [1:1.00] 4655 ; 4656 ; HASWELL-LABEL: test_testpd: 4657 ; HASWELL: # %bb.0: 4658 ; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] 4659 ; HASWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] 4660 ; HASWELL-NEXT: setb %al # sched: [1:0.50] 4661 ; HASWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00] 4662 ; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] 4663 ; HASWELL-NEXT: retq # sched: [7:1.00] 4664 ; 4665 ; BROADWELL-LABEL: test_testpd: 4666 ; BROADWELL: # %bb.0: 4667 ; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] 4668 ; BROADWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] 4669 ; BROADWELL-NEXT: setb %al # sched: [1:0.50] 4670 ; BROADWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [6:1.00] 4671 ; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50] 4672 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4673 ; 4674 ; SKYLAKE-LABEL: test_testpd: 4675 ; SKYLAKE: # %bb.0: 4676 ; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25] 4677 ; SKYLAKE-NEXT: vtestpd %xmm1, %xmm0 # sched: [2:1.00] 4678 ; SKYLAKE-NEXT: setb %al # sched: [1:0.50] 4679 ; SKYLAKE-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:1.00] 4680 ; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50] 4681 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4682 ; 4683 ; SKX-LABEL: test_testpd: 4684 ; SKX: # %bb.0: 4685 ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] 4686 ; SKX-NEXT: vtestpd %xmm1, %xmm0 # sched: [2:1.00] 4687 ; SKX-NEXT: setb %al # sched: [1:0.50] 4688 ; SKX-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:1.00] 4689 ; SKX-NEXT: adcl $0, %eax # sched: [1:0.50] 4690 ; SKX-NEXT: retq # sched: [7:1.00] 4691 ; 4692 ; BTVER2-LABEL: 
test_testpd: 4693 ; BTVER2: # %bb.0: 4694 ; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50] 4695 ; BTVER2-NEXT: vtestpd %xmm1, %xmm0 # sched: [3:1.00] 4696 ; BTVER2-NEXT: setb %al # sched: [1:0.50] 4697 ; BTVER2-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:1.00] 4698 ; BTVER2-NEXT: adcl $0, %eax # sched: [1:1.00] 4699 ; BTVER2-NEXT: retq # sched: [4:1.00] 4700 ; 4701 ; ZNVER1-LABEL: test_testpd: 4702 ; ZNVER1: # %bb.0: 4703 ; ZNVER1-NEXT: xorl %eax, %eax # sched: [1:0.25] 4704 ; ZNVER1-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:0.25] 4705 ; ZNVER1-NEXT: setb %al # sched: [1:0.25] 4706 ; ZNVER1-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:0.50] 4707 ; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.25] 4708 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4709 %1 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) 4710 %2 = load <2 x double>, <2 x double> *%a2, align 16 4711 %3 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %2) 4712 %4 = add i32 %1, %3 4713 ret i32 %4 4714 } 4715 declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone 4716 4717 define i32 @test_testpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 4718 ; GENERIC-LABEL: test_testpd_ymm: 4719 ; GENERIC: # %bb.0: 4720 ; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33] 4721 ; GENERIC-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] 4722 ; GENERIC-NEXT: setb %al # sched: [1:0.50] 4723 ; GENERIC-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00] 4724 ; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67] 4725 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 4726 ; GENERIC-NEXT: retq # sched: [1:1.00] 4727 ; 4728 ; SANDY-LABEL: test_testpd_ymm: 4729 ; SANDY: # %bb.0: 4730 ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] 4731 ; SANDY-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] 4732 ; SANDY-NEXT: setb %al # sched: [1:0.50] 4733 ; SANDY-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00] 4734 ; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67] 4735 ; SANDY-NEXT: vzeroupper # sched: [100:0.33] 
4736 ; SANDY-NEXT: retq # sched: [1:1.00] 4737 ; 4738 ; HASWELL-LABEL: test_testpd_ymm: 4739 ; HASWELL: # %bb.0: 4740 ; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] 4741 ; HASWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] 4742 ; HASWELL-NEXT: setb %al # sched: [1:0.50] 4743 ; HASWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00] 4744 ; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] 4745 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 4746 ; HASWELL-NEXT: retq # sched: [7:1.00] 4747 ; 4748 ; BROADWELL-LABEL: test_testpd_ymm: 4749 ; BROADWELL: # %bb.0: 4750 ; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] 4751 ; BROADWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] 4752 ; BROADWELL-NEXT: setb %al # sched: [1:0.50] 4753 ; BROADWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [7:1.00] 4754 ; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50] 4755 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 4756 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4757 ; 4758 ; SKYLAKE-LABEL: test_testpd_ymm: 4759 ; SKYLAKE: # %bb.0: 4760 ; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25] 4761 ; SKYLAKE-NEXT: vtestpd %ymm1, %ymm0 # sched: [2:1.00] 4762 ; SKYLAKE-NEXT: setb %al # sched: [1:0.50] 4763 ; SKYLAKE-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:1.00] 4764 ; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50] 4765 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 4766 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4767 ; 4768 ; SKX-LABEL: test_testpd_ymm: 4769 ; SKX: # %bb.0: 4770 ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] 4771 ; SKX-NEXT: vtestpd %ymm1, %ymm0 # sched: [2:1.00] 4772 ; SKX-NEXT: setb %al # sched: [1:0.50] 4773 ; SKX-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:1.00] 4774 ; SKX-NEXT: adcl $0, %eax # sched: [1:0.50] 4775 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 4776 ; SKX-NEXT: retq # sched: [7:1.00] 4777 ; 4778 ; BTVER2-LABEL: test_testpd_ymm: 4779 ; BTVER2: # %bb.0: 4780 ; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50] 4781 ; BTVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [4:2.00] 4782 ; BTVER2-NEXT: setb 
%al # sched: [1:0.50] 4783 ; BTVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:2.00] 4784 ; BTVER2-NEXT: adcl $0, %eax # sched: [1:1.00] 4785 ; BTVER2-NEXT: retq # sched: [4:1.00] 4786 ; 4787 ; ZNVER1-LABEL: test_testpd_ymm: 4788 ; ZNVER1: # %bb.0: 4789 ; ZNVER1-NEXT: xorl %eax, %eax # sched: [1:0.25] 4790 ; ZNVER1-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:0.25] 4791 ; ZNVER1-NEXT: setb %al # sched: [1:0.25] 4792 ; ZNVER1-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:0.50] 4793 ; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.25] 4794 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 4795 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4796 %1 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) 4797 %2 = load <4 x double>, <4 x double> *%a2, align 32 4798 %3 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %2) 4799 %4 = add i32 %1, %3 4800 ret i32 %4 4801 } 4802 declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone 4803 4804 define i32 @test_testps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { 4805 ; GENERIC-LABEL: test_testps: 4806 ; GENERIC: # %bb.0: 4807 ; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33] 4808 ; GENERIC-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] 4809 ; GENERIC-NEXT: setb %al # sched: [1:0.50] 4810 ; GENERIC-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00] 4811 ; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67] 4812 ; GENERIC-NEXT: retq # sched: [1:1.00] 4813 ; 4814 ; SANDY-LABEL: test_testps: 4815 ; SANDY: # %bb.0: 4816 ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] 4817 ; SANDY-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] 4818 ; SANDY-NEXT: setb %al # sched: [1:0.50] 4819 ; SANDY-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00] 4820 ; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67] 4821 ; SANDY-NEXT: retq # sched: [1:1.00] 4822 ; 4823 ; HASWELL-LABEL: test_testps: 4824 ; HASWELL: # %bb.0: 4825 ; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] 4826 ; HASWELL-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] 
4827 ; HASWELL-NEXT: setb %al # sched: [1:0.50] 4828 ; HASWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00] 4829 ; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] 4830 ; HASWELL-NEXT: retq # sched: [7:1.00] 4831 ; 4832 ; BROADWELL-LABEL: test_testps: 4833 ; BROADWELL: # %bb.0: 4834 ; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] 4835 ; BROADWELL-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] 4836 ; BROADWELL-NEXT: setb %al # sched: [1:0.50] 4837 ; BROADWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [6:1.00] 4838 ; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50] 4839 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4840 ; 4841 ; SKYLAKE-LABEL: test_testps: 4842 ; SKYLAKE: # %bb.0: 4843 ; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25] 4844 ; SKYLAKE-NEXT: vtestps %xmm1, %xmm0 # sched: [2:1.00] 4845 ; SKYLAKE-NEXT: setb %al # sched: [1:0.50] 4846 ; SKYLAKE-NEXT: vtestps (%rdi), %xmm0 # sched: [8:1.00] 4847 ; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50] 4848 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4849 ; 4850 ; SKX-LABEL: test_testps: 4851 ; SKX: # %bb.0: 4852 ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] 4853 ; SKX-NEXT: vtestps %xmm1, %xmm0 # sched: [2:1.00] 4854 ; SKX-NEXT: setb %al # sched: [1:0.50] 4855 ; SKX-NEXT: vtestps (%rdi), %xmm0 # sched: [8:1.00] 4856 ; SKX-NEXT: adcl $0, %eax # sched: [1:0.50] 4857 ; SKX-NEXT: retq # sched: [7:1.00] 4858 ; 4859 ; BTVER2-LABEL: test_testps: 4860 ; BTVER2: # %bb.0: 4861 ; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50] 4862 ; BTVER2-NEXT: vtestps %xmm1, %xmm0 # sched: [3:1.00] 4863 ; BTVER2-NEXT: setb %al # sched: [1:0.50] 4864 ; BTVER2-NEXT: vtestps (%rdi), %xmm0 # sched: [8:1.00] 4865 ; BTVER2-NEXT: adcl $0, %eax # sched: [1:1.00] 4866 ; BTVER2-NEXT: retq # sched: [4:1.00] 4867 ; 4868 ; ZNVER1-LABEL: test_testps: 4869 ; ZNVER1: # %bb.0: 4870 ; ZNVER1-NEXT: xorl %eax, %eax # sched: [1:0.25] 4871 ; ZNVER1-NEXT: vtestps %xmm1, %xmm0 # sched: [1:0.25] 4872 ; ZNVER1-NEXT: setb %al # sched: [1:0.25] 4873 ; ZNVER1-NEXT: vtestps (%rdi), 
%xmm0 # sched: [8:0.50] 4874 ; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.25] 4875 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4876 %1 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) 4877 %2 = load <4 x float>, <4 x float> *%a2, align 16 4878 %3 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %2) 4879 %4 = add i32 %1, %3 4880 ret i32 %4 4881 } 4882 declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone 4883 4884 define i32 @test_testps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { 4885 ; GENERIC-LABEL: test_testps_ymm: 4886 ; GENERIC: # %bb.0: 4887 ; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33] 4888 ; GENERIC-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] 4889 ; GENERIC-NEXT: setb %al # sched: [1:0.50] 4890 ; GENERIC-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00] 4891 ; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67] 4892 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 4893 ; GENERIC-NEXT: retq # sched: [1:1.00] 4894 ; 4895 ; SANDY-LABEL: test_testps_ymm: 4896 ; SANDY: # %bb.0: 4897 ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] 4898 ; SANDY-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] 4899 ; SANDY-NEXT: setb %al # sched: [1:0.50] 4900 ; SANDY-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00] 4901 ; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67] 4902 ; SANDY-NEXT: vzeroupper # sched: [100:0.33] 4903 ; SANDY-NEXT: retq # sched: [1:1.00] 4904 ; 4905 ; HASWELL-LABEL: test_testps_ymm: 4906 ; HASWELL: # %bb.0: 4907 ; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] 4908 ; HASWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] 4909 ; HASWELL-NEXT: setb %al # sched: [1:0.50] 4910 ; HASWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00] 4911 ; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] 4912 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 4913 ; HASWELL-NEXT: retq # sched: [7:1.00] 4914 ; 4915 ; BROADWELL-LABEL: test_testps_ymm: 4916 ; BROADWELL: # %bb.0: 4917 ; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] 4918 ; 
BROADWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] 4919 ; BROADWELL-NEXT: setb %al # sched: [1:0.50] 4920 ; BROADWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [7:1.00] 4921 ; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50] 4922 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 4923 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4924 ; 4925 ; SKYLAKE-LABEL: test_testps_ymm: 4926 ; SKYLAKE: # %bb.0: 4927 ; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25] 4928 ; SKYLAKE-NEXT: vtestps %ymm1, %ymm0 # sched: [2:1.00] 4929 ; SKYLAKE-NEXT: setb %al # sched: [1:0.50] 4930 ; SKYLAKE-NEXT: vtestps (%rdi), %ymm0 # sched: [9:1.00] 4931 ; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50] 4932 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 4933 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4934 ; 4935 ; SKX-LABEL: test_testps_ymm: 4936 ; SKX: # %bb.0: 4937 ; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25] 4938 ; SKX-NEXT: vtestps %ymm1, %ymm0 # sched: [2:1.00] 4939 ; SKX-NEXT: setb %al # sched: [1:0.50] 4940 ; SKX-NEXT: vtestps (%rdi), %ymm0 # sched: [9:1.00] 4941 ; SKX-NEXT: adcl $0, %eax # sched: [1:0.50] 4942 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 4943 ; SKX-NEXT: retq # sched: [7:1.00] 4944 ; 4945 ; BTVER2-LABEL: test_testps_ymm: 4946 ; BTVER2: # %bb.0: 4947 ; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50] 4948 ; BTVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [4:2.00] 4949 ; BTVER2-NEXT: setb %al # sched: [1:0.50] 4950 ; BTVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [9:2.00] 4951 ; BTVER2-NEXT: adcl $0, %eax # sched: [1:1.00] 4952 ; BTVER2-NEXT: retq # sched: [4:1.00] 4953 ; 4954 ; ZNVER1-LABEL: test_testps_ymm: 4955 ; ZNVER1: # %bb.0: 4956 ; ZNVER1-NEXT: xorl %eax, %eax # sched: [1:0.25] 4957 ; ZNVER1-NEXT: vtestps %ymm1, %ymm0 # sched: [1:0.25] 4958 ; ZNVER1-NEXT: setb %al # sched: [1:0.25] 4959 ; ZNVER1-NEXT: vtestps (%rdi), %ymm0 # sched: [8:0.50] 4960 ; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.25] 4961 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 4962 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4963 %1 = 
call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) 4964 %2 = load <8 x float>, <8 x float> *%a2, align 32 4965 %3 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %2) 4966 %4 = add i32 %1, %3 4967 ret i32 %4 4968 } 4969 declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone 4970 4971 define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 4972 ; GENERIC-LABEL: test_unpckhpd: 4973 ; GENERIC: # %bb.0: 4974 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 4975 ; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00] 4976 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4977 ; GENERIC-NEXT: retq # sched: [1:1.00] 4978 ; 4979 ; SANDY-LABEL: test_unpckhpd: 4980 ; SANDY: # %bb.0: 4981 ; SANDY-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 4982 ; SANDY-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00] 4983 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4984 ; SANDY-NEXT: retq # sched: [1:1.00] 4985 ; 4986 ; HASWELL-LABEL: test_unpckhpd: 4987 ; HASWELL: # %bb.0: 4988 ; HASWELL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 4989 ; HASWELL-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00] 4990 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4991 ; HASWELL-NEXT: retq # sched: [7:1.00] 4992 ; 4993 ; BROADWELL-LABEL: test_unpckhpd: 4994 ; BROADWELL: # %bb.0: 4995 ; BROADWELL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 4996 ; BROADWELL-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [7:1.00] 4997 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 4998 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4999 ; 5000 ; SKYLAKE-LABEL: test_unpckhpd: 5001 ; SKYLAKE: # %bb.0: 5002 ; SKYLAKE-NEXT: 
vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 5003 ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00] 5004 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 5005 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5006 ; 5007 ; SKX-LABEL: test_unpckhpd: 5008 ; SKX: # %bb.0: 5009 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 5010 ; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00] 5011 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 5012 ; SKX-NEXT: retq # sched: [7:1.00] 5013 ; 5014 ; BTVER2-LABEL: test_unpckhpd: 5015 ; BTVER2: # %bb.0: 5016 ; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 5017 ; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [6:2.00] 5018 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 5019 ; BTVER2-NEXT: retq # sched: [4:1.00] 5020 ; 5021 ; ZNVER1-LABEL: test_unpckhpd: 5022 ; ZNVER1: # %bb.0: 5023 ; ZNVER1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.50] 5024 ; ZNVER1-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:0.50] 5025 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 5026 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5027 %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 5028 %2 = load <4 x double>, <4 x double> *%a2, align 32 5029 %3 = shufflevector <4 x double> %a1, <4 x double> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 5030 %4 = fadd <4 x double> %1, %3 5031 ret <4 x double> %4 5032 } 5033 5034 define <8 x float> @test_unpckhps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind { 5035 ; GENERIC-LABEL: test_unpckhps: 5036 ; GENERIC: # %bb.0: 5037 ; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 5038 ; GENERIC-NEXT: vunpckhps {{.*#+}} 
ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 5039 ; GENERIC-NEXT: retq # sched: [1:1.00] 5040 ; 5041 ; SANDY-LABEL: test_unpckhps: 5042 ; SANDY: # %bb.0: 5043 ; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 5044 ; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 5045 ; SANDY-NEXT: retq # sched: [1:1.00] 5046 ; 5047 ; HASWELL-LABEL: test_unpckhps: 5048 ; HASWELL: # %bb.0: 5049 ; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 5050 ; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 5051 ; HASWELL-NEXT: retq # sched: [7:1.00] 5052 ; 5053 ; BROADWELL-LABEL: test_unpckhps: 5054 ; BROADWELL: # %bb.0: 5055 ; BROADWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 5056 ; BROADWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] 5057 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5058 ; 5059 ; SKYLAKE-LABEL: test_unpckhps: 5060 ; SKYLAKE: # %bb.0: 5061 ; SKYLAKE-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 5062 ; SKYLAKE-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 5063 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5064 ; 5065 ; SKX-LABEL: test_unpckhps: 5066 ; SKX: # %bb.0: 5067 ; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 5068 ; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 5069 ; SKX-NEXT: retq # sched: [7:1.00] 5070 ; 5071 ; BTVER2-LABEL: test_unpckhps: 5072 ; BTVER2: # %bb.0: 5073 ; 
BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 5074 ; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:2.00] 5075 ; BTVER2-NEXT: retq # sched: [4:1.00] 5076 ; 5077 ; ZNVER1-LABEL: test_unpckhps: 5078 ; ZNVER1: # %bb.0: 5079 ; ZNVER1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.50] 5080 ; ZNVER1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:0.50] 5081 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5082 %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 5083 %2 = load <8 x float>, <8 x float> *%a2, align 32 5084 %3 = shufflevector <8 x float> %1, <8 x float> %2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 5085 ret <8 x float> %3 5086 } 5087 5088 define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 5089 ; GENERIC-LABEL: test_unpcklpd: 5090 ; GENERIC: # %bb.0: 5091 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 5092 ; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] 5093 ; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 5094 ; GENERIC-NEXT: retq # sched: [1:1.00] 5095 ; 5096 ; SANDY-LABEL: test_unpcklpd: 5097 ; SANDY: # %bb.0: 5098 ; SANDY-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 5099 ; SANDY-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] 5100 ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 5101 ; SANDY-NEXT: retq # sched: [1:1.00] 5102 ; 5103 ; HASWELL-LABEL: test_unpcklpd: 5104 ; HASWELL: # %bb.0: 5105 ; HASWELL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 5106 ; HASWELL-NEXT: vunpcklpd 
{{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] 5107 ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 5108 ; HASWELL-NEXT: retq # sched: [7:1.00] 5109 ; 5110 ; BROADWELL-LABEL: test_unpcklpd: 5111 ; BROADWELL: # %bb.0: 5112 ; BROADWELL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 5113 ; BROADWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [7:1.00] 5114 ; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 5115 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5116 ; 5117 ; SKYLAKE-LABEL: test_unpcklpd: 5118 ; SKYLAKE: # %bb.0: 5119 ; SKYLAKE-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 5120 ; SKYLAKE-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] 5121 ; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 5122 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5123 ; 5124 ; SKX-LABEL: test_unpcklpd: 5125 ; SKX: # %bb.0: 5126 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 5127 ; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] 5128 ; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 5129 ; SKX-NEXT: retq # sched: [7:1.00] 5130 ; 5131 ; BTVER2-LABEL: test_unpcklpd: 5132 ; BTVER2: # %bb.0: 5133 ; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] 5134 ; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [6:2.00] 5135 ; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 5136 ; BTVER2-NEXT: retq # sched: [4:1.00] 5137 ; 5138 ; ZNVER1-LABEL: test_unpcklpd: 5139 ; ZNVER1: # %bb.0: 5140 ; ZNVER1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.50] 5141 ; ZNVER1-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:0.50] 5142 ; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 5143 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5144 %1 = 
shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 5145 %2 = load <4 x double>, <4 x double> *%a2, align 32 5146 %3 = shufflevector <4 x double> %a1, <4 x double> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 5147 %4 = fadd <4 x double> %1, %3 5148 ret <4 x double> %4 5149 } 5150 5151 define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind { 5152 ; GENERIC-LABEL: test_unpcklps: 5153 ; GENERIC: # %bb.0: 5154 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 5155 ; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 5156 ; GENERIC-NEXT: retq # sched: [1:1.00] 5157 ; 5158 ; SANDY-LABEL: test_unpcklps: 5159 ; SANDY: # %bb.0: 5160 ; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 5161 ; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 5162 ; SANDY-NEXT: retq # sched: [1:1.00] 5163 ; 5164 ; HASWELL-LABEL: test_unpcklps: 5165 ; HASWELL: # %bb.0: 5166 ; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 5167 ; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 5168 ; HASWELL-NEXT: retq # sched: [7:1.00] 5169 ; 5170 ; BROADWELL-LABEL: test_unpcklps: 5171 ; BROADWELL: # %bb.0: 5172 ; BROADWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 5173 ; BROADWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] 5174 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5175 ; 5176 ; SKYLAKE-LABEL: test_unpcklps: 5177 ; SKYLAKE: # %bb.0: 5178 ; SKYLAKE-NEXT: vunpcklps {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 5179 ; SKYLAKE-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 5180 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5181 ; 5182 ; SKX-LABEL: test_unpcklps: 5183 ; SKX: # %bb.0: 5184 ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 5185 ; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 5186 ; SKX-NEXT: retq # sched: [7:1.00] 5187 ; 5188 ; BTVER2-LABEL: test_unpcklps: 5189 ; BTVER2: # %bb.0: 5190 ; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 5191 ; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:2.00] 5192 ; BTVER2-NEXT: retq # sched: [4:1.00] 5193 ; 5194 ; ZNVER1-LABEL: test_unpcklps: 5195 ; ZNVER1: # %bb.0: 5196 ; ZNVER1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.50] 5197 ; ZNVER1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:0.50] 5198 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5199 %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 5200 %2 = load <8 x float>, <8 x float> *%a2, align 32 5201 %3 = shufflevector <8 x float> %1, <8 x float> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> 5202 ret <8 x float> %3 5203 } 5204 5205 define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { 5206 ; GENERIC-LABEL: test_xorpd: 5207 ; GENERIC: # %bb.0: 5208 ; GENERIC-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5209 ; GENERIC-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5210 ; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: 
[3:1.00] 5211 ; GENERIC-NEXT: retq # sched: [1:1.00] 5212 ; 5213 ; SANDY-LABEL: test_xorpd: 5214 ; SANDY: # %bb.0: 5215 ; SANDY-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5216 ; SANDY-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5217 ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 5218 ; SANDY-NEXT: retq # sched: [1:1.00] 5219 ; 5220 ; HASWELL-LABEL: test_xorpd: 5221 ; HASWELL: # %bb.0: 5222 ; HASWELL-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5223 ; HASWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5224 ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 5225 ; HASWELL-NEXT: retq # sched: [7:1.00] 5226 ; 5227 ; BROADWELL-LABEL: test_xorpd: 5228 ; BROADWELL: # %bb.0: 5229 ; BROADWELL-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5230 ; BROADWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 5231 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 5232 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5233 ; 5234 ; SKYLAKE-LABEL: test_xorpd: 5235 ; SKYLAKE: # %bb.0: 5236 ; SKYLAKE-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5237 ; SKYLAKE-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5238 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 5239 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5240 ; 5241 ; SKX-LABEL: test_xorpd: 5242 ; SKX: # %bb.0: 5243 ; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5244 ; SKX-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5245 ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 5246 ; SKX-NEXT: retq # sched: [7:1.00] 5247 ; 5248 ; BTVER2-LABEL: test_xorpd: 5249 ; BTVER2: # %bb.0: 5250 ; BTVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5251 ; BTVER2-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00] 5252 ; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00] 5253 ; BTVER2-NEXT: retq # sched: [4:1.00] 5254 ; 5255 ; ZNVER1-LABEL: test_xorpd: 5256 ; ZNVER1: # %bb.0: 5257 ; ZNVER1-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: 
[1:0.25] 5258 ; ZNVER1-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5259 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 5260 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5261 %1 = bitcast <4 x double> %a0 to <4 x i64> 5262 %2 = bitcast <4 x double> %a1 to <4 x i64> 5263 %3 = xor <4 x i64> %1, %2 5264 %4 = load <4 x double>, <4 x double> *%a2, align 32 5265 %5 = bitcast <4 x double> %4 to <4 x i64> 5266 %6 = xor <4 x i64> %3, %5 5267 %7 = bitcast <4 x i64> %6 to <4 x double> 5268 %8 = fadd <4 x double> %a1, %7 5269 ret <4 x double> %8 5270 } 5271 5272 define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { 5273 ; GENERIC-LABEL: test_xorps: 5274 ; GENERIC: # %bb.0: 5275 ; GENERIC-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5276 ; GENERIC-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5277 ; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 5278 ; GENERIC-NEXT: retq # sched: [1:1.00] 5279 ; 5280 ; SANDY-LABEL: test_xorps: 5281 ; SANDY: # %bb.0: 5282 ; SANDY-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5283 ; SANDY-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5284 ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 5285 ; SANDY-NEXT: retq # sched: [1:1.00] 5286 ; 5287 ; HASWELL-LABEL: test_xorps: 5288 ; HASWELL: # %bb.0: 5289 ; HASWELL-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5290 ; HASWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5291 ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 5292 ; HASWELL-NEXT: retq # sched: [7:1.00] 5293 ; 5294 ; BROADWELL-LABEL: test_xorps: 5295 ; BROADWELL: # %bb.0: 5296 ; BROADWELL-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5297 ; BROADWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 5298 ; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 5299 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5300 ; 5301 ; SKYLAKE-LABEL: test_xorps: 5302 ; SKYLAKE: # %bb.0: 5303 ; SKYLAKE-NEXT: vxorps %ymm1, %ymm0, %ymm0 # 
sched: [1:0.33] 5304 ; SKYLAKE-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5305 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 5306 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5307 ; 5308 ; SKX-LABEL: test_xorps: 5309 ; SKX: # %bb.0: 5310 ; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5311 ; SKX-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5312 ; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] 5313 ; SKX-NEXT: retq # sched: [7:1.00] 5314 ; 5315 ; BTVER2-LABEL: test_xorps: 5316 ; BTVER2: # %bb.0: 5317 ; BTVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5318 ; BTVER2-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [6:2.00] 5319 ; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] 5320 ; BTVER2-NEXT: retq # sched: [4:1.00] 5321 ; 5322 ; ZNVER1-LABEL: test_xorps: 5323 ; ZNVER1: # %bb.0: 5324 ; ZNVER1-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5325 ; ZNVER1-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5326 ; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] 5327 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5328 %1 = bitcast <8 x float> %a0 to <4 x i64> 5329 %2 = bitcast <8 x float> %a1 to <4 x i64> 5330 %3 = xor <4 x i64> %1, %2 5331 %4 = load <8 x float>, <8 x float> *%a2, align 32 5332 %5 = bitcast <8 x float> %4 to <4 x i64> 5333 %6 = xor <4 x i64> %3, %5 5334 %7 = bitcast <4 x i64> %6 to <8 x float> 5335 %8 = fadd <8 x float> %a1, %7 5336 ret <8 x float> %8 5337 } 5338 ; test_zeroall - calls the llvm.x86.avx.vzeroall intrinsic and returns void; each prefix expects a lone vzeroall followed by retq. 5339 define void @test_zeroall() { 5340 ; GENERIC-LABEL: test_zeroall: 5341 ; GENERIC: # %bb.0: 5342 ; GENERIC-NEXT: vzeroall # sched: [100:0.33] 5343 ; GENERIC-NEXT: retq # sched: [1:1.00] 5344 ; 5345 ; SANDY-LABEL: test_zeroall: 5346 ; SANDY: # %bb.0: 5347 ; SANDY-NEXT: vzeroall # sched: [100:0.33] 5348 ; SANDY-NEXT: retq # sched: [1:1.00] 5349 ; 5350 ; HASWELL-LABEL: test_zeroall: 5351 ; HASWELL: # %bb.0: 5352 ; HASWELL-NEXT: vzeroall # sched: [16:16.00] 5353 ; HASWELL-NEXT: retq # sched: [7:1.00] 5354 ; 5355 ; BROADWELL-LABEL: 
test_zeroall: 5356 ; BROADWELL: # %bb.0: 5357 ; BROADWELL-NEXT: vzeroall # sched: [16:16.00] 5358 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5359 ; 5360 ; SKYLAKE-LABEL: test_zeroall: 5361 ; SKYLAKE: # %bb.0: 5362 ; SKYLAKE-NEXT: vzeroall # sched: [16:4.00] 5363 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5364 ; 5365 ; SKX-LABEL: test_zeroall: 5366 ; SKX: # %bb.0: 5367 ; SKX-NEXT: vzeroall # sched: [16:4.00] 5368 ; SKX-NEXT: retq # sched: [7:1.00] 5369 ; 5370 ; BTVER2-LABEL: test_zeroall: 5371 ; BTVER2: # %bb.0: 5372 ; BTVER2-NEXT: vzeroall # sched: [90:36.50] 5373 ; BTVER2-NEXT: retq # sched: [4:1.00] 5374 ; 5375 ; ZNVER1-LABEL: test_zeroall: 5376 ; ZNVER1: # %bb.0: 5377 ; ZNVER1-NEXT: vzeroall # sched: [100:0.25] 5378 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5379 call void @llvm.x86.avx.vzeroall() 5380 ret void 5381 } 5382 declare void @llvm.x86.avx.vzeroall() nounwind 5383 ; test_zeroupper - calls the llvm.x86.avx.vzeroupper intrinsic and returns void; each prefix expects a lone vzeroupper followed by retq. 5384 define void @test_zeroupper() { 5385 ; GENERIC-LABEL: test_zeroupper: 5386 ; GENERIC: # %bb.0: 5387 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 5388 ; GENERIC-NEXT: retq # sched: [1:1.00] 5389 ; 5390 ; SANDY-LABEL: test_zeroupper: 5391 ; SANDY: # %bb.0: 5392 ; SANDY-NEXT: vzeroupper # sched: [100:0.33] 5393 ; SANDY-NEXT: retq # sched: [1:1.00] 5394 ; 5395 ; HASWELL-LABEL: test_zeroupper: 5396 ; HASWELL: # %bb.0: 5397 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 5398 ; HASWELL-NEXT: retq # sched: [7:1.00] 5399 ; 5400 ; BROADWELL-LABEL: test_zeroupper: 5401 ; BROADWELL: # %bb.0: 5402 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 5403 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5404 ; 5405 ; SKYLAKE-LABEL: test_zeroupper: 5406 ; SKYLAKE: # %bb.0: 5407 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 5408 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5409 ; 5410 ; SKX-LABEL: test_zeroupper: 5411 ; SKX: # %bb.0: 5412 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 5413 ; SKX-NEXT: retq # sched: [7:1.00] 5414 ; 5415 ; BTVER2-LABEL: test_zeroupper: 5416 ; BTVER2: # %bb.0: 5417 ; BTVER2-NEXT: vzeroupper # sched: [46:18.50] 
5418 ; BTVER2-NEXT: retq # sched: [4:1.00] 5419 ; 5420 ; ZNVER1-LABEL: test_zeroupper: 5421 ; ZNVER1: # %bb.0: 5422 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 5423 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5424 call void @llvm.x86.avx.vzeroupper() 5425 ret void 5426 } 5427 declare void @llvm.x86.avx.vzeroupper() nounwind 5428 ; Metadata node holding a single i32 1. NOTE(review) no use of !0 is visible in this part of the file - presumably referenced by earlier tests; confirm before removing. 5429 !0 = !{i32 1} 5430