; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ATOM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SLM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1

; Per-CPU scheduling expectations for the SSE3 intrinsics used below.
; Prefixes ending in -SSE belong to the runs that disable SSSE3 and expect the
; non-VEX mnemonics; the matching unsuffixed CPU prefixes belong to the runs
; that only disable AVX2 and expect the v-prefixed mnemonics. Sandy Bridge and
; Ivy Bridge share one pair of prefixes.
; Do not edit the expectation lines by hand; regenerate them with the script
; named on the first line.

; ADDSUBPD - the intrinsic is applied to two register arguments, then to that
; result and a vector loaded from %a2, so both the register and the memory
; operand forms are emitted.
define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_addsubpd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_addsubpd:
; ATOM:       # %bb.0:
; ATOM-NEXT:    addsubpd %xmm1, %xmm0 # sched: [6:3.00]
; ATOM-NEXT:    addsubpd (%rdi), %xmm0 # sched: [7:3.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_addsubpd:
; SLM:       # %bb.0:
; SLM-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    addsubpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_addsubpd:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_addsubpd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_addsubpd:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [9:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_addsubpd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_addsubpd:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_addsubpd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_addsubpd:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_addsubpd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_addsubpd:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_addsubpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_addsubpd:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_addsubpd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_addsubpd:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    addsubpd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT:    addsubpd (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_addsubpd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  %2 = load <2 x double>, <2 x double> *%a2, align 16
  %3 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %1, <2 x double> %2)
  ret <2 x double> %3
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

; ADDSUBPS - same register-then-memory shape as above, on <4 x float>.
define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_addsubps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_addsubps:
; ATOM:       # %bb.0:
; ATOM-NEXT:    addsubps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    addsubps (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_addsubps:
; SLM:       # %bb.0:
; SLM-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    addsubps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_addsubps:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_addsubps:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_addsubps:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [9:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_addsubps:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_addsubps:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_addsubps:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_addsubps:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_addsubps:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_addsubps:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_addsubps:
; SKX:       # %bb.0:
; SKX-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_addsubps:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_addsubps:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_addsubps:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    addsubps %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT:    addsubps (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_addsubps:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %1, <4 x float> %2)
  ret <4 x float> %3
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone

; HADDPD - register form followed by memory-operand form, on <2 x double>.
define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_haddpd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
; GENERIC-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_haddpd:
; ATOM:       # %bb.0:
; ATOM-NEXT:    haddpd %xmm1, %xmm0 # sched: [8:4.00]
; ATOM-NEXT:    haddpd (%rdi), %xmm0 # sched: [9:4.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_haddpd:
; SLM:       # %bb.0:
; SLM-NEXT:    haddpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    haddpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_haddpd:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
; SANDY-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_haddpd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_haddpd:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
; HASWELL-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [11:2.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_haddpd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_haddpd:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [5:2.00]
; BROADWELL-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [10:2.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_haddpd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; BROADWELL-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_haddpd:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [6:2.00]
; SKYLAKE-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [12:2.00]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_haddpd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; SKYLAKE-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_haddpd:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [6:2.00]
; SKX-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [12:2.00]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_haddpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; SKX-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_haddpd:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_haddpd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_haddpd:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    haddpd %xmm1, %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    haddpd (%rdi), %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_haddpd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vhaddpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    vhaddpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  %2 = load <2 x double>, <2 x double> *%a2, align 16
  %3 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %1, <2 x double> %2)
  ret <2 x double> %3
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone

; HADDPS - register form followed by memory-operand form, on <4 x float>.
define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_haddps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
; GENERIC-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_haddps:
; ATOM:       # %bb.0:
; ATOM-NEXT:    haddps %xmm1, %xmm0 # sched: [8:4.00]
; ATOM-NEXT:    haddps (%rdi), %xmm0 # sched: [9:4.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_haddps:
; SLM:       # %bb.0:
; SLM-NEXT:    haddps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    haddps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_haddps:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
; SANDY-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_haddps:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_haddps:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
; HASWELL-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [11:2.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_haddps:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_haddps:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [5:2.00]
; BROADWELL-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [10:2.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_haddps:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; BROADWELL-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_haddps:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [6:2.00]
; SKYLAKE-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [12:2.00]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_haddps:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; SKYLAKE-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_haddps:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [6:2.00]
; SKX-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [12:2.00]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_haddps:
; SKX:       # %bb.0:
; SKX-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; SKX-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_haddps:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_haddps:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_haddps:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    haddps %xmm1, %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    haddps (%rdi), %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_haddps:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vhaddps %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    vhaddps (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %1, <4 x float> %2)
  ret <4 x float> %3
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone

; HSUBPD - register form followed by memory-operand form, on <2 x double>.
define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_hsubpd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
; GENERIC-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_hsubpd:
; ATOM:       # %bb.0:
; ATOM-NEXT:    hsubpd %xmm1, %xmm0 # sched: [8:4.00]
; ATOM-NEXT:    hsubpd (%rdi), %xmm0 # sched: [9:4.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_hsubpd:
; SLM:       # %bb.0:
; SLM-NEXT:    hsubpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    hsubpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_hsubpd:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
; SANDY-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_hsubpd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_hsubpd:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
; HASWELL-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [11:2.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_hsubpd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_hsubpd:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [5:2.00]
; BROADWELL-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [10:2.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_hsubpd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; BROADWELL-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_hsubpd:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [6:2.00]
; SKYLAKE-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [12:2.00]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_hsubpd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; SKYLAKE-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_hsubpd:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [6:2.00]
; SKX-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [12:2.00]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_hsubpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; SKX-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_hsubpd:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_hsubpd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_hsubpd:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    hsubpd %xmm1, %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    hsubpd (%rdi), %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_hsubpd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vhsubpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    vhsubpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
  %2 = load <2 x double>, <2 x double> *%a2, align 16
  %3 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %1, <2 x double> %2)
  ret <2 x double> %3
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone

; HSUBPS - register form followed by memory-operand form, on <4 x float>.
define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_hsubps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
; GENERIC-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_hsubps:
; ATOM:       # %bb.0:
; ATOM-NEXT:    hsubps %xmm1, %xmm0 # sched: [8:4.00]
; ATOM-NEXT:    hsubps (%rdi), %xmm0 # sched: [9:4.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_hsubps:
; SLM:       # %bb.0:
; SLM-NEXT:    hsubps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    hsubps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_hsubps:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
; SANDY-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_hsubps:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; SANDY-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_hsubps:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
; HASWELL-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [11:2.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_hsubps:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; HASWELL-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_hsubps:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [5:2.00]
; BROADWELL-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [10:2.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_hsubps:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; BROADWELL-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_hsubps:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [6:2.00]
; SKYLAKE-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [12:2.00]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_hsubps:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; SKYLAKE-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_hsubps:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [6:2.00]
; SKX-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [12:2.00]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_hsubps:
; SKX:       # %bb.0:
; SKX-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
; SKX-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_hsubps:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_hsubps:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_hsubps:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    hsubps %xmm1, %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    hsubps (%rdi), %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_hsubps:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vhsubps %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    vhsubps (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %1, <4 x float> %2)
  ret <4 x float> %3
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone

; LDDQU - a single load through the ldu.dq intrinsic; only a memory form is
; exercised. The Atom expectations include two nop instructions before retq.
define <16 x i8> @test_lddqu(i8* %a0) {
; GENERIC-LABEL: test_lddqu:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lddqu:
; ATOM:       # %bb.0:
; ATOM-NEXT:    lddqu (%rdi), %xmm0 # sched: [3:1.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_lddqu:
; SLM:       # %bb.0:
; SLM-NEXT:    lddqu (%rdi), %xmm0 # sched: [3:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_lddqu:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_lddqu:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_lddqu:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_lddqu:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_lddqu:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [5:0.50]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_lddqu:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vlddqu (%rdi), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_lddqu:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_lddqu:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_lddqu:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [6:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_lddqu:
; SKX:       # %bb.0:
; SKX-NEXT:    vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_lddqu:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [5:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_lddqu:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vlddqu (%rdi), %xmm0 # sched: [5:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_lddqu:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    lddqu (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_lddqu:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vlddqu (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
  ret <16 x i8> %1
}
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly

; MONITOR - the expectations show the first two arguments being moved into
; rax and ecx before the instruction is issued.
define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) {
; GENERIC-LABEL: test_monitor:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
; GENERIC-NEXT:    movl %esi, %ecx # sched: [1:0.33]
; GENERIC-NEXT:    monitor # sched: [100:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_monitor:
; ATOM:       # %bb.0:
; ATOM-NEXT:    leaq (%rdi), %rax # sched: [1:1.00]
; ATOM-NEXT:    movl %esi, %ecx # sched: [1:0.50]
; ATOM-NEXT:    monitor # sched: [45:22.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_monitor:
; SLM:       # %bb.0:
; SLM-NEXT:    leaq (%rdi), %rax # sched: [1:1.00]
; SLM-NEXT:    movl %esi, %ecx # sched: [1:0.50]
; SLM-NEXT:    monitor # sched: [100:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_monitor:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
; SANDY-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.33]
; SANDY-SSE-NEXT:    monitor # sched: [100:0.33]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_monitor:
; SANDY:       # %bb.0:
; SANDY-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
; SANDY-NEXT:    movl %esi, %ecx # sched: [1:0.33]
; SANDY-NEXT:    monitor # sched: [100:0.33]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_monitor:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
; HASWELL-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
; HASWELL-SSE-NEXT:    monitor # sched: [100:0.25]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_monitor:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
; HASWELL-NEXT:    movl %esi, %ecx # sched: [1:0.25]
; HASWELL-NEXT:    monitor # sched: [100:0.25]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_monitor:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
; BROADWELL-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
; BROADWELL-SSE-NEXT:    monitor # sched: [100:0.25]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_monitor:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
; BROADWELL-NEXT:    movl %esi, %ecx # sched: [1:0.25]
; BROADWELL-NEXT:    monitor # sched: [100:0.25]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_monitor:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
; SKYLAKE-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
; SKYLAKE-SSE-NEXT:    monitor # sched: [100:0.25]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_monitor:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
; SKYLAKE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
; SKYLAKE-NEXT:    monitor # sched: [100:0.25]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_monitor:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
; SKX-SSE-NEXT:    movl %esi, %ecx # sched: [1:0.25]
; SKX-SSE-NEXT:    monitor # sched: [100:0.25]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_monitor:
; SKX:       # %bb.0:
; SKX-NEXT:    leaq (%rdi), %rax # sched: [1:0.50]
;
SKX-NEXT: movl %esi, %ecx # sched: [1:0.25] 857 ; SKX-NEXT: monitor # sched: [100:0.25] 858 ; SKX-NEXT: retq # sched: [7:1.00] 859 ; 860 ; BTVER2-SSE-LABEL: test_monitor: 861 ; BTVER2-SSE: # %bb.0: 862 ; BTVER2-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] 863 ; BTVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.50] 864 ; BTVER2-SSE-NEXT: monitor # sched: [100:0.50] 865 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 866 ; 867 ; BTVER2-LABEL: test_monitor: 868 ; BTVER2: # %bb.0: 869 ; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50] 870 ; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50] 871 ; BTVER2-NEXT: monitor # sched: [100:0.50] 872 ; BTVER2-NEXT: retq # sched: [4:1.00] 873 ; 874 ; ZNVER1-SSE-LABEL: test_monitor: 875 ; ZNVER1-SSE: # %bb.0: 876 ; ZNVER1-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.25] 877 ; ZNVER1-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25] 878 ; ZNVER1-SSE-NEXT: monitor # sched: [100:0.25] 879 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 880 ; 881 ; ZNVER1-LABEL: test_monitor: 882 ; ZNVER1: # %bb.0: 883 ; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] 884 ; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25] 885 ; ZNVER1-NEXT: monitor # sched: [100:0.25] 886 ; ZNVER1-NEXT: retq # sched: [1:0.50] 887 tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2) 888 ret void 889 } 890 declare void @llvm.x86.sse3.monitor(i8*, i32, i32) 891
; Schedule test for MOVDDUP / VMOVDDUP. The IR splats element 0 of %a0 and
; element 0 of the vector loaded from %a1, so each RUN configuration expects
; both the register form (xmm0[0,0]) and the load-folded form (mem[0,0]),
; followed by the subpd/vsubpd produced from the fsub that combines them.
; The slm expectations additionally end with a movapd that copies the result
; into xmm0. Expectations are autogenerated (see the NOTE at the top of the
; file): regenerate with utils/update_llc_test_checks.py, do not hand-edit.
892 define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { 893 ; GENERIC-LABEL: test_movddup: 894 ; GENERIC: # %bb.0: 895 ; GENERIC-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] 896 ; GENERIC-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50] 897 ; GENERIC-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] 898 ; GENERIC-NEXT: retq # sched: [1:1.00] 899 ; 900 ; ATOM-LABEL: test_movddup: 901 ; ATOM: # %bb.0: 902 ; ATOM-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] 903 ; ATOM-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [1:1.00] 904 ; ATOM-NEXT: subpd %xmm1, %xmm0 # sched: [6:3.00] 905 ; ATOM-NEXT:
retq # sched: [79:39.50] 906 ; 907 ; SLM-LABEL: test_movddup: 908 ; SLM: # %bb.0: 909 ; SLM-NEXT: movddup {{.*#+}} xmm1 = mem[0,0] sched: [4:1.00] 910 ; SLM-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] 911 ; SLM-NEXT: subpd %xmm0, %xmm1 # sched: [3:1.00] 912 ; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] 913 ; SLM-NEXT: retq # sched: [4:1.00] 914 ; 915 ; SANDY-SSE-LABEL: test_movddup: 916 ; SANDY-SSE: # %bb.0: 917 ; SANDY-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] 918 ; SANDY-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50] 919 ; SANDY-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] 920 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 921 ; 922 ; SANDY-LABEL: test_movddup: 923 ; SANDY: # %bb.0: 924 ; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] 925 ; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50] 926 ; SANDY-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 927 ; SANDY-NEXT: retq # sched: [1:1.00] 928 ; 929 ; HASWELL-SSE-LABEL: test_movddup: 930 ; HASWELL-SSE: # %bb.0: 931 ; HASWELL-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] 932 ; HASWELL-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50] 933 ; HASWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] 934 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 935 ; 936 ; HASWELL-LABEL: test_movddup: 937 ; HASWELL: # %bb.0: 938 ; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] 939 ; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] 940 ; HASWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 941 ; HASWELL-NEXT: retq # sched: [7:1.00] 942 ; 943 ; BROADWELL-SSE-LABEL: test_movddup: 944 ; BROADWELL-SSE: # %bb.0: 945 ; BROADWELL-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] 946 ; BROADWELL-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50] 947 ; BROADWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] 948 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 949 ; 950 ; BROADWELL-LABEL: 
test_movddup: 951 ; BROADWELL: # %bb.0: 952 ; BROADWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] 953 ; BROADWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] 954 ; BROADWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 955 ; BROADWELL-NEXT: retq # sched: [7:1.00] 956 ; 957 ; SKYLAKE-SSE-LABEL: test_movddup: 958 ; SKYLAKE-SSE: # %bb.0: 959 ; SKYLAKE-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] 960 ; SKYLAKE-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50] 961 ; SKYLAKE-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] 962 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 963 ; 964 ; SKYLAKE-LABEL: test_movddup: 965 ; SKYLAKE: # %bb.0: 966 ; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] 967 ; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] 968 ; SKYLAKE-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] 969 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 970 ; 971 ; SKX-SSE-LABEL: test_movddup: 972 ; SKX-SSE: # %bb.0: 973 ; SKX-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] 974 ; SKX-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50] 975 ; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] 976 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 977 ; 978 ; SKX-LABEL: test_movddup: 979 ; SKX: # %bb.0: 980 ; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] 981 ; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] 982 ; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] 983 ; SKX-NEXT: retq # sched: [7:1.00] 984 ; 985 ; BTVER2-SSE-LABEL: test_movddup: 986 ; BTVER2-SSE: # %bb.0: 987 ; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50] 988 ; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:1.00] 989 ; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] 990 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 991 ; 992 ; BTVER2-LABEL: test_movddup: 993 ; BTVER2: # %bb.0: 994 ; BTVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00] 995 ; 
BTVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50] 996 ; BTVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 997 ; BTVER2-NEXT: retq # sched: [4:1.00] 998 ; 999 ; ZNVER1-SSE-LABEL: test_movddup: 1000 ; ZNVER1-SSE: # %bb.0: 1001 ; ZNVER1-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50] 1002 ; ZNVER1-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [8:0.50] 1003 ; ZNVER1-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] 1004 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1005 ; 1006 ; ZNVER1-LABEL: test_movddup: 1007 ; ZNVER1: # %bb.0: 1008 ; ZNVER1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50] 1009 ; ZNVER1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50] 1010 ; ZNVER1-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 1011 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1012 %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer 1013 %2 = load <2 x double>, <2 x double> *%a1, align 16 1014 %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer 1015 %4 = fsub <2 x double> %3, %1 ; Use fsub to stop the movddup from being folded as a broadcast load in avx512vl. 
1016 ret <2 x double> %4 1017 } 1018
; Schedule test for MOVSHDUP / VMOVSHDUP. The IR applies the <1,1,3,3>
; shuffle to %a0 and to the vector loaded from %a1, so each RUN configuration
; expects both the register form (xmm0[1,1,3,3]) and the load-folded form
; (mem[1,1,3,3]), followed by the addps/vaddps produced from the fadd.
; The slm expectations additionally end with a movaps that copies the result
; into xmm0. Expectations are autogenerated (see the NOTE at the top of the
; file): regenerate with utils/update_llc_test_checks.py, do not hand-edit.
1019 define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { 1020 ; GENERIC-LABEL: test_movshdup: 1021 ; GENERIC: # %bb.0: 1022 ; GENERIC-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] 1023 ; GENERIC-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] 1024 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1025 ; GENERIC-NEXT: retq # sched: [1:1.00] 1026 ; 1027 ; ATOM-LABEL: test_movshdup: 1028 ; ATOM: # %bb.0: 1029 ; ATOM-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] 1030 ; ATOM-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [1:1.00] 1031 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] 1032 ; ATOM-NEXT: retq # sched: [79:39.50] 1033 ; 1034 ; SLM-LABEL: test_movshdup: 1035 ; SLM: # %bb.0: 1036 ; SLM-NEXT: movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:1.00] 1037 ; SLM-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] 1038 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 1039 ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] 1040 ; SLM-NEXT: retq # sched: [4:1.00] 1041 ; 1042 ; SANDY-SSE-LABEL: test_movshdup: 1043 ; SANDY-SSE: # %bb.0: 1044 ; SANDY-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] 1045 ; SANDY-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] 1046 ; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1047 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1048 ; 1049 ; SANDY-LABEL: test_movshdup: 1050 ; SANDY: # %bb.0: 1051 ; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] 1052 ; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] 1053 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1054 ; SANDY-NEXT: retq # sched: [1:1.00] 1055 ; 1056 ; HASWELL-SSE-LABEL: test_movshdup: 1057 ; HASWELL-SSE: # %bb.0: 1058 ; HASWELL-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] 1059 ; HASWELL-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] 1060 ;
HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1061 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1062 ; 1063 ; HASWELL-LABEL: test_movshdup: 1064 ; HASWELL: # %bb.0: 1065 ; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] 1066 ; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] 1067 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1068 ; HASWELL-NEXT: retq # sched: [7:1.00] 1069 ; 1070 ; BROADWELL-SSE-LABEL: test_movshdup: 1071 ; BROADWELL-SSE: # %bb.0: 1072 ; BROADWELL-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] 1073 ; BROADWELL-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [5:0.50] 1074 ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1075 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1076 ; 1077 ; BROADWELL-LABEL: test_movshdup: 1078 ; BROADWELL: # %bb.0: 1079 ; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] 1080 ; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:0.50] 1081 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1082 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1083 ; 1084 ; SKYLAKE-SSE-LABEL: test_movshdup: 1085 ; SKYLAKE-SSE: # %bb.0: 1086 ; SKYLAKE-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] 1087 ; SKYLAKE-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] 1088 ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 1089 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1090 ; 1091 ; SKYLAKE-LABEL: test_movshdup: 1092 ; SKYLAKE: # %bb.0: 1093 ; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] 1094 ; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] 1095 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1096 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1097 ; 1098 ; SKX-SSE-LABEL: test_movshdup: 1099 ; SKX-SSE: # %bb.0: 1100 ; SKX-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] 1101 ; SKX-SSE-NEXT: movshdup {{.*#+}} 
xmm0 = mem[1,1,3,3] sched: [6:0.50] 1102 ; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 1103 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1104 ; 1105 ; SKX-LABEL: test_movshdup: 1106 ; SKX: # %bb.0: 1107 ; SKX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] 1108 ; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] 1109 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1110 ; SKX-NEXT: retq # sched: [7:1.00] 1111 ; 1112 ; BTVER2-SSE-LABEL: test_movshdup: 1113 ; BTVER2-SSE: # %bb.0: 1114 ; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50] 1115 ; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:1.00] 1116 ; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1117 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1118 ; 1119 ; BTVER2-LABEL: test_movshdup: 1120 ; BTVER2: # %bb.0: 1121 ; BTVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:1.00] 1122 ; BTVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50] 1123 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1124 ; BTVER2-NEXT: retq # sched: [4:1.00] 1125 ; 1126 ; ZNVER1-SSE-LABEL: test_movshdup: 1127 ; ZNVER1-SSE: # %bb.0: 1128 ; ZNVER1-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50] 1129 ; ZNVER1-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [8:0.50] 1130 ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1131 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1132 ; 1133 ; ZNVER1-LABEL: test_movshdup: 1134 ; ZNVER1: # %bb.0: 1135 ; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50] 1136 ; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50] 1137 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1138 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1139 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> 1140 %2 = load <4 x float>, <4 x float> *%a1, align 16 1141 %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x 
i32> <i32 1, i32 1, i32 3, i32 3> 1142 %4 = fadd <4 x float> %1, %3 1143 ret <4 x float> %4 1144 } 1145
; Schedule test for MOVSLDUP / VMOVSLDUP. The IR applies the <0,0,2,2>
; shuffle to %a0 and to the vector loaded from %a1, so each RUN configuration
; expects both the register form (xmm0[0,0,2,2]) and the load-folded form
; (mem[0,0,2,2]), followed by the addps/vaddps produced from the fadd.
; The slm expectations additionally end with a movaps that copies the result
; into xmm0. Expectations are autogenerated (see the NOTE at the top of the
; file): regenerate with utils/update_llc_test_checks.py, do not hand-edit.
; NOTE(review): the znver1 run without SSSE3 expects sched [100:0.25] for both
; movsldup forms, while the znver1 AVX run expects [8:0.50] / [1:0.50] and
; test_movshdup expects [1:0.50] / [8:0.50] in the same configuration.
; Presumably the scheduling model has no proper entry for the non-VEX
; movsldup opcodes; confirm in the znver1 model, then regenerate.
1146 define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { 1147 ; GENERIC-LABEL: test_movsldup: 1148 ; GENERIC: # %bb.0: 1149 ; GENERIC-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] 1150 ; GENERIC-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] 1151 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1152 ; GENERIC-NEXT: retq # sched: [1:1.00] 1153 ; 1154 ; ATOM-LABEL: test_movsldup: 1155 ; ATOM: # %bb.0: 1156 ; ATOM-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] 1157 ; ATOM-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [1:1.00] 1158 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] 1159 ; ATOM-NEXT: retq # sched: [79:39.50] 1160 ; 1161 ; SLM-LABEL: test_movsldup: 1162 ; SLM: # %bb.0: 1163 ; SLM-NEXT: movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:1.00] 1164 ; SLM-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] 1165 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 1166 ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] 1167 ; SLM-NEXT: retq # sched: [4:1.00] 1168 ; 1169 ; SANDY-SSE-LABEL: test_movsldup: 1170 ; SANDY-SSE: # %bb.0: 1171 ; SANDY-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] 1172 ; SANDY-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] 1173 ; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1174 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1175 ; 1176 ; SANDY-LABEL: test_movsldup: 1177 ; SANDY: # %bb.0: 1178 ; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] 1179 ; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] 1180 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1181 ; SANDY-NEXT: retq # sched: [1:1.00] 1182 ; 1183 ; HASWELL-SSE-LABEL: test_movsldup: 1184 ; HASWELL-SSE: # %bb.0: 1185 ; HASWELL-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] 1186 ; HASWELL-SSE-NEXT: movsldup
{{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] 1187 ; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1188 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1189 ; 1190 ; HASWELL-LABEL: test_movsldup: 1191 ; HASWELL: # %bb.0: 1192 ; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] 1193 ; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] 1194 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1195 ; HASWELL-NEXT: retq # sched: [7:1.00] 1196 ; 1197 ; BROADWELL-SSE-LABEL: test_movsldup: 1198 ; BROADWELL-SSE: # %bb.0: 1199 ; BROADWELL-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] 1200 ; BROADWELL-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [5:0.50] 1201 ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1202 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1203 ; 1204 ; BROADWELL-LABEL: test_movsldup: 1205 ; BROADWELL: # %bb.0: 1206 ; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] 1207 ; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:0.50] 1208 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1209 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1210 ; 1211 ; SKYLAKE-SSE-LABEL: test_movsldup: 1212 ; SKYLAKE-SSE: # %bb.0: 1213 ; SKYLAKE-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] 1214 ; SKYLAKE-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] 1215 ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 1216 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1217 ; 1218 ; SKYLAKE-LABEL: test_movsldup: 1219 ; SKYLAKE: # %bb.0: 1220 ; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] 1221 ; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] 1222 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1223 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1224 ; 1225 ; SKX-SSE-LABEL: test_movsldup: 1226 ; SKX-SSE: # %bb.0: 1227 ; SKX-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] 
sched: [1:1.00] 1228 ; SKX-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] 1229 ; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 1230 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1231 ; 1232 ; SKX-LABEL: test_movsldup: 1233 ; SKX: # %bb.0: 1234 ; SKX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] 1235 ; SKX-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] 1236 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1237 ; SKX-NEXT: retq # sched: [7:1.00] 1238 ; 1239 ; BTVER2-SSE-LABEL: test_movsldup: 1240 ; BTVER2-SSE: # %bb.0: 1241 ; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:0.50] 1242 ; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:1.00] 1243 ; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1244 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1245 ; 1246 ; BTVER2-LABEL: test_movsldup: 1247 ; BTVER2: # %bb.0: 1248 ; BTVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:1.00] 1249 ; BTVER2-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50] 1250 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1251 ; BTVER2-NEXT: retq # sched: [4:1.00] 1252 ; 1253 ; ZNVER1-SSE-LABEL: test_movsldup: 1254 ; ZNVER1-SSE: # %bb.0: 1255 ; ZNVER1-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [100:0.25] 1256 ; ZNVER1-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [100:0.25] 1257 ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1258 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1259 ; 1260 ; ZNVER1-LABEL: test_movsldup: 1261 ; ZNVER1: # %bb.0: 1262 ; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50] 1263 ; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50] 1264 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1265 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1266 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 1267 %2 = load <4 x float>, <4 x float> *%a1, align 16 1268 %3 
= shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> 1269 %4 = fadd <4 x float> %1, %3 1270 ret <4 x float> %4 1271 } 1272
; Schedule test for the SSE3 MWAIT instruction, reached through the
; @llvm.x86.sse3.mwait intrinsic. For every RUN configuration the
; expectations pin the emitted sequence (movl %edi, %ecx, then
; movl %esi, %eax, then mwait, then retq) together with the
; '# sched: [a:b]' annotation printed by llc -print-schedule
; (presumably latency and reciprocal throughput; confirm against the llc docs).
; The expectations are autogenerated (see the NOTE at the top of the file):
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1273 define void @test_mwait(i32 %a0, i32 %a1) { 1274 ; GENERIC-LABEL: test_mwait: 1275 ; GENERIC: # %bb.0: 1276 ; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33] 1277 ; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33] 1278 ; GENERIC-NEXT: mwait # sched: [100:0.33] 1279 ; GENERIC-NEXT: retq # sched: [1:1.00] 1280 ; 1281 ; ATOM-LABEL: test_mwait: 1282 ; ATOM: # %bb.0: 1283 ; ATOM-NEXT: movl %edi, %ecx # sched: [1:0.50] 1284 ; ATOM-NEXT: movl %esi, %eax # sched: [1:0.50] 1285 ; ATOM-NEXT: mwait # sched: [46:23.00] 1286 ; ATOM-NEXT: retq # sched: [79:39.50] 1287 ; 1288 ; SLM-LABEL: test_mwait: 1289 ; SLM: # %bb.0: 1290 ; SLM-NEXT: movl %edi, %ecx # sched: [1:0.50] 1291 ; SLM-NEXT: movl %esi, %eax # sched: [1:0.50] 1292 ; SLM-NEXT: mwait # sched: [100:1.00] 1293 ; SLM-NEXT: retq # sched: [4:1.00] 1294 ; 1295 ; SANDY-SSE-LABEL: test_mwait: 1296 ; SANDY-SSE: # %bb.0: 1297 ; SANDY-SSE-NEXT: movl %edi, %ecx # sched: [1:0.33] 1298 ; SANDY-SSE-NEXT: movl %esi, %eax # sched: [1:0.33] 1299 ; SANDY-SSE-NEXT: mwait # sched: [100:0.33] 1300 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1301 ; 1302 ; SANDY-LABEL: test_mwait: 1303 ; SANDY: # %bb.0: 1304 ; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33] 1305 ; SANDY-NEXT: movl %esi, %eax # sched: [1:0.33] 1306 ; SANDY-NEXT: mwait # sched: [100:0.33] 1307 ; SANDY-NEXT: retq # sched: [1:1.00] 1308 ; 1309 ; HASWELL-SSE-LABEL: test_mwait: 1310 ; HASWELL-SSE: # %bb.0: 1311 ; HASWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] 1312 ; HASWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] 1313 ; HASWELL-SSE-NEXT: mwait # sched: [20:2.50] 1314 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1315 ; 1316 ; HASWELL-LABEL: test_mwait: 1317 ; HASWELL: # %bb.0: 1318 ; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] 1319 ; HASWELL-NEXT: movl %esi, %eax # sched: [1:0.25] 1320 ; HASWELL-NEXT: mwait # sched:
[20:2.50] 1321 ; HASWELL-NEXT: retq # sched: [7:1.00] 1322 ; 1323 ; BROADWELL-SSE-LABEL: test_mwait: 1324 ; BROADWELL-SSE: # %bb.0: 1325 ; BROADWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] 1326 ; BROADWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] 1327 ; BROADWELL-SSE-NEXT: mwait # sched: [100:0.25] 1328 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1329 ; 1330 ; BROADWELL-LABEL: test_mwait: 1331 ; BROADWELL: # %bb.0: 1332 ; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] 1333 ; BROADWELL-NEXT: movl %esi, %eax # sched: [1:0.25] 1334 ; BROADWELL-NEXT: mwait # sched: [100:0.25] 1335 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1336 ; 1337 ; SKYLAKE-SSE-LABEL: test_mwait: 1338 ; SKYLAKE-SSE: # %bb.0: 1339 ; SKYLAKE-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] 1340 ; SKYLAKE-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] 1341 ; SKYLAKE-SSE-NEXT: mwait # sched: [20:2.50] 1342 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1343 ; 1344 ; SKYLAKE-LABEL: test_mwait: 1345 ; SKYLAKE: # %bb.0: 1346 ; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25] 1347 ; SKYLAKE-NEXT: movl %esi, %eax # sched: [1:0.25] 1348 ; SKYLAKE-NEXT: mwait # sched: [20:2.50] 1349 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1350 ; 1351 ; SKX-SSE-LABEL: test_mwait: 1352 ; SKX-SSE: # %bb.0: 1353 ; SKX-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] 1354 ; SKX-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] 1355 ; SKX-SSE-NEXT: mwait # sched: [20:2.50] 1356 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1357 ; 1358 ; SKX-LABEL: test_mwait: 1359 ; SKX: # %bb.0: 1360 ; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25] 1361 ; SKX-NEXT: movl %esi, %eax # sched: [1:0.25] 1362 ; SKX-NEXT: mwait # sched: [20:2.50] 1363 ; SKX-NEXT: retq # sched: [7:1.00] 1364 ; 1365 ; BTVER2-SSE-LABEL: test_mwait: 1366 ; BTVER2-SSE: # %bb.0: 1367 ; BTVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.50] 1368 ; BTVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.50] 1369 ; BTVER2-SSE-NEXT: mwait # sched: [100:0.50] 1370 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1371 ; 1372 ; 
BTVER2-LABEL: test_mwait: 1373 ; BTVER2: # %bb.0: 1374 ; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.50] 1375 ; BTVER2-NEXT: movl %esi, %eax # sched: [1:0.50] 1376 ; BTVER2-NEXT: mwait # sched: [100:0.50] 1377 ; BTVER2-NEXT: retq # sched: [4:1.00] 1378 ; 1379 ; ZNVER1-SSE-LABEL: test_mwait: 1380 ; ZNVER1-SSE: # %bb.0: 1381 ; ZNVER1-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] 1382 ; ZNVER1-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] 1383 ; ZNVER1-SSE-NEXT: mwait # sched: [100:0.25] 1384 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1385 ; 1386 ; ZNVER1-LABEL: test_mwait: 1387 ; ZNVER1: # %bb.0: 1388 ; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25] 1389 ; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25] 1390 ; ZNVER1-NEXT: mwait # sched: [100:0.25] 1391 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1392 tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1) 1393 ret void 1394 } 1395 declare void @llvm.x86.sse3.mwait(i32, i32) 1396