; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1

; FIXME: we should really use -mattr=-sse2 here but some of the comparison tests don't work without access to legal <4 x i32> types.

; Packed single-precision add: one register/register add, then a second add
; whose operand is loaded from %a2 so the load-folding form is checked too.
define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_addps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    addps (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_addps:
; ATOM:       # %bb.0:
; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    addps (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_addps:
; SLM:       # %bb.0:
; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    addps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_addps:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_addps:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_addps:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [9:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_addps:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_addps:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_addps:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_addps:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [10:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_addps:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_addps:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_addps:
; SKX:       # %bb.0:
; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_addps:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_addps:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_addps:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT:    addps (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_addps:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    vaddps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %sum = fadd <4 x float> %a0, %a1
  %mem = load <4 x float>, <4 x float> *%a2, align 16
  %res = fadd <4 x float> %sum, %mem
  ret <4 x float> %res
}

; Scalar single-precision add: register/register form followed by an add of
; a float loaded from %a2.
define float @test_addss(float %a0, float %a1, float *%a2) {
; GENERIC-LABEL: test_addss:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    addss (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_addss:
; ATOM:       # %bb.0:
; ATOM-NEXT:    addss %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    addss (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_addss:
; SLM:       # %bb.0:
; SLM-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    addss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_addss:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_addss:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_addss:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [8:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_addss:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_addss:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_addss:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_addss:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [9:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_addss:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_addss:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_addss:
; SKX:       # %bb.0:
; SKX-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_addss:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_addss:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_addss:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    addss %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT:    addss (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_addss:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    vaddss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %sum = fadd float %a0, %a1
  %mem = load float, float *%a2, align 4
  %res = fadd float %sum, %mem
  ret float %res
}

; Bitwise AND of float vectors, expressed on <4 x i32> bitcasts: once between
; the two register arguments and once against a vector loaded from %a2.
define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_andps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    andps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    andps (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_andps:
; ATOM:       # %bb.0:
; ATOM-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT:    andps (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_andps:
; SLM:       # %bb.0:
; SLM-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    andps (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_andps:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [7:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_andps:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_andps:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [7:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_andps:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_andps:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [6:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_andps:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_andps:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [7:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_andps:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_andps:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_andps:
; SKX:       # %bb.0:
; SKX-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_andps:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_andps:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_andps:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    andps %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    andps (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_andps:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vandps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %lhs = bitcast <4 x float> %a0 to <4 x i32>
  %rhs = bitcast <4 x float> %a1 to <4 x i32>
  %and.rr = and <4 x i32> %lhs, %rhs
  %mem = load <4 x float>, <4 x float> *%a2, align 16
  %mem.i = bitcast <4 x float> %mem to <4 x i32>
  %and.rm = and <4 x i32> %and.rr, %mem.i
  %res = bitcast <4 x i32> %and.rm to <4 x float>
  ret <4 x float> %res
}

; AND-NOT of float vectors: each step inverts one operand (xor with all-ones)
; before the AND, first register/register and then against a loaded vector.
define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_andnotps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    andnps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    andnps (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_andnotps:
; ATOM:       # %bb.0:
; ATOM-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT:    andnps (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_andnotps:
; SLM:       # %bb.0:
; SLM-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    andnps (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_andnotps:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [7:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_andnotps:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_andnotps:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [7:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_andnotps:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_andnotps:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [6:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_andnotps:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_andnotps:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [7:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_andnotps:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_andnotps:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_andnotps:
; SKX:       # %bb.0:
; SKX-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_andnotps:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_andnotps:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_andnotps:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    andnps %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    andnps (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_andnotps:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %lhs = bitcast <4 x float> %a0 to <4 x i32>
  %rhs = bitcast <4 x float> %a1 to <4 x i32>
  %not.lhs = xor <4 x i32> %lhs, <i32 -1, i32 -1, i32 -1, i32 -1>
  %andn.rr = and <4 x i32> %not.lhs, %rhs
  %mem = load <4 x float>, <4 x float> *%a2, align 16
  %mem.i = bitcast <4 x float> %mem to <4 x i32>
  %not.rr = xor <4 x i32> %andn.rr, <i32 -1, i32 -1, i32 -1, i32 -1>
  %andn.rm = and <4 x i32> %mem.i, %not.rr
  %res = bitcast <4 x i32> %andn.rm to <4 x float>
  ret <4 x float> %res
}

; Packed ordered-equal compare: %a0 is compared against %a1 and against a
; vector loaded from %a2; the sign-extended masks are ORed so both compares
; contribute to the returned value.
define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_cmpps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
; GENERIC-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cmpps:
; ATOM:       # %bb.0:
; ATOM-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [5:5.00]
; ATOM-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_cmpps:
; SLM:       # %bb.0:
; SLM-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
; SLM-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cmpps:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cmpps:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cmpps:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
; HASWELL-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cmpps:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; HASWELL-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cmpps:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cmpps:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; BROADWELL-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cmpps:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [10:0.50]
; SKYLAKE-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cmpps:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
; SKYLAKE-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cmpps:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_cmpps:
; SKX:       # %bb.0:
; SKX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
; SKX-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cmpps:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [2:1.00]
; BTVER2-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [7:1.00]
; BTVER2-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cmpps:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
; BTVER2-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cmpps:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
; ZNVER1-SSE-NEXT:    cmpeqps (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT:    orps %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cmpps:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; ZNVER1-NEXT:    vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT:    vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %cmp.rr = fcmp oeq <4 x float> %a0, %a1
  %mem = load <4 x float>, <4 x float> *%a2, align 16
  %cmp.rm = fcmp oeq <4 x float> %a0, %mem
  %ext.rr = sext <4 x i1> %cmp.rr to <4 x i32>
  %ext.rm = sext <4 x i1> %cmp.rm to <4 x i32>
  %or = or <4 x i32> %ext.rr, %ext.rm
  %res = bitcast <4 x i32> %or to <4 x float>
  ret <4 x float> %res
}

; Scalar compare through the llvm.x86.sse.cmp.ss intrinsic with immediate 0:
; first between the two scalar arguments, then between that result and a
; float loaded from %a2. Element 0 of the final vector is returned.
define float @test_cmpss(float %a0, float %a1, float *%a2) {
; GENERIC-LABEL: test_cmpss:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cmpss:
; ATOM:       # %bb.0:
; ATOM-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_cmpss:
; SLM:       # %bb.0:
; SLM-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cmpss:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cmpss:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cmpss:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [8:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cmpss:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cmpss:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cmpss:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cmpss:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [9:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cmpss:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cmpss:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_cmpss:
; SKX:       # %bb.0:
; SKX-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cmpss:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [2:1.00]
; BTVER2-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [7:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cmpss:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cmpss:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT:    cmpeqss (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cmpss:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %v0 = insertelement <4 x float> undef, float %a0, i32 0
  %v1 = insertelement <4 x float> undef, float %a1, i32 0
  %cmp.rr = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %v0, <4 x float> %v1, i8 0)
  %mem = load float, float *%a2, align 4
  %vmem = insertelement <4 x float> undef, float %mem, i32 0
  %cmp.rm = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %cmp.rr, <4 x float> %vmem, i8 0)
  %res = extractelement <4 x float> %cmp.rm, i32 0
  ret float %res
}
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone

define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_comiss:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    comiss %xmm1, %xmm0 # sched: [2:1.00]
; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
; GENERIC-NEXT:    sete %cl # sched: [1:0.50]
; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
; GENERIC-NEXT:    comiss (%rdi), %xmm0 # sched: [8:1.00]
; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
; GENERIC-NEXT:    sete %dl # sched: [1:0.50]
; GENERIC-NEXT:    andb %al, %dl # sched: [1:0.33]
; GENERIC-NEXT:    orb %cl, %dl # sched: [1:0.33]
; GENERIC-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_comiss:
; ATOM:       # %bb.0:
; ATOM-NEXT:    comiss %xmm1, %xmm0 # sched: [9:4.50]
; ATOM-NEXT:    setnp %al # sched: [1:0.50]
; ATOM-NEXT:    sete %cl # sched: [1:0.50]
; ATOM-NEXT:    andb %al, %cl # sched: [1:0.50]
; ATOM-NEXT:    comiss (%rdi), %xmm0 # sched: [10:5.00]
; ATOM-NEXT:    setnp %al # sched: [1:0.50]
; ATOM-NEXT:    sete %dl # sched: [1:0.50]
; ATOM-NEXT:    andb %al, %dl # sched: [1:0.50]
; ATOM-NEXT:    orb %cl, %dl # sched: [1:0.50]
; ATOM-NEXT:    movzbl %dl, %eax # sched: [1:1.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_comiss:
; SLM:       # %bb.0:
; SLM-NEXT:    comiss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    setnp %al # sched: [1:0.50]
; SLM-NEXT:    sete %cl # sched: [1:0.50]
; SLM-NEXT:    andb %al, %cl # sched: [1:0.50]
; SLM-NEXT:    comiss (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    setnp %al # sched: [1:0.50]
; SLM-NEXT:    sete %dl # sched: [1:0.50]
; SLM-NEXT:    andb %al, %dl # sched: [1:0.50]
; SLM-NEXT:    orb %cl, %dl # sched: [1:0.50]
; SLM-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_comiss:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [2:1.00]
; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
; SANDY-SSE-NEXT:    sete %cl # sched: [1:0.50]
; SANDY-SSE-NEXT:    andb %al, %cl # sched: [1:0.33]
; SANDY-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [8:1.00]
; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
; SANDY-SSE-NEXT:    sete %dl # sched: [1:0.50]
; SANDY-SSE-NEXT:    andb %al, %dl # sched: [1:0.33]
; SANDY-SSE-NEXT:    orb %cl, %dl # sched: [1:0.33]
; SANDY-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_comiss:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vcomiss %xmm1, %xmm0 # sched: [2:1.00]
; SANDY-NEXT:    setnp %al # sched: [1:0.50]
; SANDY-NEXT:    sete %cl # sched: [1:0.50]
; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
; SANDY-NEXT:    vcomiss (%rdi), %xmm0 # sched: [8:1.00]
; SANDY-NEXT:    setnp %al # sched: [1:0.50]
; SANDY-NEXT:    sete %dl # sched: [1:0.50]
; SANDY-NEXT:    andb %al, %dl # sched: [1:0.33]
; SANDY-NEXT:    orb %cl, %dl # sched: [1:0.33]
; SANDY-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_comiss:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
; HASWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
; HASWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
; HASWELL-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [8:1.00]
; HASWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
; HASWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
; HASWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
; HASWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
; HASWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_comiss:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vcomiss %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    setnp %al # sched: [1:0.50]
; HASWELL-NEXT:    sete %cl # sched: [1:0.50]
; HASWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
; HASWELL-NEXT:    vcomiss (%rdi), %xmm0 # sched: [8:1.00]
; HASWELL-NEXT:    setnp %al # sched: [1:0.50]
; HASWELL-NEXT:    sete %dl # sched: [1:0.50]
; HASWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
; HASWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
; HASWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_comiss:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
; BROADWELL-SSE-NEXT:    sete %cl # sched: [1:0.50]
; BROADWELL-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
; BROADWELL-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT:    setnp %al # sched: [1:0.50]
; BROADWELL-SSE-NEXT:    sete %dl # sched: [1:0.50]
; BROADWELL-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
; BROADWELL-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
; BROADWELL-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_comiss:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vcomiss %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
; BROADWELL-NEXT:    sete %cl # sched: [1:0.50]
; BROADWELL-NEXT:    andb %al, %cl # sched: [1:0.25]
; BROADWELL-NEXT:    vcomiss (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT:    setnp %al # sched: [1:0.50]
; BROADWELL-NEXT:    sete %dl # sched: [1:0.50]
; BROADWELL-NEXT:    andb %al, %dl # sched: [1:0.25]
; BROADWELL-NEXT:    orb %cl, %dl # sched: [1:0.25]
; BROADWELL-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_comiss:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [2:1.00]
; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
; SKYLAKE-SSE-NEXT:    sete %cl # sched: [1:0.50]
; SKYLAKE-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
; SKYLAKE-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [7:1.00]
; SKYLAKE-SSE-NEXT:    setnp %al # sched: [1:0.50]
; SKYLAKE-SSE-NEXT:    sete %dl # sched: [1:0.50]
; SKYLAKE-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
; SKYLAKE-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
; SKYLAKE-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_comiss:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vcomiss %xmm1, %xmm0 # sched: [2:1.00]
; SKYLAKE-NEXT:    setnp %al # sched: [1:0.50]
; SKYLAKE-NEXT:    sete %cl # sched: [1:0.50]
; SKYLAKE-NEXT:    andb %al, %cl # sched: [1:0.25]
; SKYLAKE-NEXT:    vcomiss (%rdi), %xmm0 # sched: [7:1.00]
; SKYLAKE-NEXT:    setnp %al # sched: [1:0.50]
; SKYLAKE-NEXT:    sete %dl # sched: [1:0.50]
; SKYLAKE-NEXT:    andb %al, %dl # sched: [1:0.25]
; SKYLAKE-NEXT:    orb %cl, %dl # sched: [1:0.25]
; SKYLAKE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_comiss:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    comiss %xmm1, %xmm0 # sched: [2:1.00]
; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
; SKX-SSE-NEXT:    sete %cl # sched: [1:0.50]
; SKX-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
; SKX-SSE-NEXT:    comiss (%rdi), %xmm0 # sched: [7:1.00]
; SKX-SSE-NEXT:    setnp %al # sched: [1:0.50]
; SKX-SSE-NEXT:    sete %dl # sched: [1:0.50]
; SKX-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
; SKX-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
; SKX-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: 
test_comiss: 886 ; SKX: # %bb.0: 887 ; SKX-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00] 888 ; SKX-NEXT: setnp %al # sched: [1:0.50] 889 ; SKX-NEXT: sete %cl # sched: [1:0.50] 890 ; SKX-NEXT: andb %al, %cl # sched: [1:0.25] 891 ; SKX-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00] 892 ; SKX-NEXT: setnp %al # sched: [1:0.50] 893 ; SKX-NEXT: sete %dl # sched: [1:0.50] 894 ; SKX-NEXT: andb %al, %dl # sched: [1:0.25] 895 ; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] 896 ; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] 897 ; SKX-NEXT: retq # sched: [7:1.00] 898 ; 899 ; BTVER2-SSE-LABEL: test_comiss: 900 ; BTVER2-SSE: # %bb.0: 901 ; BTVER2-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00] 902 ; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] 903 ; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50] 904 ; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] 905 ; BTVER2-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00] 906 ; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] 907 ; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50] 908 ; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] 909 ; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] 910 ; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] 911 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 912 ; 913 ; BTVER2-LABEL: test_comiss: 914 ; BTVER2: # %bb.0: 915 ; BTVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] 916 ; BTVER2-NEXT: setnp %al # sched: [1:0.50] 917 ; BTVER2-NEXT: sete %cl # sched: [1:0.50] 918 ; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50] 919 ; BTVER2-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00] 920 ; BTVER2-NEXT: setnp %al # sched: [1:0.50] 921 ; BTVER2-NEXT: sete %dl # sched: [1:0.50] 922 ; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50] 923 ; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50] 924 ; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] 925 ; BTVER2-NEXT: retq # sched: [4:1.00] 926 ; 927 ; ZNVER1-SSE-LABEL: test_comiss: 928 ; ZNVER1-SSE: # %bb.0: 929 ; ZNVER1-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00] 930 ; ZNVER1-SSE-NEXT: setnp %al # sched: 
[1:0.25] 931 ; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25] 932 ; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 933 ; ZNVER1-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [10:1.00] 934 ; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] 935 ; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25] 936 ; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25] 937 ; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] 938 ; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 939 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 940 ; 941 ; ZNVER1-LABEL: test_comiss: 942 ; ZNVER1: # %bb.0: 943 ; ZNVER1-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] 944 ; ZNVER1-NEXT: setnp %al # sched: [1:0.25] 945 ; ZNVER1-NEXT: sete %cl # sched: [1:0.25] 946 ; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25] 947 ; ZNVER1-NEXT: vcomiss (%rdi), %xmm0 # sched: [10:1.00] 948 ; ZNVER1-NEXT: setnp %al # sched: [1:0.25] 949 ; ZNVER1-NEXT: sete %dl # sched: [1:0.25] 950 ; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25] 951 ; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25] 952 ; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25] 953 ; ZNVER1-NEXT: retq # sched: [1:0.50] 954 %1 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) 955 %2 = load <4 x float>, <4 x float> *%a2, align 4 956 %3 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %2) 957 %4 = or i32 %1, %3 958 ret i32 %4 959 } 960 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone 961 962 define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { 963 ; GENERIC-LABEL: test_cvtsi2ss: 964 ; GENERIC: # %bb.0: 965 ; GENERIC-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00] 966 ; GENERIC-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00] 967 ; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 968 ; GENERIC-NEXT: retq # sched: [1:1.00] 969 ; 970 ; ATOM-LABEL: test_cvtsi2ss: 971 ; ATOM: # %bb.0: 972 ; ATOM-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [7:3.50] 973 ; ATOM-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [6:3.00] 974 ; ATOM-NEXT: addss %xmm1, %xmm0 # sched: 
[5:5.00] 975 ; ATOM-NEXT: retq # sched: [79:39.50] 976 ; 977 ; SLM-LABEL: test_cvtsi2ss: 978 ; SLM: # %bb.0: 979 ; SLM-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [7:1.00] 980 ; SLM-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:0.50] 981 ; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 982 ; SLM-NEXT: retq # sched: [4:1.00] 983 ; 984 ; SANDY-SSE-LABEL: test_cvtsi2ss: 985 ; SANDY-SSE: # %bb.0: 986 ; SANDY-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00] 987 ; SANDY-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00] 988 ; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 989 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 990 ; 991 ; SANDY-LABEL: test_cvtsi2ss: 992 ; SANDY: # %bb.0: 993 ; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00] 994 ; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00] 995 ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 996 ; SANDY-NEXT: retq # sched: [1:1.00] 997 ; 998 ; HASWELL-SSE-LABEL: test_cvtsi2ss: 999 ; HASWELL-SSE: # %bb.0: 1000 ; HASWELL-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:1.00] 1001 ; HASWELL-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00] 1002 ; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 1003 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1004 ; 1005 ; HASWELL-LABEL: test_cvtsi2ss: 1006 ; HASWELL: # %bb.0: 1007 ; HASWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] 1008 ; HASWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 1009 ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1010 ; HASWELL-NEXT: retq # sched: [7:1.00] 1011 ; 1012 ; BROADWELL-SSE-LABEL: test_cvtsi2ss: 1013 ; BROADWELL-SSE: # %bb.0: 1014 ; BROADWELL-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:1.00] 1015 ; BROADWELL-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00] 1016 ; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 1017 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1018 ; 1019 ; BROADWELL-LABEL: test_cvtsi2ss: 1020 ; BROADWELL: # %bb.0: 1021 ; BROADWELL-NEXT: vcvtsi2ssl %edi, 
%xmm0, %xmm0 # sched: [4:1.00] 1022 ; BROADWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 1023 ; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1024 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1025 ; 1026 ; SKYLAKE-SSE-LABEL: test_cvtsi2ss: 1027 ; SKYLAKE-SSE: # %bb.0: 1028 ; SKYLAKE-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00] 1029 ; SKYLAKE-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00] 1030 ; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] 1031 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1032 ; 1033 ; SKYLAKE-LABEL: test_cvtsi2ss: 1034 ; SKYLAKE: # %bb.0: 1035 ; SKYLAKE-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] 1036 ; SKYLAKE-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 1037 ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1038 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1039 ; 1040 ; SKX-SSE-LABEL: test_cvtsi2ss: 1041 ; SKX-SSE: # %bb.0: 1042 ; SKX-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00] 1043 ; SKX-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00] 1044 ; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] 1045 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1046 ; 1047 ; SKX-LABEL: test_cvtsi2ss: 1048 ; SKX: # %bb.0: 1049 ; SKX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] 1050 ; SKX-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 1051 ; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1052 ; SKX-NEXT: retq # sched: [7:1.00] 1053 ; 1054 ; BTVER2-SSE-LABEL: test_cvtsi2ss: 1055 ; BTVER2-SSE: # %bb.0: 1056 ; BTVER2-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [14:1.00] 1057 ; BTVER2-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [9:1.00] 1058 ; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 1059 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1060 ; 1061 ; BTVER2-LABEL: test_cvtsi2ss: 1062 ; BTVER2: # %bb.0: 1063 ; BTVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [9:1.00] 1064 ; BTVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [14:1.00] 1065 ; BTVER2-NEXT: vaddss %xmm1, 
%xmm0, %xmm0 # sched: [3:1.00] 1066 ; BTVER2-NEXT: retq # sched: [4:1.00] 1067 ; 1068 ; ZNVER1-SSE-LABEL: test_cvtsi2ss: 1069 ; ZNVER1-SSE: # %bb.0: 1070 ; ZNVER1-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [12:1.00] 1071 ; ZNVER1-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00] 1072 ; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 1073 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1074 ; 1075 ; ZNVER1-LABEL: test_cvtsi2ss: 1076 ; ZNVER1: # %bb.0: 1077 ; ZNVER1-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] 1078 ; ZNVER1-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [12:1.00] 1079 ; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1080 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1081 %1 = sitofp i32 %a0 to float 1082 %2 = load i32, i32 *%a1, align 4 1083 %3 = sitofp i32 %2 to float 1084 %4 = fadd float %1, %3 1085 ret float %4 1086 } 1087 1088 define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { 1089 ; GENERIC-LABEL: test_cvtsi2ssq: 1090 ; GENERIC: # %bb.0: 1091 ; GENERIC-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00] 1092 ; GENERIC-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00] 1093 ; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 1094 ; GENERIC-NEXT: retq # sched: [1:1.00] 1095 ; 1096 ; ATOM-LABEL: test_cvtsi2ssq: 1097 ; ATOM: # %bb.0: 1098 ; ATOM-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [7:3.50] 1099 ; ATOM-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:3.00] 1100 ; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00] 1101 ; ATOM-NEXT: retq # sched: [79:39.50] 1102 ; 1103 ; SLM-LABEL: test_cvtsi2ssq: 1104 ; SLM: # %bb.0: 1105 ; SLM-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [7:1.00] 1106 ; SLM-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [4:0.50] 1107 ; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 1108 ; SLM-NEXT: retq # sched: [4:1.00] 1109 ; 1110 ; SANDY-SSE-LABEL: test_cvtsi2ssq: 1111 ; SANDY-SSE: # %bb.0: 1112 ; SANDY-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00] 1113 ; SANDY-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00] 1114 ; SANDY-SSE-NEXT: addss 
%xmm1, %xmm0 # sched: [3:1.00] 1115 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1116 ; 1117 ; SANDY-LABEL: test_cvtsi2ssq: 1118 ; SANDY: # %bb.0: 1119 ; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] 1120 ; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00] 1121 ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1122 ; SANDY-NEXT: retq # sched: [1:1.00] 1123 ; 1124 ; HASWELL-SSE-LABEL: test_cvtsi2ssq: 1125 ; HASWELL-SSE: # %bb.0: 1126 ; HASWELL-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00] 1127 ; HASWELL-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00] 1128 ; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 1129 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1130 ; 1131 ; HASWELL-LABEL: test_cvtsi2ssq: 1132 ; HASWELL: # %bb.0: 1133 ; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] 1134 ; HASWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 1135 ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1136 ; HASWELL-NEXT: retq # sched: [7:1.00] 1137 ; 1138 ; BROADWELL-SSE-LABEL: test_cvtsi2ssq: 1139 ; BROADWELL-SSE: # %bb.0: 1140 ; BROADWELL-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00] 1141 ; BROADWELL-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00] 1142 ; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 1143 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1144 ; 1145 ; BROADWELL-LABEL: test_cvtsi2ssq: 1146 ; BROADWELL: # %bb.0: 1147 ; BROADWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] 1148 ; BROADWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 1149 ; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1150 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1151 ; 1152 ; SKYLAKE-SSE-LABEL: test_cvtsi2ssq: 1153 ; SKYLAKE-SSE: # %bb.0: 1154 ; SKYLAKE-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00] 1155 ; SKYLAKE-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00] 1156 ; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] 1157 ; SKYLAKE-SSE-NEXT: retq # 
sched: [7:1.00] 1158 ; 1159 ; SKYLAKE-LABEL: test_cvtsi2ssq: 1160 ; SKYLAKE: # %bb.0: 1161 ; SKYLAKE-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00] 1162 ; SKYLAKE-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 1163 ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1164 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1165 ; 1166 ; SKX-SSE-LABEL: test_cvtsi2ssq: 1167 ; SKX-SSE: # %bb.0: 1168 ; SKX-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00] 1169 ; SKX-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00] 1170 ; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] 1171 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1172 ; 1173 ; SKX-LABEL: test_cvtsi2ssq: 1174 ; SKX: # %bb.0: 1175 ; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00] 1176 ; SKX-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 1177 ; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1178 ; SKX-NEXT: retq # sched: [7:1.00] 1179 ; 1180 ; BTVER2-SSE-LABEL: test_cvtsi2ssq: 1181 ; BTVER2-SSE: # %bb.0: 1182 ; BTVER2-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [14:1.00] 1183 ; BTVER2-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [9:1.00] 1184 ; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 1185 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1186 ; 1187 ; BTVER2-LABEL: test_cvtsi2ssq: 1188 ; BTVER2: # %bb.0: 1189 ; BTVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [9:1.00] 1190 ; BTVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [14:1.00] 1191 ; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1192 ; BTVER2-NEXT: retq # sched: [4:1.00] 1193 ; 1194 ; ZNVER1-SSE-LABEL: test_cvtsi2ssq: 1195 ; ZNVER1-SSE: # %bb.0: 1196 ; ZNVER1-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [12:1.00] 1197 ; ZNVER1-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:1.00] 1198 ; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 1199 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1200 ; 1201 ; ZNVER1-LABEL: test_cvtsi2ssq: 1202 ; ZNVER1: # %bb.0: 1203 ; ZNVER1-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: 
[5:1.00] 1204 ; ZNVER1-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [12:1.00] 1205 ; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1206 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1207 %1 = sitofp i64 %a0 to float 1208 %2 = load i64, i64 *%a1, align 8 1209 %3 = sitofp i64 %2 to float 1210 %4 = fadd float %1, %3 1211 ret float %4 1212 } 1213 1214 define i32 @test_cvtss2si(float %a0, float *%a1) { 1215 ; GENERIC-LABEL: test_cvtss2si: 1216 ; GENERIC: # %bb.0: 1217 ; GENERIC-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00] 1218 ; GENERIC-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00] 1219 ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] 1220 ; GENERIC-NEXT: retq # sched: [1:1.00] 1221 ; 1222 ; ATOM-LABEL: test_cvtss2si: 1223 ; ATOM: # %bb.0: 1224 ; ATOM-NEXT: cvtss2si (%rdi), %eax # sched: [9:4.50] 1225 ; ATOM-NEXT: cvtss2si %xmm0, %ecx # sched: [8:4.00] 1226 ; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50] 1227 ; ATOM-NEXT: retq # sched: [79:39.50] 1228 ; 1229 ; SLM-LABEL: test_cvtss2si: 1230 ; SLM: # %bb.0: 1231 ; SLM-NEXT: cvtss2si (%rdi), %eax # sched: [7:1.00] 1232 ; SLM-NEXT: cvtss2si %xmm0, %ecx # sched: [4:0.50] 1233 ; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50] 1234 ; SLM-NEXT: retq # sched: [4:1.00] 1235 ; 1236 ; SANDY-SSE-LABEL: test_cvtss2si: 1237 ; SANDY-SSE: # %bb.0: 1238 ; SANDY-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00] 1239 ; SANDY-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00] 1240 ; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] 1241 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1242 ; 1243 ; SANDY-LABEL: test_cvtss2si: 1244 ; SANDY: # %bb.0: 1245 ; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00] 1246 ; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [10:1.00] 1247 ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] 1248 ; SANDY-NEXT: retq # sched: [1:1.00] 1249 ; 1250 ; HASWELL-SSE-LABEL: test_cvtss2si: 1251 ; HASWELL-SSE: # %bb.0: 1252 ; HASWELL-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [4:1.00] 1253 ; HASWELL-SSE-NEXT: cvtss2si (%rdi), %eax # 
sched: [9:1.00] 1254 ; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1255 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1256 ; 1257 ; HASWELL-LABEL: test_cvtss2si: 1258 ; HASWELL: # %bb.0: 1259 ; HASWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00] 1260 ; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [9:1.00] 1261 ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] 1262 ; HASWELL-NEXT: retq # sched: [7:1.00] 1263 ; 1264 ; BROADWELL-SSE-LABEL: test_cvtss2si: 1265 ; BROADWELL-SSE: # %bb.0: 1266 ; BROADWELL-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00] 1267 ; BROADWELL-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [4:1.00] 1268 ; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1269 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1270 ; 1271 ; BROADWELL-LABEL: test_cvtss2si: 1272 ; BROADWELL: # %bb.0: 1273 ; BROADWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [9:1.00] 1274 ; BROADWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00] 1275 ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] 1276 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1277 ; 1278 ; SKYLAKE-SSE-LABEL: test_cvtss2si: 1279 ; SKYLAKE-SSE: # %bb.0: 1280 ; SKYLAKE-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [6:1.00] 1281 ; SKYLAKE-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [11:1.00] 1282 ; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1283 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1284 ; 1285 ; SKYLAKE-LABEL: test_cvtss2si: 1286 ; SKYLAKE: # %bb.0: 1287 ; SKYLAKE-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00] 1288 ; SKYLAKE-NEXT: vcvtss2si (%rdi), %eax # sched: [11:1.00] 1289 ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1290 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1291 ; 1292 ; SKX-SSE-LABEL: test_cvtss2si: 1293 ; SKX-SSE: # %bb.0: 1294 ; SKX-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [6:1.00] 1295 ; SKX-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [11:1.00] 1296 ; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1297 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1298 ; 1299 ; SKX-LABEL: test_cvtss2si: 1300 ; SKX: # %bb.0: 
1301 ; SKX-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00] 1302 ; SKX-NEXT: vcvtss2si (%rdi), %eax # sched: [11:1.00] 1303 ; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] 1304 ; SKX-NEXT: retq # sched: [7:1.00] 1305 ; 1306 ; BTVER2-SSE-LABEL: test_cvtss2si: 1307 ; BTVER2-SSE: # %bb.0: 1308 ; BTVER2-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [12:1.00] 1309 ; BTVER2-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [7:1.00] 1310 ; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] 1311 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1312 ; 1313 ; BTVER2-LABEL: test_cvtss2si: 1314 ; BTVER2: # %bb.0: 1315 ; BTVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [12:1.00] 1316 ; BTVER2-NEXT: vcvtss2si %xmm0, %ecx # sched: [7:1.00] 1317 ; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] 1318 ; BTVER2-NEXT: retq # sched: [4:1.00] 1319 ; 1320 ; ZNVER1-SSE-LABEL: test_cvtss2si: 1321 ; ZNVER1-SSE: # %bb.0: 1322 ; ZNVER1-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [12:1.00] 1323 ; ZNVER1-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00] 1324 ; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1325 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1326 ; 1327 ; ZNVER1-LABEL: test_cvtss2si: 1328 ; ZNVER1: # %bb.0: 1329 ; ZNVER1-NEXT: vcvtss2si (%rdi), %eax # sched: [12:1.00] 1330 ; ZNVER1-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00] 1331 ; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] 1332 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1333 %1 = insertelement <4 x float> undef, float %a0, i32 0 1334 %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %1) 1335 %3 = load float, float *%a1, align 4 1336 %4 = insertelement <4 x float> undef, float %3, i32 0 1337 %5 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %4) 1338 %6 = add i32 %2, %5 1339 ret i32 %6 1340 } 1341 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone 1342 1343 define i64 @test_cvtss2siq(float %a0, float *%a1) { 1344 ; GENERIC-LABEL: test_cvtss2siq: 1345 ; GENERIC: # %bb.0: 1346 ; GENERIC-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00] 1347 ; GENERIC-NEXT: 
cvtss2si (%rdi), %rax # sched: [9:1.00] 1348 ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] 1349 ; GENERIC-NEXT: retq # sched: [1:1.00] 1350 ; 1351 ; ATOM-LABEL: test_cvtss2siq: 1352 ; ATOM: # %bb.0: 1353 ; ATOM-NEXT: cvtss2si (%rdi), %rax # sched: [10:5.00] 1354 ; ATOM-NEXT: cvtss2si %xmm0, %rcx # sched: [9:4.50] 1355 ; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50] 1356 ; ATOM-NEXT: retq # sched: [79:39.50] 1357 ; 1358 ; SLM-LABEL: test_cvtss2siq: 1359 ; SLM: # %bb.0: 1360 ; SLM-NEXT: cvtss2si (%rdi), %rax # sched: [7:1.00] 1361 ; SLM-NEXT: cvtss2si %xmm0, %rcx # sched: [4:0.50] 1362 ; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50] 1363 ; SLM-NEXT: retq # sched: [4:1.00] 1364 ; 1365 ; SANDY-SSE-LABEL: test_cvtss2siq: 1366 ; SANDY-SSE: # %bb.0: 1367 ; SANDY-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00] 1368 ; SANDY-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00] 1369 ; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] 1370 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1371 ; 1372 ; SANDY-LABEL: test_cvtss2siq: 1373 ; SANDY: # %bb.0: 1374 ; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00] 1375 ; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [10:1.00] 1376 ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] 1377 ; SANDY-NEXT: retq # sched: [1:1.00] 1378 ; 1379 ; HASWELL-SSE-LABEL: test_cvtss2siq: 1380 ; HASWELL-SSE: # %bb.0: 1381 ; HASWELL-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [4:1.00] 1382 ; HASWELL-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00] 1383 ; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 1384 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1385 ; 1386 ; HASWELL-LABEL: test_cvtss2siq: 1387 ; HASWELL: # %bb.0: 1388 ; HASWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00] 1389 ; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [9:1.00] 1390 ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] 1391 ; HASWELL-NEXT: retq # sched: [7:1.00] 1392 ; 1393 ; BROADWELL-SSE-LABEL: test_cvtss2siq: 1394 ; BROADWELL-SSE: # %bb.0: 1395 ; BROADWELL-SSE-NEXT: cvtss2si (%rdi), %rax 
# sched: [9:1.00] 1396 ; BROADWELL-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [4:1.00] 1397 ; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 1398 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1399 ; 1400 ; BROADWELL-LABEL: test_cvtss2siq: 1401 ; BROADWELL: # %bb.0: 1402 ; BROADWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [9:1.00] 1403 ; BROADWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00] 1404 ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] 1405 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1406 ; 1407 ; SKYLAKE-SSE-LABEL: test_cvtss2siq: 1408 ; SKYLAKE-SSE: # %bb.0: 1409 ; SKYLAKE-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [6:1.00] 1410 ; SKYLAKE-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [11:1.00] 1411 ; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 1412 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1413 ; 1414 ; SKYLAKE-LABEL: test_cvtss2siq: 1415 ; SKYLAKE: # %bb.0: 1416 ; SKYLAKE-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00] 1417 ; SKYLAKE-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00] 1418 ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] 1419 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1420 ; 1421 ; SKX-SSE-LABEL: test_cvtss2siq: 1422 ; SKX-SSE: # %bb.0: 1423 ; SKX-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [7:1.00] 1424 ; SKX-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [11:1.00] 1425 ; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 1426 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1427 ; 1428 ; SKX-LABEL: test_cvtss2siq: 1429 ; SKX: # %bb.0: 1430 ; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [7:1.00] 1431 ; SKX-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00] 1432 ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] 1433 ; SKX-NEXT: retq # sched: [7:1.00] 1434 ; 1435 ; BTVER2-SSE-LABEL: test_cvtss2siq: 1436 ; BTVER2-SSE: # %bb.0: 1437 ; BTVER2-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [12:1.00] 1438 ; BTVER2-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [7:1.00] 1439 ; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] 1440 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1441 ; 1442 ; 
BTVER2-LABEL: test_cvtss2siq: 1443 ; BTVER2: # %bb.0: 1444 ; BTVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [12:1.00] 1445 ; BTVER2-NEXT: vcvtss2si %xmm0, %rcx # sched: [7:1.00] 1446 ; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] 1447 ; BTVER2-NEXT: retq # sched: [4:1.00] 1448 ; 1449 ; ZNVER1-SSE-LABEL: test_cvtss2siq: 1450 ; ZNVER1-SSE: # %bb.0: 1451 ; ZNVER1-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [12:1.00] 1452 ; ZNVER1-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00] 1453 ; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 1454 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1455 ; 1456 ; ZNVER1-LABEL: test_cvtss2siq: 1457 ; ZNVER1: # %bb.0: 1458 ; ZNVER1-NEXT: vcvtss2si (%rdi), %rax # sched: [12:1.00] 1459 ; ZNVER1-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00] 1460 ; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] 1461 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1462 %1 = insertelement <4 x float> undef, float %a0, i32 0 1463 %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %1) 1464 %3 = load float, float *%a1, align 4 1465 %4 = insertelement <4 x float> undef, float %3, i32 0 1466 %5 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %4) 1467 %6 = add i64 %2, %5 1468 ret i64 %6 1469 } 1470 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone 1471 1472 define i32 @test_cvttss2si(float %a0, float *%a1) { 1473 ; GENERIC-LABEL: test_cvttss2si: 1474 ; GENERIC: # %bb.0: 1475 ; GENERIC-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00] 1476 ; GENERIC-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00] 1477 ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] 1478 ; GENERIC-NEXT: retq # sched: [1:1.00] 1479 ; 1480 ; ATOM-LABEL: test_cvttss2si: 1481 ; ATOM: # %bb.0: 1482 ; ATOM-NEXT: cvttss2si (%rdi), %eax # sched: [9:4.50] 1483 ; ATOM-NEXT: cvttss2si %xmm0, %ecx # sched: [8:4.00] 1484 ; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50] 1485 ; ATOM-NEXT: retq # sched: [79:39.50] 1486 ; 1487 ; SLM-LABEL: test_cvttss2si: 1488 ; SLM: # %bb.0: 1489 ; SLM-NEXT: cvttss2si (%rdi), %eax # 
sched: [7:1.00] 1490 ; SLM-NEXT: cvttss2si %xmm0, %ecx # sched: [4:0.50] 1491 ; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50] 1492 ; SLM-NEXT: retq # sched: [4:1.00] 1493 ; 1494 ; SANDY-SSE-LABEL: test_cvttss2si: 1495 ; SANDY-SSE: # %bb.0: 1496 ; SANDY-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00] 1497 ; SANDY-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00] 1498 ; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] 1499 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1500 ; 1501 ; SANDY-LABEL: test_cvttss2si: 1502 ; SANDY: # %bb.0: 1503 ; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00] 1504 ; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00] 1505 ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] 1506 ; SANDY-NEXT: retq # sched: [1:1.00] 1507 ; 1508 ; HASWELL-SSE-LABEL: test_cvttss2si: 1509 ; HASWELL-SSE: # %bb.0: 1510 ; HASWELL-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [4:1.00] 1511 ; HASWELL-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00] 1512 ; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1513 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1514 ; 1515 ; HASWELL-LABEL: test_cvttss2si: 1516 ; HASWELL: # %bb.0: 1517 ; HASWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00] 1518 ; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [9:1.00] 1519 ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] 1520 ; HASWELL-NEXT: retq # sched: [7:1.00] 1521 ; 1522 ; BROADWELL-SSE-LABEL: test_cvttss2si: 1523 ; BROADWELL-SSE: # %bb.0: 1524 ; BROADWELL-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00] 1525 ; BROADWELL-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [4:1.00] 1526 ; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1527 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1528 ; 1529 ; BROADWELL-LABEL: test_cvttss2si: 1530 ; BROADWELL: # %bb.0: 1531 ; BROADWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [9:1.00] 1532 ; BROADWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00] 1533 ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] 1534 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1535 
; 1536 ; SKYLAKE-SSE-LABEL: test_cvttss2si: 1537 ; SKYLAKE-SSE: # %bb.0: 1538 ; SKYLAKE-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [7:1.00] 1539 ; SKYLAKE-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [11:1.00] 1540 ; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1541 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1542 ; 1543 ; SKYLAKE-LABEL: test_cvttss2si: 1544 ; SKYLAKE: # %bb.0: 1545 ; SKYLAKE-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00] 1546 ; SKYLAKE-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00] 1547 ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1548 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1549 ; 1550 ; SKX-SSE-LABEL: test_cvttss2si: 1551 ; SKX-SSE: # %bb.0: 1552 ; SKX-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [6:1.00] 1553 ; SKX-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [11:1.00] 1554 ; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1555 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1556 ; 1557 ; SKX-LABEL: test_cvttss2si: 1558 ; SKX: # %bb.0: 1559 ; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [6:1.00] 1560 ; SKX-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00] 1561 ; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] 1562 ; SKX-NEXT: retq # sched: [7:1.00] 1563 ; 1564 ; BTVER2-SSE-LABEL: test_cvttss2si: 1565 ; BTVER2-SSE: # %bb.0: 1566 ; BTVER2-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [12:1.00] 1567 ; BTVER2-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [7:1.00] 1568 ; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] 1569 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1570 ; 1571 ; BTVER2-LABEL: test_cvttss2si: 1572 ; BTVER2: # %bb.0: 1573 ; BTVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [12:1.00] 1574 ; BTVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00] 1575 ; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] 1576 ; BTVER2-NEXT: retq # sched: [4:1.00] 1577 ; 1578 ; ZNVER1-SSE-LABEL: test_cvttss2si: 1579 ; ZNVER1-SSE: # %bb.0: 1580 ; ZNVER1-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [12:1.00] 1581 ; ZNVER1-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00] 1582 ; 
ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1583 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1584 ; 1585 ; ZNVER1-LABEL: test_cvttss2si: 1586 ; ZNVER1: # %bb.0: 1587 ; ZNVER1-NEXT: vcvttss2si (%rdi), %eax # sched: [12:1.00] 1588 ; ZNVER1-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00] 1589 ; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] 1590 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1591 %1 = fptosi float %a0 to i32 1592 %2 = load float, float *%a1, align 4 1593 %3 = fptosi float %2 to i32 1594 %4 = add i32 %1, %3 1595 ret i32 %4 1596 } 1597 1598 define i64 @test_cvttss2siq(float %a0, float *%a1) { 1599 ; GENERIC-LABEL: test_cvttss2siq: 1600 ; GENERIC: # %bb.0: 1601 ; GENERIC-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00] 1602 ; GENERIC-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00] 1603 ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] 1604 ; GENERIC-NEXT: retq # sched: [1:1.00] 1605 ; 1606 ; ATOM-LABEL: test_cvttss2siq: 1607 ; ATOM: # %bb.0: 1608 ; ATOM-NEXT: cvttss2si (%rdi), %rax # sched: [10:5.00] 1609 ; ATOM-NEXT: cvttss2si %xmm0, %rcx # sched: [9:4.50] 1610 ; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50] 1611 ; ATOM-NEXT: retq # sched: [79:39.50] 1612 ; 1613 ; SLM-LABEL: test_cvttss2siq: 1614 ; SLM: # %bb.0: 1615 ; SLM-NEXT: cvttss2si (%rdi), %rax # sched: [7:1.00] 1616 ; SLM-NEXT: cvttss2si %xmm0, %rcx # sched: [4:0.50] 1617 ; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50] 1618 ; SLM-NEXT: retq # sched: [4:1.00] 1619 ; 1620 ; SANDY-SSE-LABEL: test_cvttss2siq: 1621 ; SANDY-SSE: # %bb.0: 1622 ; SANDY-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00] 1623 ; SANDY-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00] 1624 ; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] 1625 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1626 ; 1627 ; SANDY-LABEL: test_cvttss2siq: 1628 ; SANDY: # %bb.0: 1629 ; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00] 1630 ; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00] 1631 ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] 1632 ; 
SANDY-NEXT: retq # sched: [1:1.00] 1633 ; 1634 ; HASWELL-SSE-LABEL: test_cvttss2siq: 1635 ; HASWELL-SSE: # %bb.0: 1636 ; HASWELL-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [10:1.00] 1637 ; HASWELL-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [4:1.00] 1638 ; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 1639 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1640 ; 1641 ; HASWELL-LABEL: test_cvttss2siq: 1642 ; HASWELL: # %bb.0: 1643 ; HASWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00] 1644 ; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [9:1.00] 1645 ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] 1646 ; HASWELL-NEXT: retq # sched: [7:1.00] 1647 ; 1648 ; BROADWELL-SSE-LABEL: test_cvttss2siq: 1649 ; BROADWELL-SSE: # %bb.0: 1650 ; BROADWELL-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [10:1.00] 1651 ; BROADWELL-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [4:1.00] 1652 ; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 1653 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1654 ; 1655 ; BROADWELL-LABEL: test_cvttss2siq: 1656 ; BROADWELL: # %bb.0: 1657 ; BROADWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [9:1.00] 1658 ; BROADWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00] 1659 ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] 1660 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1661 ; 1662 ; SKYLAKE-SSE-LABEL: test_cvttss2siq: 1663 ; SKYLAKE-SSE: # %bb.0: 1664 ; SKYLAKE-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [7:1.00] 1665 ; SKYLAKE-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00] 1666 ; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 1667 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1668 ; 1669 ; SKYLAKE-LABEL: test_cvttss2siq: 1670 ; SKYLAKE: # %bb.0: 1671 ; SKYLAKE-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00] 1672 ; SKYLAKE-NEXT: vcvttss2si (%rdi), %rax # sched: [11:1.00] 1673 ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] 1674 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1675 ; 1676 ; SKX-SSE-LABEL: test_cvttss2siq: 1677 ; SKX-SSE: # %bb.0: 1678 ; SKX-SSE-NEXT: 
cvttss2si %xmm0, %rcx # sched: [7:1.00] 1679 ; SKX-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00] 1680 ; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 1681 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1682 ; 1683 ; SKX-LABEL: test_cvttss2siq: 1684 ; SKX: # %bb.0: 1685 ; SKX-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00] 1686 ; SKX-NEXT: vcvttss2si (%rdi), %rax # sched: [11:1.00] 1687 ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] 1688 ; SKX-NEXT: retq # sched: [7:1.00] 1689 ; 1690 ; BTVER2-SSE-LABEL: test_cvttss2siq: 1691 ; BTVER2-SSE: # %bb.0: 1692 ; BTVER2-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00] 1693 ; BTVER2-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [7:1.00] 1694 ; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] 1695 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1696 ; 1697 ; BTVER2-LABEL: test_cvttss2siq: 1698 ; BTVER2: # %bb.0: 1699 ; BTVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [12:1.00] 1700 ; BTVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00] 1701 ; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] 1702 ; BTVER2-NEXT: retq # sched: [4:1.00] 1703 ; 1704 ; ZNVER1-SSE-LABEL: test_cvttss2siq: 1705 ; ZNVER1-SSE: # %bb.0: 1706 ; ZNVER1-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00] 1707 ; ZNVER1-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00] 1708 ; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 1709 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1710 ; 1711 ; ZNVER1-LABEL: test_cvttss2siq: 1712 ; ZNVER1: # %bb.0: 1713 ; ZNVER1-NEXT: vcvttss2si (%rdi), %rax # sched: [12:1.00] 1714 ; ZNVER1-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00] 1715 ; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] 1716 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1717 %1 = fptosi float %a0 to i64 1718 %2 = load float, float *%a1, align 4 1719 %3 = fptosi float %2 to i64 1720 %4 = add i64 %1, %3 1721 ret i64 %4 1722 } 1723 1724 define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { 1725 ; GENERIC-LABEL: test_divps: 1726 ; GENERIC: # %bb.0: 1727 ; 
GENERIC-NEXT: divps %xmm1, %xmm0 # sched: [14:14.00] 1728 ; GENERIC-NEXT: divps (%rdi), %xmm0 # sched: [20:14.00] 1729 ; GENERIC-NEXT: retq # sched: [1:1.00] 1730 ; 1731 ; ATOM-LABEL: test_divps: 1732 ; ATOM: # %bb.0: 1733 ; ATOM-NEXT: divps %xmm1, %xmm0 # sched: [70:35.00] 1734 ; ATOM-NEXT: divps (%rdi), %xmm0 # sched: [70:35.00] 1735 ; ATOM-NEXT: retq # sched: [79:39.50] 1736 ; 1737 ; SLM-LABEL: test_divps: 1738 ; SLM: # %bb.0: 1739 ; SLM-NEXT: divps %xmm1, %xmm0 # sched: [39:39.00] 1740 ; SLM-NEXT: divps (%rdi), %xmm0 # sched: [42:39.00] 1741 ; SLM-NEXT: retq # sched: [4:1.00] 1742 ; 1743 ; SANDY-SSE-LABEL: test_divps: 1744 ; SANDY-SSE: # %bb.0: 1745 ; SANDY-SSE-NEXT: divps %xmm1, %xmm0 # sched: [14:14.00] 1746 ; SANDY-SSE-NEXT: divps (%rdi), %xmm0 # sched: [20:14.00] 1747 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1748 ; 1749 ; SANDY-LABEL: test_divps: 1750 ; SANDY: # %bb.0: 1751 ; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:14.00] 1752 ; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:14.00] 1753 ; SANDY-NEXT: retq # sched: [1:1.00] 1754 ; 1755 ; HASWELL-SSE-LABEL: test_divps: 1756 ; HASWELL-SSE: # %bb.0: 1757 ; HASWELL-SSE-NEXT: divps %xmm1, %xmm0 # sched: [13:7.00] 1758 ; HASWELL-SSE-NEXT: divps (%rdi), %xmm0 # sched: [19:7.00] 1759 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1760 ; 1761 ; HASWELL-LABEL: test_divps: 1762 ; HASWELL: # %bb.0: 1763 ; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:7.00] 1764 ; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [19:7.00] 1765 ; HASWELL-NEXT: retq # sched: [7:1.00] 1766 ; 1767 ; BROADWELL-SSE-LABEL: test_divps: 1768 ; BROADWELL-SSE: # %bb.0: 1769 ; BROADWELL-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:5.00] 1770 ; BROADWELL-SSE-NEXT: divps (%rdi), %xmm0 # sched: [16:5.00] 1771 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1772 ; 1773 ; BROADWELL-LABEL: test_divps: 1774 ; BROADWELL: # %bb.0: 1775 ; BROADWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:5.00] 1776 ; BROADWELL-NEXT: vdivps (%rdi), 
%xmm0, %xmm0 # sched: [16:5.00] 1777 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1778 ; 1779 ; SKYLAKE-SSE-LABEL: test_divps: 1780 ; SKYLAKE-SSE: # %bb.0: 1781 ; SKYLAKE-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:3.00] 1782 ; SKYLAKE-SSE-NEXT: divps (%rdi), %xmm0 # sched: [17:5.00] 1783 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1784 ; 1785 ; SKYLAKE-LABEL: test_divps: 1786 ; SKYLAKE: # %bb.0: 1787 ; SKYLAKE-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:3.00] 1788 ; SKYLAKE-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:5.00] 1789 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1790 ; 1791 ; SKX-SSE-LABEL: test_divps: 1792 ; SKX-SSE: # %bb.0: 1793 ; SKX-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:3.00] 1794 ; SKX-SSE-NEXT: divps (%rdi), %xmm0 # sched: [17:5.00] 1795 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1796 ; 1797 ; SKX-LABEL: test_divps: 1798 ; SKX: # %bb.0: 1799 ; SKX-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:3.00] 1800 ; SKX-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:5.00] 1801 ; SKX-NEXT: retq # sched: [7:1.00] 1802 ; 1803 ; BTVER2-SSE-LABEL: test_divps: 1804 ; BTVER2-SSE: # %bb.0: 1805 ; BTVER2-SSE-NEXT: divps %xmm1, %xmm0 # sched: [19:19.00] 1806 ; BTVER2-SSE-NEXT: divps (%rdi), %xmm0 # sched: [24:19.00] 1807 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1808 ; 1809 ; BTVER2-LABEL: test_divps: 1810 ; BTVER2: # %bb.0: 1811 ; BTVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [19:19.00] 1812 ; BTVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [24:19.00] 1813 ; BTVER2-NEXT: retq # sched: [4:1.00] 1814 ; 1815 ; ZNVER1-SSE-LABEL: test_divps: 1816 ; ZNVER1-SSE: # %bb.0: 1817 ; ZNVER1-SSE-NEXT: divps %xmm1, %xmm0 # sched: [15:1.00] 1818 ; ZNVER1-SSE-NEXT: divps (%rdi), %xmm0 # sched: [22:1.00] 1819 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1820 ; 1821 ; ZNVER1-LABEL: test_divps: 1822 ; ZNVER1: # %bb.0: 1823 ; ZNVER1-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [15:1.00] 1824 ; ZNVER1-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [22:1.00] 1825 ; ZNVER1-NEXT: retq # sched: [1:0.50] 
1826 %1 = fdiv <4 x float> %a0, %a1 1827 %2 = load <4 x float>, <4 x float> *%a2, align 16 1828 %3 = fdiv <4 x float> %1, %2 1829 ret <4 x float> %3 1830 } 1831 1832 define float @test_divss(float %a0, float %a1, float *%a2) { 1833 ; GENERIC-LABEL: test_divss: 1834 ; GENERIC: # %bb.0: 1835 ; GENERIC-NEXT: divss %xmm1, %xmm0 # sched: [14:14.00] 1836 ; GENERIC-NEXT: divss (%rdi), %xmm0 # sched: [20:14.00] 1837 ; GENERIC-NEXT: retq # sched: [1:1.00] 1838 ; 1839 ; ATOM-LABEL: test_divss: 1840 ; ATOM: # %bb.0: 1841 ; ATOM-NEXT: divss %xmm1, %xmm0 # sched: [34:17.00] 1842 ; ATOM-NEXT: divss (%rdi), %xmm0 # sched: [34:17.00] 1843 ; ATOM-NEXT: retq # sched: [79:39.50] 1844 ; 1845 ; SLM-LABEL: test_divss: 1846 ; SLM: # %bb.0: 1847 ; SLM-NEXT: divss %xmm1, %xmm0 # sched: [19:17.00] 1848 ; SLM-NEXT: divss (%rdi), %xmm0 # sched: [22:17.00] 1849 ; SLM-NEXT: retq # sched: [4:1.00] 1850 ; 1851 ; SANDY-SSE-LABEL: test_divss: 1852 ; SANDY-SSE: # %bb.0: 1853 ; SANDY-SSE-NEXT: divss %xmm1, %xmm0 # sched: [14:14.00] 1854 ; SANDY-SSE-NEXT: divss (%rdi), %xmm0 # sched: [20:14.00] 1855 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1856 ; 1857 ; SANDY-LABEL: test_divss: 1858 ; SANDY: # %bb.0: 1859 ; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:14.00] 1860 ; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:14.00] 1861 ; SANDY-NEXT: retq # sched: [1:1.00] 1862 ; 1863 ; HASWELL-SSE-LABEL: test_divss: 1864 ; HASWELL-SSE: # %bb.0: 1865 ; HASWELL-SSE-NEXT: divss %xmm1, %xmm0 # sched: [13:7.00] 1866 ; HASWELL-SSE-NEXT: divss (%rdi), %xmm0 # sched: [18:7.00] 1867 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1868 ; 1869 ; HASWELL-LABEL: test_divss: 1870 ; HASWELL: # %bb.0: 1871 ; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:7.00] 1872 ; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [18:7.00] 1873 ; HASWELL-NEXT: retq # sched: [7:1.00] 1874 ; 1875 ; BROADWELL-SSE-LABEL: test_divss: 1876 ; BROADWELL-SSE: # %bb.0: 1877 ; BROADWELL-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:3.00] 1878 
; BROADWELL-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:5.00] 1879 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1880 ; 1881 ; BROADWELL-LABEL: test_divss: 1882 ; BROADWELL: # %bb.0: 1883 ; BROADWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:3.00] 1884 ; BROADWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:5.00] 1885 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1886 ; 1887 ; SKYLAKE-SSE-LABEL: test_divss: 1888 ; SKYLAKE-SSE: # %bb.0: 1889 ; SKYLAKE-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:3.00] 1890 ; SKYLAKE-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:3.00] 1891 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1892 ; 1893 ; SKYLAKE-LABEL: test_divss: 1894 ; SKYLAKE: # %bb.0: 1895 ; SKYLAKE-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:3.00] 1896 ; SKYLAKE-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:3.00] 1897 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1898 ; 1899 ; SKX-SSE-LABEL: test_divss: 1900 ; SKX-SSE: # %bb.0: 1901 ; SKX-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:3.00] 1902 ; SKX-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:3.00] 1903 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1904 ; 1905 ; SKX-LABEL: test_divss: 1906 ; SKX: # %bb.0: 1907 ; SKX-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:3.00] 1908 ; SKX-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:3.00] 1909 ; SKX-NEXT: retq # sched: [7:1.00] 1910 ; 1911 ; BTVER2-SSE-LABEL: test_divss: 1912 ; BTVER2-SSE: # %bb.0: 1913 ; BTVER2-SSE-NEXT: divss %xmm1, %xmm0 # sched: [19:19.00] 1914 ; BTVER2-SSE-NEXT: divss (%rdi), %xmm0 # sched: [24:19.00] 1915 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1916 ; 1917 ; BTVER2-LABEL: test_divss: 1918 ; BTVER2: # %bb.0: 1919 ; BTVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [19:19.00] 1920 ; BTVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [24:19.00] 1921 ; BTVER2-NEXT: retq # sched: [4:1.00] 1922 ; 1923 ; ZNVER1-SSE-LABEL: test_divss: 1924 ; ZNVER1-SSE: # %bb.0: 1925 ; ZNVER1-SSE-NEXT: divss %xmm1, %xmm0 # sched: [15:1.00] 1926 ; ZNVER1-SSE-NEXT: divss (%rdi), %xmm0 # sched: [22:1.00] 
1927 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1928 ; 1929 ; ZNVER1-LABEL: test_divss: 1930 ; ZNVER1: # %bb.0: 1931 ; ZNVER1-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [15:1.00] 1932 ; ZNVER1-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [22:1.00] 1933 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1934 %1 = fdiv float %a0, %a1 1935 %2 = load float, float *%a2, align 4 1936 %3 = fdiv float %1, %2 1937 ret float %3 1938 } 1939 1940 define void @test_ldmxcsr(i32 %a0) { 1941 ; GENERIC-LABEL: test_ldmxcsr: 1942 ; GENERIC: # %bb.0: 1943 ; GENERIC-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1944 ; GENERIC-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] 1945 ; GENERIC-NEXT: retq # sched: [1:1.00] 1946 ; 1947 ; ATOM-LABEL: test_ldmxcsr: 1948 ; ATOM: # %bb.0: 1949 ; ATOM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1950 ; ATOM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:2.50] 1951 ; ATOM-NEXT: retq # sched: [79:39.50] 1952 ; 1953 ; SLM-LABEL: test_ldmxcsr: 1954 ; SLM: # %bb.0: 1955 ; SLM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1956 ; SLM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00] 1957 ; SLM-NEXT: retq # sched: [4:1.00] 1958 ; 1959 ; SANDY-SSE-LABEL: test_ldmxcsr: 1960 ; SANDY-SSE: # %bb.0: 1961 ; SANDY-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1962 ; SANDY-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] 1963 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1964 ; 1965 ; SANDY-LABEL: test_ldmxcsr: 1966 ; SANDY: # %bb.0: 1967 ; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1968 ; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] 1969 ; SANDY-NEXT: retq # sched: [1:1.00] 1970 ; 1971 ; HASWELL-SSE-LABEL: test_ldmxcsr: 1972 ; HASWELL-SSE: # %bb.0: 1973 ; HASWELL-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1974 ; HASWELL-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] 1975 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1976 ; 1977 ; HASWELL-LABEL: test_ldmxcsr: 1978 ; HASWELL: # %bb.0: 1979 ; HASWELL-NEXT: 
movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1980 ; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] 1981 ; HASWELL-NEXT: retq # sched: [7:1.00] 1982 ; 1983 ; BROADWELL-SSE-LABEL: test_ldmxcsr: 1984 ; BROADWELL-SSE: # %bb.0: 1985 ; BROADWELL-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1986 ; BROADWELL-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] 1987 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1988 ; 1989 ; BROADWELL-LABEL: test_ldmxcsr: 1990 ; BROADWELL: # %bb.0: 1991 ; BROADWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1992 ; BROADWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] 1993 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1994 ; 1995 ; SKYLAKE-SSE-LABEL: test_ldmxcsr: 1996 ; SKYLAKE-SSE: # %bb.0: 1997 ; SKYLAKE-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 1998 ; SKYLAKE-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] 1999 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2000 ; 2001 ; SKYLAKE-LABEL: test_ldmxcsr: 2002 ; SKYLAKE: # %bb.0: 2003 ; SKYLAKE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 2004 ; SKYLAKE-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] 2005 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2006 ; 2007 ; SKX-SSE-LABEL: test_ldmxcsr: 2008 ; SKX-SSE: # %bb.0: 2009 ; SKX-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 2010 ; SKX-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] 2011 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2012 ; 2013 ; SKX-LABEL: test_ldmxcsr: 2014 ; SKX: # %bb.0: 2015 ; SKX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 2016 ; SKX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] 2017 ; SKX-NEXT: retq # sched: [7:1.00] 2018 ; 2019 ; BTVER2-SSE-LABEL: test_ldmxcsr: 2020 ; BTVER2-SSE: # %bb.0: 2021 ; BTVER2-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 2022 ; BTVER2-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] 2023 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2024 ; 2025 ; BTVER2-LABEL: test_ldmxcsr: 2026 ; BTVER2: # %bb.0: 
2027 ; BTVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] 2028 ; BTVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] 2029 ; BTVER2-NEXT: retq # sched: [4:1.00] 2030 ; 2031 ; ZNVER1-SSE-LABEL: test_ldmxcsr: 2032 ; ZNVER1-SSE: # %bb.0: 2033 ; ZNVER1-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50] 2034 ; ZNVER1-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [100:0.25] 2035 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2036 ; 2037 ; ZNVER1-LABEL: test_ldmxcsr: 2038 ; ZNVER1: # %bb.0: 2039 ; ZNVER1-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50] 2040 ; ZNVER1-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [100:0.25] 2041 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2042 %1 = alloca i32, align 4 2043 %2 = bitcast i32* %1 to i8* 2044 store i32 %a0, i32* %1 2045 call void @llvm.x86.sse.ldmxcsr(i8* %2) 2046 ret void 2047 } 2048 declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone 2049 2050 define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { 2051 ; GENERIC-LABEL: test_maxps: 2052 ; GENERIC: # %bb.0: 2053 ; GENERIC-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] 2054 ; GENERIC-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00] 2055 ; GENERIC-NEXT: retq # sched: [1:1.00] 2056 ; 2057 ; ATOM-LABEL: test_maxps: 2058 ; ATOM: # %bb.0: 2059 ; ATOM-NEXT: maxps %xmm1, %xmm0 # sched: [5:5.00] 2060 ; ATOM-NEXT: maxps (%rdi), %xmm0 # sched: [5:5.00] 2061 ; ATOM-NEXT: retq # sched: [79:39.50] 2062 ; 2063 ; SLM-LABEL: test_maxps: 2064 ; SLM: # %bb.0: 2065 ; SLM-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] 2066 ; SLM-NEXT: maxps (%rdi), %xmm0 # sched: [6:1.00] 2067 ; SLM-NEXT: retq # sched: [4:1.00] 2068 ; 2069 ; SANDY-SSE-LABEL: test_maxps: 2070 ; SANDY-SSE: # %bb.0: 2071 ; SANDY-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] 2072 ; SANDY-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00] 2073 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2074 ; 2075 ; SANDY-LABEL: test_maxps: 2076 ; SANDY: # %bb.0: 2077 ; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 
2078 ; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 2079 ; SANDY-NEXT: retq # sched: [1:1.00] 2080 ; 2081 ; HASWELL-SSE-LABEL: test_maxps: 2082 ; HASWELL-SSE: # %bb.0: 2083 ; HASWELL-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] 2084 ; HASWELL-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00] 2085 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2086 ; 2087 ; HASWELL-LABEL: test_maxps: 2088 ; HASWELL: # %bb.0: 2089 ; HASWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2090 ; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 2091 ; HASWELL-NEXT: retq # sched: [7:1.00] 2092 ; 2093 ; BROADWELL-SSE-LABEL: test_maxps: 2094 ; BROADWELL-SSE: # %bb.0: 2095 ; BROADWELL-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] 2096 ; BROADWELL-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [8:1.00] 2097 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2098 ; 2099 ; BROADWELL-LABEL: test_maxps: 2100 ; BROADWELL: # %bb.0: 2101 ; BROADWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2102 ; BROADWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 2103 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2104 ; 2105 ; SKYLAKE-SSE-LABEL: test_maxps: 2106 ; SKYLAKE-SSE: # %bb.0: 2107 ; SKYLAKE-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.50] 2108 ; SKYLAKE-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:0.50] 2109 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2110 ; 2111 ; SKYLAKE-LABEL: test_maxps: 2112 ; SKYLAKE: # %bb.0: 2113 ; SKYLAKE-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2114 ; SKYLAKE-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 2115 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2116 ; 2117 ; SKX-SSE-LABEL: test_maxps: 2118 ; SKX-SSE: # %bb.0: 2119 ; SKX-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.50] 2120 ; SKX-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:0.50] 2121 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2122 ; 2123 ; SKX-LABEL: test_maxps: 2124 ; SKX: # %bb.0: 2125 ; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2126 ; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # 
sched: [10:0.50] 2127 ; SKX-NEXT: retq # sched: [7:1.00] 2128 ; 2129 ; BTVER2-SSE-LABEL: test_maxps: 2130 ; BTVER2-SSE: # %bb.0: 2131 ; BTVER2-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [2:1.00] 2132 ; BTVER2-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [7:1.00] 2133 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2134 ; 2135 ; BTVER2-LABEL: test_maxps: 2136 ; BTVER2: # %bb.0: 2137 ; BTVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 2138 ; BTVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 2139 ; BTVER2-NEXT: retq # sched: [4:1.00] 2140 ; 2141 ; ZNVER1-SSE-LABEL: test_maxps: 2142 ; ZNVER1-SSE: # %bb.0: 2143 ; ZNVER1-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] 2144 ; ZNVER1-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:1.00] 2145 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2146 ; 2147 ; ZNVER1-LABEL: test_maxps: 2148 ; ZNVER1: # %bb.0: 2149 ; ZNVER1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2150 ; ZNVER1-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 2151 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2152 %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) 2153 %2 = load <4 x float>, <4 x float> *%a2, align 16 2154 %3 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %1, <4 x float> %2) 2155 ret <4 x float> %3 2156 } 2157 declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone 2158 2159 define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { 2160 ; GENERIC-LABEL: test_maxss: 2161 ; GENERIC: # %bb.0: 2162 ; GENERIC-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] 2163 ; GENERIC-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00] 2164 ; GENERIC-NEXT: retq # sched: [1:1.00] 2165 ; 2166 ; ATOM-LABEL: test_maxss: 2167 ; ATOM: # %bb.0: 2168 ; ATOM-NEXT: maxss %xmm1, %xmm0 # sched: [5:5.00] 2169 ; ATOM-NEXT: maxss (%rdi), %xmm0 # sched: [5:5.00] 2170 ; ATOM-NEXT: retq # sched: [79:39.50] 2171 ; 2172 ; SLM-LABEL: test_maxss: 2173 ; SLM: # %bb.0: 2174 ; SLM-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] 
2175 ; SLM-NEXT: maxss (%rdi), %xmm0 # sched: [6:1.00] 2176 ; SLM-NEXT: retq # sched: [4:1.00] 2177 ; 2178 ; SANDY-SSE-LABEL: test_maxss: 2179 ; SANDY-SSE: # %bb.0: 2180 ; SANDY-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] 2181 ; SANDY-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00] 2182 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2183 ; 2184 ; SANDY-LABEL: test_maxss: 2185 ; SANDY: # %bb.0: 2186 ; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2187 ; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 2188 ; SANDY-NEXT: retq # sched: [1:1.00] 2189 ; 2190 ; HASWELL-SSE-LABEL: test_maxss: 2191 ; HASWELL-SSE: # %bb.0: 2192 ; HASWELL-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] 2193 ; HASWELL-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [8:1.00] 2194 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2195 ; 2196 ; HASWELL-LABEL: test_maxss: 2197 ; HASWELL: # %bb.0: 2198 ; HASWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2199 ; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 2200 ; HASWELL-NEXT: retq # sched: [7:1.00] 2201 ; 2202 ; BROADWELL-SSE-LABEL: test_maxss: 2203 ; BROADWELL-SSE: # %bb.0: 2204 ; BROADWELL-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] 2205 ; BROADWELL-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [8:1.00] 2206 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2207 ; 2208 ; BROADWELL-LABEL: test_maxss: 2209 ; BROADWELL: # %bb.0: 2210 ; BROADWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2211 ; BROADWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 2212 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2213 ; 2214 ; SKYLAKE-SSE-LABEL: test_maxss: 2215 ; SKYLAKE-SSE: # %bb.0: 2216 ; SKYLAKE-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.50] 2217 ; SKYLAKE-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:0.50] 2218 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2219 ; 2220 ; SKYLAKE-LABEL: test_maxss: 2221 ; SKYLAKE: # %bb.0: 2222 ; SKYLAKE-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2223 ; SKYLAKE-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # 
sched: [9:0.50] 2224 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2225 ; 2226 ; SKX-SSE-LABEL: test_maxss: 2227 ; SKX-SSE: # %bb.0: 2228 ; SKX-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.50] 2229 ; SKX-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:0.50] 2230 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2231 ; 2232 ; SKX-LABEL: test_maxss: 2233 ; SKX: # %bb.0: 2234 ; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2235 ; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 2236 ; SKX-NEXT: retq # sched: [7:1.00] 2237 ; 2238 ; BTVER2-SSE-LABEL: test_maxss: 2239 ; BTVER2-SSE: # %bb.0: 2240 ; BTVER2-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [2:1.00] 2241 ; BTVER2-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [7:1.00] 2242 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2243 ; 2244 ; BTVER2-LABEL: test_maxss: 2245 ; BTVER2: # %bb.0: 2246 ; BTVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 2247 ; BTVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 2248 ; BTVER2-NEXT: retq # sched: [4:1.00] 2249 ; 2250 ; ZNVER1-SSE-LABEL: test_maxss: 2251 ; ZNVER1-SSE: # %bb.0: 2252 ; ZNVER1-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] 2253 ; ZNVER1-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [10:1.00] 2254 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2255 ; 2256 ; ZNVER1-LABEL: test_maxss: 2257 ; ZNVER1: # %bb.0: 2258 ; ZNVER1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2259 ; ZNVER1-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 2260 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2261 %1 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) 2262 %2 = load <4 x float>, <4 x float> *%a2, align 16 2263 %3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %2) 2264 ret <4 x float> %3 2265 } 2266 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone 2267 2268 define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { 2269 ; GENERIC-LABEL: test_minps: 2270 ; GENERIC: # %bb.0: 2271 ; GENERIC-NEXT: minps %xmm1, 
%xmm0 # sched: [3:1.00] 2272 ; GENERIC-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00] 2273 ; GENERIC-NEXT: retq # sched: [1:1.00] 2274 ; 2275 ; ATOM-LABEL: test_minps: 2276 ; ATOM: # %bb.0: 2277 ; ATOM-NEXT: minps %xmm1, %xmm0 # sched: [5:5.00] 2278 ; ATOM-NEXT: minps (%rdi), %xmm0 # sched: [5:5.00] 2279 ; ATOM-NEXT: retq # sched: [79:39.50] 2280 ; 2281 ; SLM-LABEL: test_minps: 2282 ; SLM: # %bb.0: 2283 ; SLM-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] 2284 ; SLM-NEXT: minps (%rdi), %xmm0 # sched: [6:1.00] 2285 ; SLM-NEXT: retq # sched: [4:1.00] 2286 ; 2287 ; SANDY-SSE-LABEL: test_minps: 2288 ; SANDY-SSE: # %bb.0: 2289 ; SANDY-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] 2290 ; SANDY-SSE-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00] 2291 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2292 ; 2293 ; SANDY-LABEL: test_minps: 2294 ; SANDY: # %bb.0: 2295 ; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2296 ; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 2297 ; SANDY-NEXT: retq # sched: [1:1.00] 2298 ; 2299 ; HASWELL-SSE-LABEL: test_minps: 2300 ; HASWELL-SSE: # %bb.0: 2301 ; HASWELL-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] 2302 ; HASWELL-SSE-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00] 2303 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2304 ; 2305 ; HASWELL-LABEL: test_minps: 2306 ; HASWELL: # %bb.0: 2307 ; HASWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2308 ; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 2309 ; HASWELL-NEXT: retq # sched: [7:1.00] 2310 ; 2311 ; BROADWELL-SSE-LABEL: test_minps: 2312 ; BROADWELL-SSE: # %bb.0: 2313 ; BROADWELL-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] 2314 ; BROADWELL-SSE-NEXT: minps (%rdi), %xmm0 # sched: [8:1.00] 2315 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2316 ; 2317 ; BROADWELL-LABEL: test_minps: 2318 ; BROADWELL: # %bb.0: 2319 ; BROADWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2320 ; BROADWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 2321 ; BROADWELL-NEXT: retq 
# sched: [7:1.00] 2322 ; 2323 ; SKYLAKE-SSE-LABEL: test_minps: 2324 ; SKYLAKE-SSE: # %bb.0: 2325 ; SKYLAKE-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.50] 2326 ; SKYLAKE-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:0.50] 2327 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2328 ; 2329 ; SKYLAKE-LABEL: test_minps: 2330 ; SKYLAKE: # %bb.0: 2331 ; SKYLAKE-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2332 ; SKYLAKE-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 2333 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2334 ; 2335 ; SKX-SSE-LABEL: test_minps: 2336 ; SKX-SSE: # %bb.0: 2337 ; SKX-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.50] 2338 ; SKX-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:0.50] 2339 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2340 ; 2341 ; SKX-LABEL: test_minps: 2342 ; SKX: # %bb.0: 2343 ; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2344 ; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 2345 ; SKX-NEXT: retq # sched: [7:1.00] 2346 ; 2347 ; BTVER2-SSE-LABEL: test_minps: 2348 ; BTVER2-SSE: # %bb.0: 2349 ; BTVER2-SSE-NEXT: minps %xmm1, %xmm0 # sched: [2:1.00] 2350 ; BTVER2-SSE-NEXT: minps (%rdi), %xmm0 # sched: [7:1.00] 2351 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2352 ; 2353 ; BTVER2-LABEL: test_minps: 2354 ; BTVER2: # %bb.0: 2355 ; BTVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 2356 ; BTVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 2357 ; BTVER2-NEXT: retq # sched: [4:1.00] 2358 ; 2359 ; ZNVER1-SSE-LABEL: test_minps: 2360 ; ZNVER1-SSE: # %bb.0: 2361 ; ZNVER1-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] 2362 ; ZNVER1-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:1.00] 2363 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2364 ; 2365 ; ZNVER1-LABEL: test_minps: 2366 ; ZNVER1: # %bb.0: 2367 ; ZNVER1-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2368 ; ZNVER1-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 2369 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2370 %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x 
float> %a1) 2371 %2 = load <4 x float>, <4 x float> *%a2, align 16 2372 %3 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %1, <4 x float> %2) 2373 ret <4 x float> %3 2374 } 2375 declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone 2376 2377 define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { 2378 ; GENERIC-LABEL: test_minss: 2379 ; GENERIC: # %bb.0: 2380 ; GENERIC-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] 2381 ; GENERIC-NEXT: minss (%rdi), %xmm0 # sched: [9:1.00] 2382 ; GENERIC-NEXT: retq # sched: [1:1.00] 2383 ; 2384 ; ATOM-LABEL: test_minss: 2385 ; ATOM: # %bb.0: 2386 ; ATOM-NEXT: minss %xmm1, %xmm0 # sched: [5:5.00] 2387 ; ATOM-NEXT: minss (%rdi), %xmm0 # sched: [5:5.00] 2388 ; ATOM-NEXT: retq # sched: [79:39.50] 2389 ; 2390 ; SLM-LABEL: test_minss: 2391 ; SLM: # %bb.0: 2392 ; SLM-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] 2393 ; SLM-NEXT: minss (%rdi), %xmm0 # sched: [6:1.00] 2394 ; SLM-NEXT: retq # sched: [4:1.00] 2395 ; 2396 ; SANDY-SSE-LABEL: test_minss: 2397 ; SANDY-SSE: # %bb.0: 2398 ; SANDY-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] 2399 ; SANDY-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:1.00] 2400 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2401 ; 2402 ; SANDY-LABEL: test_minss: 2403 ; SANDY: # %bb.0: 2404 ; SANDY-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2405 ; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 2406 ; SANDY-NEXT: retq # sched: [1:1.00] 2407 ; 2408 ; HASWELL-SSE-LABEL: test_minss: 2409 ; HASWELL-SSE: # %bb.0: 2410 ; HASWELL-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] 2411 ; HASWELL-SSE-NEXT: minss (%rdi), %xmm0 # sched: [8:1.00] 2412 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2413 ; 2414 ; HASWELL-LABEL: test_minss: 2415 ; HASWELL: # %bb.0: 2416 ; HASWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2417 ; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 2418 ; HASWELL-NEXT: retq # sched: [7:1.00] 2419 ; 2420 ; BROADWELL-SSE-LABEL: 
test_minss: 2421 ; BROADWELL-SSE: # %bb.0: 2422 ; BROADWELL-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] 2423 ; BROADWELL-SSE-NEXT: minss (%rdi), %xmm0 # sched: [8:1.00] 2424 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2425 ; 2426 ; BROADWELL-LABEL: test_minss: 2427 ; BROADWELL: # %bb.0: 2428 ; BROADWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2429 ; BROADWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 2430 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2431 ; 2432 ; SKYLAKE-SSE-LABEL: test_minss: 2433 ; SKYLAKE-SSE: # %bb.0: 2434 ; SKYLAKE-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.50] 2435 ; SKYLAKE-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:0.50] 2436 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2437 ; 2438 ; SKYLAKE-LABEL: test_minss: 2439 ; SKYLAKE: # %bb.0: 2440 ; SKYLAKE-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2441 ; SKYLAKE-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 2442 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2443 ; 2444 ; SKX-SSE-LABEL: test_minss: 2445 ; SKX-SSE: # %bb.0: 2446 ; SKX-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.50] 2447 ; SKX-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:0.50] 2448 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2449 ; 2450 ; SKX-LABEL: test_minss: 2451 ; SKX: # %bb.0: 2452 ; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2453 ; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 2454 ; SKX-NEXT: retq # sched: [7:1.00] 2455 ; 2456 ; BTVER2-SSE-LABEL: test_minss: 2457 ; BTVER2-SSE: # %bb.0: 2458 ; BTVER2-SSE-NEXT: minss %xmm1, %xmm0 # sched: [2:1.00] 2459 ; BTVER2-SSE-NEXT: minss (%rdi), %xmm0 # sched: [7:1.00] 2460 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2461 ; 2462 ; BTVER2-LABEL: test_minss: 2463 ; BTVER2: # %bb.0: 2464 ; BTVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 2465 ; BTVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 2466 ; BTVER2-NEXT: retq # sched: [4:1.00] 2467 ; 2468 ; ZNVER1-SSE-LABEL: test_minss: 2469 ; ZNVER1-SSE: # %bb.0: 2470 ; ZNVER1-SSE-NEXT: minss 
%xmm1, %xmm0 # sched: [3:1.00] 2471 ; ZNVER1-SSE-NEXT: minss (%rdi), %xmm0 # sched: [10:1.00] 2472 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2473 ; 2474 ; ZNVER1-LABEL: test_minss: 2475 ; ZNVER1: # %bb.0: 2476 ; ZNVER1-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2477 ; ZNVER1-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 2478 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2479 %1 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) 2480 %2 = load <4 x float>, <4 x float> *%a2, align 16 2481 %3 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %2) 2482 ret <4 x float> %3 2483 } 2484 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone 2485 2486 define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { 2487 ; GENERIC-LABEL: test_movaps: 2488 ; GENERIC: # %bb.0: 2489 ; GENERIC-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] 2490 ; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] 2491 ; GENERIC-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] 2492 ; GENERIC-NEXT: retq # sched: [1:1.00] 2493 ; 2494 ; ATOM-LABEL: test_movaps: 2495 ; ATOM: # %bb.0: 2496 ; ATOM-NEXT: movaps (%rdi), %xmm0 # sched: [1:1.00] 2497 ; ATOM-NEXT: addps %xmm0, %xmm0 # sched: [5:5.00] 2498 ; ATOM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] 2499 ; ATOM-NEXT: retq # sched: [79:39.50] 2500 ; 2501 ; SLM-LABEL: test_movaps: 2502 ; SLM: # %bb.0: 2503 ; SLM-NEXT: movaps (%rdi), %xmm0 # sched: [3:1.00] 2504 ; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] 2505 ; SLM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] 2506 ; SLM-NEXT: retq # sched: [4:1.00] 2507 ; 2508 ; SANDY-SSE-LABEL: test_movaps: 2509 ; SANDY-SSE: # %bb.0: 2510 ; SANDY-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] 2511 ; SANDY-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] 2512 ; SANDY-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] 2513 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2514 ; 2515 ; SANDY-LABEL: test_movaps: 2516 ; SANDY: # %bb.0: 2517 ; SANDY-NEXT: 
vmovaps (%rdi), %xmm0 # sched: [6:0.50] 2518 ; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 2519 ; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] 2520 ; SANDY-NEXT: retq # sched: [1:1.00] 2521 ; 2522 ; HASWELL-SSE-LABEL: test_movaps: 2523 ; HASWELL-SSE: # %bb.0: 2524 ; HASWELL-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] 2525 ; HASWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] 2526 ; HASWELL-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] 2527 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2528 ; 2529 ; HASWELL-LABEL: test_movaps: 2530 ; HASWELL: # %bb.0: 2531 ; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] 2532 ; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 2533 ; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] 2534 ; HASWELL-NEXT: retq # sched: [7:1.00] 2535 ; 2536 ; BROADWELL-SSE-LABEL: test_movaps: 2537 ; BROADWELL-SSE: # %bb.0: 2538 ; BROADWELL-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [5:0.50] 2539 ; BROADWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] 2540 ; BROADWELL-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] 2541 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2542 ; 2543 ; BROADWELL-LABEL: test_movaps: 2544 ; BROADWELL: # %bb.0: 2545 ; BROADWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:0.50] 2546 ; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 2547 ; BROADWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] 2548 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2549 ; 2550 ; SKYLAKE-SSE-LABEL: test_movaps: 2551 ; SKYLAKE-SSE: # %bb.0: 2552 ; SKYLAKE-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] 2553 ; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50] 2554 ; SKYLAKE-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] 2555 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2556 ; 2557 ; SKYLAKE-LABEL: test_movaps: 2558 ; SKYLAKE: # %bb.0: 2559 ; SKYLAKE-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] 2560 ; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 2561 ; SKYLAKE-NEXT: vmovaps %xmm0, 
(%rsi) # sched: [1:1.00] 2562 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2563 ; 2564 ; SKX-SSE-LABEL: test_movaps: 2565 ; SKX-SSE: # %bb.0: 2566 ; SKX-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] 2567 ; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50] 2568 ; SKX-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] 2569 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2570 ; 2571 ; SKX-LABEL: test_movaps: 2572 ; SKX: # %bb.0: 2573 ; SKX-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] 2574 ; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 2575 ; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] 2576 ; SKX-NEXT: retq # sched: [7:1.00] 2577 ; 2578 ; BTVER2-SSE-LABEL: test_movaps: 2579 ; BTVER2-SSE: # %bb.0: 2580 ; BTVER2-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [5:1.00] 2581 ; BTVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] 2582 ; BTVER2-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] 2583 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2584 ; 2585 ; BTVER2-LABEL: test_movaps: 2586 ; BTVER2: # %bb.0: 2587 ; BTVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:1.00] 2588 ; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 2589 ; BTVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] 2590 ; BTVER2-NEXT: retq # sched: [4:1.00] 2591 ; 2592 ; ZNVER1-SSE-LABEL: test_movaps: 2593 ; ZNVER1-SSE: # %bb.0: 2594 ; ZNVER1-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [8:0.50] 2595 ; ZNVER1-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] 2596 ; ZNVER1-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:0.50] 2597 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2598 ; 2599 ; ZNVER1-LABEL: test_movaps: 2600 ; ZNVER1: # %bb.0: 2601 ; ZNVER1-NEXT: vmovaps (%rdi), %xmm0 # sched: [8:0.50] 2602 ; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 2603 ; ZNVER1-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:0.50] 2604 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2605 %1 = load <4 x float>, <4 x float> *%a0, align 16 2606 %2 = fadd <4 x float> %1, %1 2607 store <4 x float> %2, <4 x float> *%a1, align 16 2608 ret void 2609 } 
2610 2611 ; TODO (v)movhlps 2612 2613 define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) { 2614 ; GENERIC-LABEL: test_movhlps: 2615 ; GENERIC: # %bb.0: 2616 ; GENERIC-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] 2617 ; GENERIC-NEXT: retq # sched: [1:1.00] 2618 ; 2619 ; ATOM-LABEL: test_movhlps: 2620 ; ATOM: # %bb.0: 2621 ; ATOM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] 2622 ; ATOM-NEXT: nop # sched: [1:0.50] 2623 ; ATOM-NEXT: nop # sched: [1:0.50] 2624 ; ATOM-NEXT: nop # sched: [1:0.50] 2625 ; ATOM-NEXT: nop # sched: [1:0.50] 2626 ; ATOM-NEXT: nop # sched: [1:0.50] 2627 ; ATOM-NEXT: nop # sched: [1:0.50] 2628 ; ATOM-NEXT: retq # sched: [79:39.50] 2629 ; 2630 ; SLM-LABEL: test_movhlps: 2631 ; SLM: # %bb.0: 2632 ; SLM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] 2633 ; SLM-NEXT: retq # sched: [4:1.00] 2634 ; 2635 ; SANDY-SSE-LABEL: test_movhlps: 2636 ; SANDY-SSE: # %bb.0: 2637 ; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] 2638 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2639 ; 2640 ; SANDY-LABEL: test_movhlps: 2641 ; SANDY: # %bb.0: 2642 ; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] 2643 ; SANDY-NEXT: retq # sched: [1:1.00] 2644 ; 2645 ; HASWELL-SSE-LABEL: test_movhlps: 2646 ; HASWELL-SSE: # %bb.0: 2647 ; HASWELL-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] 2648 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2649 ; 2650 ; HASWELL-LABEL: test_movhlps: 2651 ; HASWELL: # %bb.0: 2652 ; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] 2653 ; HASWELL-NEXT: retq # sched: [7:1.00] 2654 ; 2655 ; BROADWELL-SSE-LABEL: test_movhlps: 2656 ; BROADWELL-SSE: # %bb.0: 2657 ; BROADWELL-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] 2658 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2659 ; 2660 ; BROADWELL-LABEL: test_movhlps: 2661 ; BROADWELL: # %bb.0: 2662 ; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = 
xmm1[1],xmm0[1] sched: [1:1.00] 2663 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2664 ; 2665 ; SKYLAKE-SSE-LABEL: test_movhlps: 2666 ; SKYLAKE-SSE: # %bb.0: 2667 ; SKYLAKE-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] 2668 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2669 ; 2670 ; SKYLAKE-LABEL: test_movhlps: 2671 ; SKYLAKE: # %bb.0: 2672 ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] 2673 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2674 ; 2675 ; SKX-SSE-LABEL: test_movhlps: 2676 ; SKX-SSE: # %bb.0: 2677 ; SKX-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] 2678 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2679 ; 2680 ; SKX-LABEL: test_movhlps: 2681 ; SKX: # %bb.0: 2682 ; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] 2683 ; SKX-NEXT: retq # sched: [7:1.00] 2684 ; 2685 ; BTVER2-SSE-LABEL: test_movhlps: 2686 ; BTVER2-SSE: # %bb.0: 2687 ; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50] 2688 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2689 ; 2690 ; BTVER2-LABEL: test_movhlps: 2691 ; BTVER2: # %bb.0: 2692 ; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50] 2693 ; BTVER2-NEXT: retq # sched: [4:1.00] 2694 ; 2695 ; ZNVER1-SSE-LABEL: test_movhlps: 2696 ; ZNVER1-SSE: # %bb.0: 2697 ; ZNVER1-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50] 2698 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2699 ; 2700 ; ZNVER1-LABEL: test_movhlps: 2701 ; ZNVER1: # %bb.0: 2702 ; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50] 2703 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2704 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 2705 ret <4 x float> %1 2706 } 2707 2708 ; TODO (v)movhps 2709 2710 define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { 2711 ; GENERIC-LABEL: test_movhps: 2712 ; GENERIC: # %bb.0: 2713 ; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 2714 ; 
GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 2715 ; GENERIC-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] 2716 ; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] 2717 ; GENERIC-NEXT: retq # sched: [1:1.00] 2718 ; 2719 ; ATOM-LABEL: test_movhps: 2720 ; ATOM: # %bb.0: 2721 ; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] 2722 ; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00] 2723 ; ATOM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] 2724 ; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] 2725 ; ATOM-NEXT: retq # sched: [79:39.50] 2726 ; 2727 ; SLM-LABEL: test_movhps: 2728 ; SLM: # %bb.0: 2729 ; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00] 2730 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 2731 ; SLM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] 2732 ; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] 2733 ; SLM-NEXT: retq # sched: [4:1.00] 2734 ; 2735 ; SANDY-SSE-LABEL: test_movhps: 2736 ; SANDY-SSE: # %bb.0: 2737 ; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 2738 ; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 2739 ; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] 2740 ; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] 2741 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2742 ; 2743 ; SANDY-LABEL: test_movhps: 2744 ; SANDY: # %bb.0: 2745 ; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 2746 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2747 ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] 2748 ; SANDY-NEXT: retq # sched: [1:1.00] 2749 ; 2750 ; HASWELL-SSE-LABEL: test_movhps: 2751 ; HASWELL-SSE: # %bb.0: 2752 ; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 2753 ; HASWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 2754 ; HASWELL-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] 2755 ; HASWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] 2756 ; 
HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2757 ; 2758 ; HASWELL-LABEL: test_movhps: 2759 ; HASWELL: # %bb.0: 2760 ; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 2761 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2762 ; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] 2763 ; HASWELL-NEXT: retq # sched: [7:1.00] 2764 ; 2765 ; BROADWELL-SSE-LABEL: test_movhps: 2766 ; BROADWELL-SSE: # %bb.0: 2767 ; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 2768 ; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 2769 ; BROADWELL-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] 2770 ; BROADWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] 2771 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2772 ; 2773 ; BROADWELL-LABEL: test_movhps: 2774 ; BROADWELL: # %bb.0: 2775 ; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 2776 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2777 ; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] 2778 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2779 ; 2780 ; SKYLAKE-SSE-LABEL: test_movhps: 2781 ; SKYLAKE-SSE: # %bb.0: 2782 ; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 2783 ; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50] 2784 ; SKYLAKE-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] 2785 ; SKYLAKE-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] 2786 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2787 ; 2788 ; SKYLAKE-LABEL: test_movhps: 2789 ; SKYLAKE: # %bb.0: 2790 ; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 2791 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2792 ; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] 2793 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2794 ; 2795 ; SKX-SSE-LABEL: test_movhps: 2796 ; SKX-SSE: # %bb.0: 2797 ; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 2798 ; SKX-SSE-NEXT: addps 
%xmm0, %xmm1 # sched: [4:0.50] 2799 ; SKX-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] 2800 ; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] 2801 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2802 ; 2803 ; SKX-LABEL: test_movhps: 2804 ; SKX: # %bb.0: 2805 ; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 2806 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2807 ; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] 2808 ; SKX-NEXT: retq # sched: [7:1.00] 2809 ; 2810 ; BTVER2-SSE-LABEL: test_movhps: 2811 ; BTVER2-SSE: # %bb.0: 2812 ; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 2813 ; BTVER2-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 2814 ; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50] 2815 ; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [2:1.00] 2816 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2817 ; 2818 ; BTVER2-LABEL: test_movhps: 2819 ; BTVER2: # %bb.0: 2820 ; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 2821 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2822 ; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [3:1.00] 2823 ; BTVER2-NEXT: retq # sched: [4:1.00] 2824 ; 2825 ; ZNVER1-SSE-LABEL: test_movhps: 2826 ; ZNVER1-SSE: # %bb.0: 2827 ; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] 2828 ; ZNVER1-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 2829 ; ZNVER1-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50] 2830 ; ZNVER1-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:0.50] 2831 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2832 ; 2833 ; ZNVER1-LABEL: test_movhps: 2834 ; ZNVER1: # %bb.0: 2835 ; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] 2836 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2837 ; ZNVER1-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:3.00] 2838 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2839 %1 = bitcast x86_mmx* %a2 to <2 x float>* 2840 %2 = load <2 x float>, <2 x 
float> *%1, align 8 2841 %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2842 %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 2843 %5 = fadd <4 x float> %a0, %4 2844 %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 2, i32 3> 2845 store <2 x float> %6, <2 x float>* %1 2846 ret void 2847 } 2848 2849 ; TODO (v)movlhps 2850 2851 define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) { 2852 ; GENERIC-LABEL: test_movlhps: 2853 ; GENERIC: # %bb.0: 2854 ; GENERIC-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 2855 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 2856 ; GENERIC-NEXT: retq # sched: [1:1.00] 2857 ; 2858 ; ATOM-LABEL: test_movlhps: 2859 ; ATOM: # %bb.0: 2860 ; ATOM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 2861 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] 2862 ; ATOM-NEXT: retq # sched: [79:39.50] 2863 ; 2864 ; SLM-LABEL: test_movlhps: 2865 ; SLM: # %bb.0: 2866 ; SLM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 2867 ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 2868 ; SLM-NEXT: retq # sched: [4:1.00] 2869 ; 2870 ; SANDY-SSE-LABEL: test_movlhps: 2871 ; SANDY-SSE: # %bb.0: 2872 ; SANDY-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 2873 ; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 2874 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2875 ; 2876 ; SANDY-LABEL: test_movlhps: 2877 ; SANDY: # %bb.0: 2878 ; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 2879 ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 2880 ; SANDY-NEXT: retq # sched: [1:1.00] 2881 ; 2882 ; HASWELL-SSE-LABEL: test_movlhps: 2883 ; HASWELL-SSE: # %bb.0: 2884 ; HASWELL-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 2885 ; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 2886 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2887 ; 2888 ; 
HASWELL-LABEL: test_movlhps: 2889 ; HASWELL: # %bb.0: 2890 ; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 2891 ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 2892 ; HASWELL-NEXT: retq # sched: [7:1.00] 2893 ; 2894 ; BROADWELL-SSE-LABEL: test_movlhps: 2895 ; BROADWELL-SSE: # %bb.0: 2896 ; BROADWELL-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 2897 ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 2898 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2899 ; 2900 ; BROADWELL-LABEL: test_movlhps: 2901 ; BROADWELL: # %bb.0: 2902 ; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 2903 ; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 2904 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2905 ; 2906 ; SKYLAKE-SSE-LABEL: test_movlhps: 2907 ; SKYLAKE-SSE: # %bb.0: 2908 ; SKYLAKE-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 2909 ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 2910 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2911 ; 2912 ; SKYLAKE-LABEL: test_movlhps: 2913 ; SKYLAKE: # %bb.0: 2914 ; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 2915 ; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] 2916 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2917 ; 2918 ; SKX-SSE-LABEL: test_movlhps: 2919 ; SKX-SSE: # %bb.0: 2920 ; SKX-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 2921 ; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 2922 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2923 ; 2924 ; SKX-LABEL: test_movlhps: 2925 ; SKX: # %bb.0: 2926 ; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 2927 ; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] 2928 ; SKX-NEXT: retq # sched: [7:1.00] 2929 ; 2930 ; BTVER2-SSE-LABEL: test_movlhps: 2931 ; BTVER2-SSE: # %bb.0: 2932 ; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] 2933 ; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: 
[3:1.00] 2934 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2935 ; 2936 ; BTVER2-LABEL: test_movlhps: 2937 ; BTVER2: # %bb.0: 2938 ; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] 2939 ; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 2940 ; BTVER2-NEXT: retq # sched: [4:1.00] 2941 ; 2942 ; ZNVER1-SSE-LABEL: test_movlhps: 2943 ; ZNVER1-SSE: # %bb.0: 2944 ; ZNVER1-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] 2945 ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 2946 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2947 ; 2948 ; ZNVER1-LABEL: test_movlhps: 2949 ; ZNVER1: # %bb.0: 2950 ; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] 2951 ; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 2952 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2953 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 2954 %2 = fadd <4 x float> %a1, %1 2955 ret <4 x float> %2 2956 } 2957 2958 define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { 2959 ; GENERIC-LABEL: test_movlps: 2960 ; GENERIC: # %bb.0: 2961 ; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] 2962 ; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 2963 ; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] 2964 ; GENERIC-NEXT: retq # sched: [1:1.00] 2965 ; 2966 ; ATOM-LABEL: test_movlps: 2967 ; ATOM: # %bb.0: 2968 ; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] 2969 ; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00] 2970 ; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] 2971 ; ATOM-NEXT: retq # sched: [79:39.50] 2972 ; 2973 ; SLM-LABEL: test_movlps: 2974 ; SLM: # %bb.0: 2975 ; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00] 2976 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 2977 ; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] 2978 ; SLM-NEXT: retq # sched: [4:1.00] 2979 ; 2980 ; SANDY-SSE-LABEL: test_movlps: 2981 ; SANDY-SSE: # 
%bb.0: 2982 ; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] 2983 ; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 2984 ; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] 2985 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2986 ; 2987 ; SANDY-LABEL: test_movlps: 2988 ; SANDY: # %bb.0: 2989 ; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] 2990 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2991 ; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] 2992 ; SANDY-NEXT: retq # sched: [1:1.00] 2993 ; 2994 ; HASWELL-SSE-LABEL: test_movlps: 2995 ; HASWELL-SSE: # %bb.0: 2996 ; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 2997 ; HASWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 2998 ; HASWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] 2999 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3000 ; 3001 ; HASWELL-LABEL: test_movlps: 3002 ; HASWELL: # %bb.0: 3003 ; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 3004 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3005 ; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] 3006 ; HASWELL-NEXT: retq # sched: [7:1.00] 3007 ; 3008 ; BROADWELL-SSE-LABEL: test_movlps: 3009 ; BROADWELL-SSE: # %bb.0: 3010 ; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 3011 ; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 3012 ; BROADWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] 3013 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3014 ; 3015 ; BROADWELL-LABEL: test_movlps: 3016 ; BROADWELL: # %bb.0: 3017 ; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 3018 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3019 ; BROADWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] 3020 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3021 ; 3022 ; SKYLAKE-SSE-LABEL: test_movlps: 3023 ; SKYLAKE-SSE: # %bb.0: 3024 ; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] 
sched: [6:1.00] 3025 ; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50] 3026 ; SKYLAKE-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] 3027 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3028 ; 3029 ; SKYLAKE-LABEL: test_movlps: 3030 ; SKYLAKE: # %bb.0: 3031 ; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 3032 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 3033 ; SKYLAKE-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] 3034 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3035 ; 3036 ; SKX-SSE-LABEL: test_movlps: 3037 ; SKX-SSE: # %bb.0: 3038 ; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 3039 ; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50] 3040 ; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] 3041 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3042 ; 3043 ; SKX-LABEL: test_movlps: 3044 ; SKX: # %bb.0: 3045 ; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 3046 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 3047 ; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] 3048 ; SKX-NEXT: retq # sched: [7:1.00] 3049 ; 3050 ; BTVER2-SSE-LABEL: test_movlps: 3051 ; BTVER2-SSE: # %bb.0: 3052 ; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 3053 ; BTVER2-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 3054 ; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [2:1.00] 3055 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3056 ; 3057 ; BTVER2-LABEL: test_movlps: 3058 ; BTVER2: # %bb.0: 3059 ; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 3060 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3061 ; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [2:1.00] 3062 ; BTVER2-NEXT: retq # sched: [4:1.00] 3063 ; 3064 ; ZNVER1-SSE-LABEL: test_movlps: 3065 ; ZNVER1-SSE: # %bb.0: 3066 ; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] 3067 ; ZNVER1-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 3068 ; ZNVER1-SSE-NEXT: movlps %xmm1, (%rdi) # sched: 
[1:0.50] 3069 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3070 ; 3071 ; ZNVER1-LABEL: test_movlps: 3072 ; ZNVER1: # %bb.0: 3073 ; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] 3074 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3075 ; ZNVER1-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:0.50] 3076 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3077 %1 = bitcast x86_mmx* %a2 to <2 x float>* 3078 %2 = load <2 x float>, <2 x float> *%1, align 8 3079 %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3080 %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3> 3081 %5 = fadd <4 x float> %a0, %4 3082 %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 0, i32 1> 3083 store <2 x float> %6, <2 x float>* %1 3084 ret void 3085 } 3086 3087 define i32 @test_movmskps(<4 x float> %a0) { 3088 ; GENERIC-LABEL: test_movmskps: 3089 ; GENERIC: # %bb.0: 3090 ; GENERIC-NEXT: movmskps %xmm0, %eax # sched: [2:1.00] 3091 ; GENERIC-NEXT: retq # sched: [1:1.00] 3092 ; 3093 ; ATOM-LABEL: test_movmskps: 3094 ; ATOM: # %bb.0: 3095 ; ATOM-NEXT: movmskps %xmm0, %eax # sched: [3:3.00] 3096 ; ATOM-NEXT: nop # sched: [1:0.50] 3097 ; ATOM-NEXT: nop # sched: [1:0.50] 3098 ; ATOM-NEXT: retq # sched: [79:39.50] 3099 ; 3100 ; SLM-LABEL: test_movmskps: 3101 ; SLM: # %bb.0: 3102 ; SLM-NEXT: movmskps %xmm0, %eax # sched: [4:1.00] 3103 ; SLM-NEXT: retq # sched: [4:1.00] 3104 ; 3105 ; SANDY-SSE-LABEL: test_movmskps: 3106 ; SANDY-SSE: # %bb.0: 3107 ; SANDY-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00] 3108 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3109 ; 3110 ; SANDY-LABEL: test_movmskps: 3111 ; SANDY: # %bb.0: 3112 ; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] 3113 ; SANDY-NEXT: retq # sched: [1:1.00] 3114 ; 3115 ; HASWELL-SSE-LABEL: test_movmskps: 3116 ; HASWELL-SSE: # %bb.0: 3117 ; HASWELL-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00] 3118 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 
3119 ; 3120 ; HASWELL-LABEL: test_movmskps: 3121 ; HASWELL: # %bb.0: 3122 ; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00] 3123 ; HASWELL-NEXT: retq # sched: [7:1.00] 3124 ; 3125 ; BROADWELL-SSE-LABEL: test_movmskps: 3126 ; BROADWELL-SSE: # %bb.0: 3127 ; BROADWELL-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00] 3128 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3129 ; 3130 ; BROADWELL-LABEL: test_movmskps: 3131 ; BROADWELL: # %bb.0: 3132 ; BROADWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00] 3133 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3134 ; 3135 ; SKYLAKE-SSE-LABEL: test_movmskps: 3136 ; SKYLAKE-SSE: # %bb.0: 3137 ; SKYLAKE-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00] 3138 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3139 ; 3140 ; SKYLAKE-LABEL: test_movmskps: 3141 ; SKYLAKE: # %bb.0: 3142 ; SKYLAKE-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] 3143 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3144 ; 3145 ; SKX-SSE-LABEL: test_movmskps: 3146 ; SKX-SSE: # %bb.0: 3147 ; SKX-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00] 3148 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3149 ; 3150 ; SKX-LABEL: test_movmskps: 3151 ; SKX: # %bb.0: 3152 ; SKX-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] 3153 ; SKX-NEXT: retq # sched: [7:1.00] 3154 ; 3155 ; BTVER2-SSE-LABEL: test_movmskps: 3156 ; BTVER2-SSE: # %bb.0: 3157 ; BTVER2-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00] 3158 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3159 ; 3160 ; BTVER2-LABEL: test_movmskps: 3161 ; BTVER2: # %bb.0: 3162 ; BTVER2-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00] 3163 ; BTVER2-NEXT: retq # sched: [4:1.00] 3164 ; 3165 ; ZNVER1-SSE-LABEL: test_movmskps: 3166 ; ZNVER1-SSE: # %bb.0: 3167 ; ZNVER1-SSE-NEXT: movmskps %xmm0, %eax # sched: [1:1.00] 3168 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3169 ; 3170 ; ZNVER1-LABEL: test_movmskps: 3171 ; ZNVER1: # %bb.0: 3172 ; ZNVER1-NEXT: vmovmskps %xmm0, %eax # sched: [1:1.00] 3173 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3174 %1 = call i32 
@llvm.x86.sse.movmsk.ps(<4 x float> %a0) 3175 ret i32 %1 3176 } 3177 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone 3178 3179 define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) { 3180 ; GENERIC-LABEL: test_movntps: 3181 ; GENERIC: # %bb.0: 3182 ; GENERIC-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] 3183 ; GENERIC-NEXT: retq # sched: [1:1.00] 3184 ; 3185 ; ATOM-LABEL: test_movntps: 3186 ; ATOM: # %bb.0: 3187 ; ATOM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] 3188 ; ATOM-NEXT: nop # sched: [1:0.50] 3189 ; ATOM-NEXT: nop # sched: [1:0.50] 3190 ; ATOM-NEXT: nop # sched: [1:0.50] 3191 ; ATOM-NEXT: nop # sched: [1:0.50] 3192 ; ATOM-NEXT: nop # sched: [1:0.50] 3193 ; ATOM-NEXT: nop # sched: [1:0.50] 3194 ; ATOM-NEXT: retq # sched: [79:39.50] 3195 ; 3196 ; SLM-LABEL: test_movntps: 3197 ; SLM: # %bb.0: 3198 ; SLM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] 3199 ; SLM-NEXT: retq # sched: [4:1.00] 3200 ; 3201 ; SANDY-SSE-LABEL: test_movntps: 3202 ; SANDY-SSE: # %bb.0: 3203 ; SANDY-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] 3204 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3205 ; 3206 ; SANDY-LABEL: test_movntps: 3207 ; SANDY: # %bb.0: 3208 ; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] 3209 ; SANDY-NEXT: retq # sched: [1:1.00] 3210 ; 3211 ; HASWELL-SSE-LABEL: test_movntps: 3212 ; HASWELL-SSE: # %bb.0: 3213 ; HASWELL-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] 3214 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3215 ; 3216 ; HASWELL-LABEL: test_movntps: 3217 ; HASWELL: # %bb.0: 3218 ; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] 3219 ; HASWELL-NEXT: retq # sched: [7:1.00] 3220 ; 3221 ; BROADWELL-SSE-LABEL: test_movntps: 3222 ; BROADWELL-SSE: # %bb.0: 3223 ; BROADWELL-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] 3224 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3225 ; 3226 ; BROADWELL-LABEL: test_movntps: 3227 ; BROADWELL: # %bb.0: 3228 ; BROADWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] 3229 ; 
BROADWELL-NEXT: retq # sched: [7:1.00] 3230 ; 3231 ; SKYLAKE-SSE-LABEL: test_movntps: 3232 ; SKYLAKE-SSE: # %bb.0: 3233 ; SKYLAKE-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] 3234 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3235 ; 3236 ; SKYLAKE-LABEL: test_movntps: 3237 ; SKYLAKE: # %bb.0: 3238 ; SKYLAKE-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] 3239 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3240 ; 3241 ; SKX-SSE-LABEL: test_movntps: 3242 ; SKX-SSE: # %bb.0: 3243 ; SKX-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] 3244 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3245 ; 3246 ; SKX-LABEL: test_movntps: 3247 ; SKX: # %bb.0: 3248 ; SKX-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] 3249 ; SKX-NEXT: retq # sched: [7:1.00] 3250 ; 3251 ; BTVER2-SSE-LABEL: test_movntps: 3252 ; BTVER2-SSE: # %bb.0: 3253 ; BTVER2-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [3:1.00] 3254 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3255 ; 3256 ; BTVER2-LABEL: test_movntps: 3257 ; BTVER2: # %bb.0: 3258 ; BTVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [3:1.00] 3259 ; BTVER2-NEXT: retq # sched: [4:1.00] 3260 ; 3261 ; ZNVER1-SSE-LABEL: test_movntps: 3262 ; ZNVER1-SSE: # %bb.0: 3263 ; ZNVER1-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:0.50] 3264 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3265 ; 3266 ; ZNVER1-LABEL: test_movntps: 3267 ; ZNVER1: # %bb.0: 3268 ; ZNVER1-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:0.50] 3269 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3270 store <4 x float> %a0, <4 x float> *%a1, align 16, !nontemporal !0 3271 ret void 3272 } 3273 3274 define void @test_movss_mem(float* %a0, float* %a1) { 3275 ; GENERIC-LABEL: test_movss_mem: 3276 ; GENERIC: # %bb.0: 3277 ; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 3278 ; GENERIC-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] 3279 ; GENERIC-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] 3280 ; GENERIC-NEXT: retq # sched: [1:1.00] 3281 ; 3282 ; ATOM-LABEL: test_movss_mem: 3283 ; ATOM: # %bb.0: 3284 ; ATOM-NEXT: movss 
{{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:1.00] 3285 ; ATOM-NEXT: addss %xmm0, %xmm0 # sched: [5:5.00] 3286 ; ATOM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] 3287 ; ATOM-NEXT: retq # sched: [79:39.50] 3288 ; 3289 ; SLM-LABEL: test_movss_mem: 3290 ; SLM: # %bb.0: 3291 ; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00] 3292 ; SLM-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] 3293 ; SLM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] 3294 ; SLM-NEXT: retq # sched: [4:1.00] 3295 ; 3296 ; SANDY-SSE-LABEL: test_movss_mem: 3297 ; SANDY-SSE: # %bb.0: 3298 ; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 3299 ; SANDY-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] 3300 ; SANDY-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] 3301 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3302 ; 3303 ; SANDY-LABEL: test_movss_mem: 3304 ; SANDY: # %bb.0: 3305 ; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 3306 ; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 3307 ; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] 3308 ; SANDY-NEXT: retq # sched: [1:1.00] 3309 ; 3310 ; HASWELL-SSE-LABEL: test_movss_mem: 3311 ; HASWELL-SSE: # %bb.0: 3312 ; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 3313 ; HASWELL-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] 3314 ; HASWELL-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] 3315 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3316 ; 3317 ; HASWELL-LABEL: test_movss_mem: 3318 ; HASWELL: # %bb.0: 3319 ; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 3320 ; HASWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 3321 ; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] 3322 ; HASWELL-NEXT: retq # sched: [7:1.00] 3323 ; 3324 ; BROADWELL-SSE-LABEL: test_movss_mem: 3325 ; BROADWELL-SSE: # %bb.0: 3326 ; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 3327 ; BROADWELL-SSE-NEXT: addss %xmm0, 
%xmm0 # sched: [3:1.00] 3328 ; BROADWELL-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] 3329 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3330 ; 3331 ; BROADWELL-LABEL: test_movss_mem: 3332 ; BROADWELL: # %bb.0: 3333 ; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 3334 ; BROADWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 3335 ; BROADWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] 3336 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3337 ; 3338 ; SKYLAKE-SSE-LABEL: test_movss_mem: 3339 ; SKYLAKE-SSE: # %bb.0: 3340 ; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 3341 ; SKYLAKE-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.50] 3342 ; SKYLAKE-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] 3343 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3344 ; 3345 ; SKYLAKE-LABEL: test_movss_mem: 3346 ; SKYLAKE: # %bb.0: 3347 ; SKYLAKE-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 3348 ; SKYLAKE-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 3349 ; SKYLAKE-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] 3350 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3351 ; 3352 ; SKX-SSE-LABEL: test_movss_mem: 3353 ; SKX-SSE: # %bb.0: 3354 ; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 3355 ; SKX-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.50] 3356 ; SKX-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] 3357 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3358 ; 3359 ; SKX-LABEL: test_movss_mem: 3360 ; SKX: # %bb.0: 3361 ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 3362 ; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 3363 ; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] 3364 ; SKX-NEXT: retq # sched: [7:1.00] 3365 ; 3366 ; BTVER2-SSE-LABEL: test_movss_mem: 3367 ; BTVER2-SSE: # %bb.0: 3368 ; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00] 3369 ; BTVER2-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] 3370 ; BTVER2-SSE-NEXT: movss %xmm0, 
(%rsi) # sched: [2:1.00] 3371 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3372 ; 3373 ; BTVER2-LABEL: test_movss_mem: 3374 ; BTVER2: # %bb.0: 3375 ; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00] 3376 ; BTVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 3377 ; BTVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [2:1.00] 3378 ; BTVER2-NEXT: retq # sched: [4:1.00] 3379 ; 3380 ; ZNVER1-SSE-LABEL: test_movss_mem: 3381 ; ZNVER1-SSE: # %bb.0: 3382 ; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50] 3383 ; ZNVER1-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] 3384 ; ZNVER1-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:0.50] 3385 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3386 ; 3387 ; ZNVER1-LABEL: test_movss_mem: 3388 ; ZNVER1: # %bb.0: 3389 ; ZNVER1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50] 3390 ; ZNVER1-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 3391 ; ZNVER1-NEXT: vmovss %xmm0, (%rsi) # sched: [1:0.50] 3392 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3393 %1 = load float, float* %a0, align 1 3394 %2 = fadd float %1, %1 3395 store float %2, float *%a1, align 1 3396 ret void 3397 } 3398 3399 define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) { 3400 ; GENERIC-LABEL: test_movss_reg: 3401 ; GENERIC: # %bb.0: 3402 ; GENERIC-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] 3403 ; GENERIC-NEXT: retq # sched: [1:1.00] 3404 ; 3405 ; ATOM-LABEL: test_movss_reg: 3406 ; ATOM: # %bb.0: 3407 ; ATOM-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] 3408 ; ATOM-NEXT: nop # sched: [1:0.50] 3409 ; ATOM-NEXT: nop # sched: [1:0.50] 3410 ; ATOM-NEXT: nop # sched: [1:0.50] 3411 ; ATOM-NEXT: nop # sched: [1:0.50] 3412 ; ATOM-NEXT: nop # sched: [1:0.50] 3413 ; ATOM-NEXT: nop # sched: [1:0.50] 3414 ; ATOM-NEXT: retq # sched: [79:39.50] 3415 ; 3416 ; SLM-LABEL: test_movss_reg: 3417 ; SLM: # %bb.0: 3418 ; SLM-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] 3419 ; 
SLM-NEXT: retq # sched: [4:1.00] 3420 ; 3421 ; SANDY-SSE-LABEL: test_movss_reg: 3422 ; SANDY-SSE: # %bb.0: 3423 ; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] 3424 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3425 ; 3426 ; SANDY-LABEL: test_movss_reg: 3427 ; SANDY: # %bb.0: 3428 ; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] 3429 ; SANDY-NEXT: retq # sched: [1:1.00] 3430 ; 3431 ; HASWELL-SSE-LABEL: test_movss_reg: 3432 ; HASWELL-SSE: # %bb.0: 3433 ; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] 3434 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3435 ; 3436 ; HASWELL-LABEL: test_movss_reg: 3437 ; HASWELL: # %bb.0: 3438 ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] 3439 ; HASWELL-NEXT: retq # sched: [7:1.00] 3440 ; 3441 ; BROADWELL-SSE-LABEL: test_movss_reg: 3442 ; BROADWELL-SSE: # %bb.0: 3443 ; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] 3444 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3445 ; 3446 ; BROADWELL-LABEL: test_movss_reg: 3447 ; BROADWELL: # %bb.0: 3448 ; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] 3449 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3450 ; 3451 ; SKYLAKE-SSE-LABEL: test_movss_reg: 3452 ; SKYLAKE-SSE: # %bb.0: 3453 ; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] 3454 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3455 ; 3456 ; SKYLAKE-LABEL: test_movss_reg: 3457 ; SKYLAKE: # %bb.0: 3458 ; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] 3459 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3460 ; 3461 ; SKX-SSE-LABEL: test_movss_reg: 3462 ; SKX-SSE: # %bb.0: 3463 ; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] 3464 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3465 ; 3466 ; SKX-LABEL: test_movss_reg: 3467 ; SKX: # %bb.0: 3468 ; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] 3469 ; 
SKX-NEXT: retq # sched: [7:1.00] 3470 ; 3471 ; BTVER2-SSE-LABEL: test_movss_reg: 3472 ; BTVER2-SSE: # %bb.0: 3473 ; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] 3474 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3475 ; 3476 ; BTVER2-LABEL: test_movss_reg: 3477 ; BTVER2: # %bb.0: 3478 ; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] 3479 ; BTVER2-NEXT: retq # sched: [4:1.00] 3480 ; 3481 ; ZNVER1-SSE-LABEL: test_movss_reg: 3482 ; ZNVER1-SSE: # %bb.0: 3483 ; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] 3484 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3485 ; 3486 ; ZNVER1-LABEL: test_movss_reg: 3487 ; ZNVER1: # %bb.0: 3488 ; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] 3489 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3490 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 3491 ret <4 x float> %1 3492 } 3493 3494 define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { 3495 ; GENERIC-LABEL: test_movups: 3496 ; GENERIC: # %bb.0: 3497 ; GENERIC-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] 3498 ; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] 3499 ; GENERIC-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] 3500 ; GENERIC-NEXT: retq # sched: [1:1.00] 3501 ; 3502 ; ATOM-LABEL: test_movups: 3503 ; ATOM: # %bb.0: 3504 ; ATOM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.50] 3505 ; ATOM-NEXT: addps %xmm0, %xmm0 # sched: [5:5.00] 3506 ; ATOM-NEXT: movups %xmm0, (%rsi) # sched: [2:1.00] 3507 ; ATOM-NEXT: retq # sched: [79:39.50] 3508 ; 3509 ; SLM-LABEL: test_movups: 3510 ; SLM: # %bb.0: 3511 ; SLM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.00] 3512 ; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] 3513 ; SLM-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] 3514 ; SLM-NEXT: retq # sched: [4:1.00] 3515 ; 3516 ; SANDY-SSE-LABEL: test_movups: 3517 ; SANDY-SSE: # %bb.0: 3518 ; SANDY-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] 3519 ; 
SANDY-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] 3520 ; SANDY-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] 3521 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3522 ; 3523 ; SANDY-LABEL: test_movups: 3524 ; SANDY: # %bb.0: 3525 ; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] 3526 ; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 3527 ; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] 3528 ; SANDY-NEXT: retq # sched: [1:1.00] 3529 ; 3530 ; HASWELL-SSE-LABEL: test_movups: 3531 ; HASWELL-SSE: # %bb.0: 3532 ; HASWELL-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] 3533 ; HASWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] 3534 ; HASWELL-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] 3535 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3536 ; 3537 ; HASWELL-LABEL: test_movups: 3538 ; HASWELL: # %bb.0: 3539 ; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] 3540 ; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 3541 ; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] 3542 ; HASWELL-NEXT: retq # sched: [7:1.00] 3543 ; 3544 ; BROADWELL-SSE-LABEL: test_movups: 3545 ; BROADWELL-SSE: # %bb.0: 3546 ; BROADWELL-SSE-NEXT: movups (%rdi), %xmm0 # sched: [5:0.50] 3547 ; BROADWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] 3548 ; BROADWELL-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] 3549 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3550 ; 3551 ; BROADWELL-LABEL: test_movups: 3552 ; BROADWELL: # %bb.0: 3553 ; BROADWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [5:0.50] 3554 ; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 3555 ; BROADWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] 3556 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3557 ; 3558 ; SKYLAKE-SSE-LABEL: test_movups: 3559 ; SKYLAKE-SSE: # %bb.0: 3560 ; SKYLAKE-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] 3561 ; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50] 3562 ; SKYLAKE-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] 3563 ; SKYLAKE-SSE-NEXT: retq # sched: 
[7:1.00] 3564 ; 3565 ; SKYLAKE-LABEL: test_movups: 3566 ; SKYLAKE: # %bb.0: 3567 ; SKYLAKE-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] 3568 ; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 3569 ; SKYLAKE-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] 3570 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3571 ; 3572 ; SKX-SSE-LABEL: test_movups: 3573 ; SKX-SSE: # %bb.0: 3574 ; SKX-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] 3575 ; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50] 3576 ; SKX-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] 3577 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3578 ; 3579 ; SKX-LABEL: test_movups: 3580 ; SKX: # %bb.0: 3581 ; SKX-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] 3582 ; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 3583 ; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] 3584 ; SKX-NEXT: retq # sched: [7:1.00] 3585 ; 3586 ; BTVER2-SSE-LABEL: test_movups: 3587 ; BTVER2-SSE: # %bb.0: 3588 ; BTVER2-SSE-NEXT: movups (%rdi), %xmm0 # sched: [5:1.00] 3589 ; BTVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] 3590 ; BTVER2-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] 3591 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3592 ; 3593 ; BTVER2-LABEL: test_movups: 3594 ; BTVER2: # %bb.0: 3595 ; BTVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:1.00] 3596 ; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 3597 ; BTVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] 3598 ; BTVER2-NEXT: retq # sched: [4:1.00] 3599 ; 3600 ; ZNVER1-SSE-LABEL: test_movups: 3601 ; ZNVER1-SSE: # %bb.0: 3602 ; ZNVER1-SSE-NEXT: movups (%rdi), %xmm0 # sched: [8:0.50] 3603 ; ZNVER1-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] 3604 ; ZNVER1-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:0.50] 3605 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3606 ; 3607 ; ZNVER1-LABEL: test_movups: 3608 ; ZNVER1: # %bb.0: 3609 ; ZNVER1-NEXT: vmovups (%rdi), %xmm0 # sched: [8:0.50] 3610 ; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 3611 ; ZNVER1-NEXT: vmovups 
%xmm0, (%rsi) # sched: [1:0.50] 3612 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3613 %1 = load <4 x float>, <4 x float> *%a0, align 1 3614 %2 = fadd <4 x float> %1, %1 3615 store <4 x float> %2, <4 x float> *%a1, align 1 3616 ret void 3617 } 3618 3619 define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { 3620 ; GENERIC-LABEL: test_mulps: 3621 ; GENERIC: # %bb.0: 3622 ; GENERIC-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00] 3623 ; GENERIC-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00] 3624 ; GENERIC-NEXT: retq # sched: [1:1.00] 3625 ; 3626 ; ATOM-LABEL: test_mulps: 3627 ; ATOM: # %bb.0: 3628 ; ATOM-NEXT: mulps %xmm1, %xmm0 # sched: [5:5.00] 3629 ; ATOM-NEXT: mulps (%rdi), %xmm0 # sched: [5:5.00] 3630 ; ATOM-NEXT: retq # sched: [79:39.50] 3631 ; 3632 ; SLM-LABEL: test_mulps: 3633 ; SLM: # %bb.0: 3634 ; SLM-NEXT: mulps %xmm1, %xmm0 # sched: [5:2.00] 3635 ; SLM-NEXT: mulps (%rdi), %xmm0 # sched: [8:2.00] 3636 ; SLM-NEXT: retq # sched: [4:1.00] 3637 ; 3638 ; SANDY-SSE-LABEL: test_mulps: 3639 ; SANDY-SSE: # %bb.0: 3640 ; SANDY-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00] 3641 ; SANDY-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00] 3642 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3643 ; 3644 ; SANDY-LABEL: test_mulps: 3645 ; SANDY: # %bb.0: 3646 ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 3647 ; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 3648 ; SANDY-NEXT: retq # sched: [1:1.00] 3649 ; 3650 ; HASWELL-SSE-LABEL: test_mulps: 3651 ; HASWELL-SSE: # %bb.0: 3652 ; HASWELL-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [5:0.50] 3653 ; HASWELL-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [11:0.50] 3654 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3655 ; 3656 ; HASWELL-LABEL: test_mulps: 3657 ; HASWELL: # %bb.0: 3658 ; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] 3659 ; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:0.50] 3660 ; HASWELL-NEXT: retq # sched: [7:1.00] 3661 ; 3662 ; BROADWELL-SSE-LABEL: test_mulps: 3663 ; 
BROADWELL-SSE: # %bb.0: 3664 ; BROADWELL-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [3:0.50] 3665 ; BROADWELL-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [8:0.50] 3666 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3667 ; 3668 ; BROADWELL-LABEL: test_mulps: 3669 ; BROADWELL: # %bb.0: 3670 ; BROADWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50] 3671 ; BROADWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 3672 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3673 ; 3674 ; SKYLAKE-SSE-LABEL: test_mulps: 3675 ; SKYLAKE-SSE: # %bb.0: 3676 ; SKYLAKE-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.50] 3677 ; SKYLAKE-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50] 3678 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3679 ; 3680 ; SKYLAKE-LABEL: test_mulps: 3681 ; SKYLAKE: # %bb.0: 3682 ; SKYLAKE-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 3683 ; SKYLAKE-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 3684 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3685 ; 3686 ; SKX-SSE-LABEL: test_mulps: 3687 ; SKX-SSE: # %bb.0: 3688 ; SKX-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.50] 3689 ; SKX-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50] 3690 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3691 ; 3692 ; SKX-LABEL: test_mulps: 3693 ; SKX: # %bb.0: 3694 ; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 3695 ; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 3696 ; SKX-NEXT: retq # sched: [7:1.00] 3697 ; 3698 ; BTVER2-SSE-LABEL: test_mulps: 3699 ; BTVER2-SSE: # %bb.0: 3700 ; BTVER2-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [2:1.00] 3701 ; BTVER2-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [7:1.00] 3702 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3703 ; 3704 ; BTVER2-LABEL: test_mulps: 3705 ; BTVER2: # %bb.0: 3706 ; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 3707 ; BTVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3708 ; BTVER2-NEXT: retq # sched: [4:1.00] 3709 ; 3710 ; ZNVER1-SSE-LABEL: test_mulps: 3711 ; ZNVER1-SSE: # %bb.0: 3712 ; ZNVER1-SSE-NEXT: mulps %xmm1, %xmm0 # 
sched: [3:0.50] 3713 ; ZNVER1-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50] 3714 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3715 ; 3716 ; ZNVER1-LABEL: test_mulps: 3717 ; ZNVER1: # %bb.0: 3718 ; ZNVER1-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50] 3719 ; ZNVER1-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 3720 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3721 %1 = fmul <4 x float> %a0, %a1 3722 %2 = load <4 x float>, <4 x float> *%a2, align 16 3723 %3 = fmul <4 x float> %1, %2 3724 ret <4 x float> %3 3725 } 3726 3727 define float @test_mulss(float %a0, float %a1, float *%a2) { 3728 ; GENERIC-LABEL: test_mulss: 3729 ; GENERIC: # %bb.0: 3730 ; GENERIC-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00] 3731 ; GENERIC-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00] 3732 ; GENERIC-NEXT: retq # sched: [1:1.00] 3733 ; 3734 ; ATOM-LABEL: test_mulss: 3735 ; ATOM: # %bb.0: 3736 ; ATOM-NEXT: mulss %xmm1, %xmm0 # sched: [4:4.00] 3737 ; ATOM-NEXT: mulss (%rdi), %xmm0 # sched: [4:4.00] 3738 ; ATOM-NEXT: retq # sched: [79:39.50] 3739 ; 3740 ; SLM-LABEL: test_mulss: 3741 ; SLM: # %bb.0: 3742 ; SLM-NEXT: mulss %xmm1, %xmm0 # sched: [5:2.00] 3743 ; SLM-NEXT: mulss (%rdi), %xmm0 # sched: [8:2.00] 3744 ; SLM-NEXT: retq # sched: [4:1.00] 3745 ; 3746 ; SANDY-SSE-LABEL: test_mulss: 3747 ; SANDY-SSE: # %bb.0: 3748 ; SANDY-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00] 3749 ; SANDY-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00] 3750 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3751 ; 3752 ; SANDY-LABEL: test_mulss: 3753 ; SANDY: # %bb.0: 3754 ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 3755 ; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 3756 ; SANDY-NEXT: retq # sched: [1:1.00] 3757 ; 3758 ; HASWELL-SSE-LABEL: test_mulss: 3759 ; HASWELL-SSE: # %bb.0: 3760 ; HASWELL-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [5:0.50] 3761 ; HASWELL-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [10:0.50] 3762 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3763 ; 3764 ; HASWELL-LABEL: test_mulss: 3765 
; HASWELL: # %bb.0: 3766 ; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] 3767 ; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 3768 ; HASWELL-NEXT: retq # sched: [7:1.00] 3769 ; 3770 ; BROADWELL-SSE-LABEL: test_mulss: 3771 ; BROADWELL-SSE: # %bb.0: 3772 ; BROADWELL-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [3:0.50] 3773 ; BROADWELL-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [8:0.50] 3774 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3775 ; 3776 ; BROADWELL-LABEL: test_mulss: 3777 ; BROADWELL: # %bb.0: 3778 ; BROADWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50] 3779 ; BROADWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 3780 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3781 ; 3782 ; SKYLAKE-SSE-LABEL: test_mulss: 3783 ; SKYLAKE-SSE: # %bb.0: 3784 ; SKYLAKE-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.50] 3785 ; SKYLAKE-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [9:0.50] 3786 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3787 ; 3788 ; SKYLAKE-LABEL: test_mulss: 3789 ; SKYLAKE: # %bb.0: 3790 ; SKYLAKE-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 3791 ; SKYLAKE-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 3792 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3793 ; 3794 ; SKX-SSE-LABEL: test_mulss: 3795 ; SKX-SSE: # %bb.0: 3796 ; SKX-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.50] 3797 ; SKX-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [9:0.50] 3798 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3799 ; 3800 ; SKX-LABEL: test_mulss: 3801 ; SKX: # %bb.0: 3802 ; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 3803 ; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 3804 ; SKX-NEXT: retq # sched: [7:1.00] 3805 ; 3806 ; BTVER2-SSE-LABEL: test_mulss: 3807 ; BTVER2-SSE: # %bb.0: 3808 ; BTVER2-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [2:1.00] 3809 ; BTVER2-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [7:1.00] 3810 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3811 ; 3812 ; BTVER2-LABEL: test_mulss: 3813 ; BTVER2: # %bb.0: 3814 ; BTVER2-NEXT: vmulss %xmm1, %xmm0, 
%xmm0 # sched: [2:1.00] 3815 ; BTVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3816 ; BTVER2-NEXT: retq # sched: [4:1.00] 3817 ; 3818 ; ZNVER1-SSE-LABEL: test_mulss: 3819 ; ZNVER1-SSE: # %bb.0: 3820 ; ZNVER1-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [3:0.50] 3821 ; ZNVER1-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [10:0.50] 3822 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3823 ; 3824 ; ZNVER1-LABEL: test_mulss: 3825 ; ZNVER1: # %bb.0: 3826 ; ZNVER1-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50] 3827 ; ZNVER1-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 3828 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3829 %1 = fmul float %a0, %a1 3830 %2 = load float, float *%a2, align 4 3831 %3 = fmul float %1, %2 3832 ret float %3 3833 } 3834 3835 define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { 3836 ; GENERIC-LABEL: test_orps: 3837 ; GENERIC: # %bb.0: 3838 ; GENERIC-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] 3839 ; GENERIC-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00] 3840 ; GENERIC-NEXT: retq # sched: [1:1.00] 3841 ; 3842 ; ATOM-LABEL: test_orps: 3843 ; ATOM: # %bb.0: 3844 ; ATOM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50] 3845 ; ATOM-NEXT: orps (%rdi), %xmm0 # sched: [1:1.00] 3846 ; ATOM-NEXT: nop # sched: [1:0.50] 3847 ; ATOM-NEXT: nop # sched: [1:0.50] 3848 ; ATOM-NEXT: nop # sched: [1:0.50] 3849 ; ATOM-NEXT: nop # sched: [1:0.50] 3850 ; ATOM-NEXT: retq # sched: [79:39.50] 3851 ; 3852 ; SLM-LABEL: test_orps: 3853 ; SLM: # %bb.0: 3854 ; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50] 3855 ; SLM-NEXT: orps (%rdi), %xmm0 # sched: [4:1.00] 3856 ; SLM-NEXT: retq # sched: [4:1.00] 3857 ; 3858 ; SANDY-SSE-LABEL: test_orps: 3859 ; SANDY-SSE: # %bb.0: 3860 ; SANDY-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] 3861 ; SANDY-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00] 3862 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3863 ; 3864 ; SANDY-LABEL: test_orps: 3865 ; SANDY: # %bb.0: 3866 ; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 3867 ; 
SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3868 ; SANDY-NEXT: retq # sched: [1:1.00] 3869 ; 3870 ; HASWELL-SSE-LABEL: test_orps: 3871 ; HASWELL-SSE: # %bb.0: 3872 ; HASWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] 3873 ; HASWELL-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00] 3874 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3875 ; 3876 ; HASWELL-LABEL: test_orps: 3877 ; HASWELL: # %bb.0: 3878 ; HASWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 3879 ; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3880 ; HASWELL-NEXT: retq # sched: [7:1.00] 3881 ; 3882 ; BROADWELL-SSE-LABEL: test_orps: 3883 ; BROADWELL-SSE: # %bb.0: 3884 ; BROADWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] 3885 ; BROADWELL-SSE-NEXT: orps (%rdi), %xmm0 # sched: [6:1.00] 3886 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3887 ; 3888 ; BROADWELL-LABEL: test_orps: 3889 ; BROADWELL: # %bb.0: 3890 ; BROADWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 3891 ; BROADWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 3892 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3893 ; 3894 ; SKYLAKE-SSE-LABEL: test_orps: 3895 ; SKYLAKE-SSE: # %bb.0: 3896 ; SKYLAKE-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33] 3897 ; SKYLAKE-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:0.50] 3898 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3899 ; 3900 ; SKYLAKE-LABEL: test_orps: 3901 ; SKYLAKE: # %bb.0: 3902 ; SKYLAKE-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 3903 ; SKYLAKE-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 3904 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3905 ; 3906 ; SKX-SSE-LABEL: test_orps: 3907 ; SKX-SSE: # %bb.0: 3908 ; SKX-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33] 3909 ; SKX-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:0.50] 3910 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3911 ; 3912 ; SKX-LABEL: test_orps: 3913 ; SKX: # %bb.0: 3914 ; SKX-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 3915 ; SKX-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 3916 ; SKX-NEXT: retq # 
sched: [7:1.00] 3917 ; 3918 ; BTVER2-SSE-LABEL: test_orps: 3919 ; BTVER2-SSE: # %bb.0: 3920 ; BTVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50] 3921 ; BTVER2-SSE-NEXT: orps (%rdi), %xmm0 # sched: [6:1.00] 3922 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3923 ; 3924 ; BTVER2-LABEL: test_orps: 3925 ; BTVER2: # %bb.0: 3926 ; BTVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 3927 ; BTVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 3928 ; BTVER2-NEXT: retq # sched: [4:1.00] 3929 ; 3930 ; ZNVER1-SSE-LABEL: test_orps: 3931 ; ZNVER1-SSE: # %bb.0: 3932 ; ZNVER1-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.25] 3933 ; ZNVER1-SSE-NEXT: orps (%rdi), %xmm0 # sched: [8:0.50] 3934 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3935 ; 3936 ; ZNVER1-LABEL: test_orps: 3937 ; ZNVER1: # %bb.0: 3938 ; ZNVER1-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 3939 ; ZNVER1-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 3940 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3941 %1 = bitcast <4 x float> %a0 to <4 x i32> 3942 %2 = bitcast <4 x float> %a1 to <4 x i32> 3943 %3 = or <4 x i32> %1, %2 3944 %4 = load <4 x float>, <4 x float> *%a2, align 16 3945 %5 = bitcast <4 x float> %4 to <4 x i32> 3946 %6 = or <4 x i32> %3, %5 3947 %7 = bitcast <4 x i32> %6 to <4 x float> 3948 ret <4 x float> %7 3949 } 3950 3951 define void @test_prefetch(i8* %a0) optsize { 3952 ; GENERIC-LABEL: test_prefetch: 3953 ; GENERIC: # %bb.0: 3954 ; GENERIC-NEXT: #APP 3955 ; GENERIC-NEXT: prefetchnta (%rdi) # sched: [5:0.50] 3956 ; GENERIC-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] 3957 ; GENERIC-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] 3958 ; GENERIC-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] 3959 ; GENERIC-NEXT: #NO_APP 3960 ; GENERIC-NEXT: retq # sched: [1:1.00] 3961 ; 3962 ; ATOM-LABEL: test_prefetch: 3963 ; ATOM: # %bb.0: 3964 ; ATOM-NEXT: #APP 3965 ; ATOM-NEXT: prefetchnta (%rdi) # sched: [1:1.00] 3966 ; ATOM-NEXT: prefetcht0 (%rdi) # sched: [1:1.00] 3967 ; ATOM-NEXT: prefetcht1 (%rdi) # sched: [1:1.00] 3968 ; 
ATOM-NEXT: prefetcht2 (%rdi) # sched: [1:1.00] 3969 ; ATOM-NEXT: #NO_APP 3970 ; ATOM-NEXT: retq # sched: [79:39.50] 3971 ; 3972 ; SLM-LABEL: test_prefetch: 3973 ; SLM: # %bb.0: 3974 ; SLM-NEXT: #APP 3975 ; SLM-NEXT: prefetchnta (%rdi) # sched: [3:1.00] 3976 ; SLM-NEXT: prefetcht0 (%rdi) # sched: [3:1.00] 3977 ; SLM-NEXT: prefetcht1 (%rdi) # sched: [3:1.00] 3978 ; SLM-NEXT: prefetcht2 (%rdi) # sched: [3:1.00] 3979 ; SLM-NEXT: #NO_APP 3980 ; SLM-NEXT: retq # sched: [4:1.00] 3981 ; 3982 ; SANDY-SSE-LABEL: test_prefetch: 3983 ; SANDY-SSE: # %bb.0: 3984 ; SANDY-SSE-NEXT: #APP 3985 ; SANDY-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] 3986 ; SANDY-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] 3987 ; SANDY-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] 3988 ; SANDY-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] 3989 ; SANDY-SSE-NEXT: #NO_APP 3990 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3991 ; 3992 ; SANDY-LABEL: test_prefetch: 3993 ; SANDY: # %bb.0: 3994 ; SANDY-NEXT: #APP 3995 ; SANDY-NEXT: prefetchnta (%rdi) # sched: [5:0.50] 3996 ; SANDY-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] 3997 ; SANDY-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] 3998 ; SANDY-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] 3999 ; SANDY-NEXT: #NO_APP 4000 ; SANDY-NEXT: retq # sched: [1:1.00] 4001 ; 4002 ; HASWELL-SSE-LABEL: test_prefetch: 4003 ; HASWELL-SSE: # %bb.0: 4004 ; HASWELL-SSE-NEXT: #APP 4005 ; HASWELL-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] 4006 ; HASWELL-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] 4007 ; HASWELL-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] 4008 ; HASWELL-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] 4009 ; HASWELL-SSE-NEXT: #NO_APP 4010 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4011 ; 4012 ; HASWELL-LABEL: test_prefetch: 4013 ; HASWELL: # %bb.0: 4014 ; HASWELL-NEXT: #APP 4015 ; HASWELL-NEXT: prefetchnta (%rdi) # sched: [5:0.50] 4016 ; HASWELL-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] 4017 ; HASWELL-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] 4018 ; HASWELL-NEXT: 
prefetcht2 (%rdi) # sched: [5:0.50] 4019 ; HASWELL-NEXT: #NO_APP 4020 ; HASWELL-NEXT: retq # sched: [7:1.00] 4021 ; 4022 ; BROADWELL-SSE-LABEL: test_prefetch: 4023 ; BROADWELL-SSE: # %bb.0: 4024 ; BROADWELL-SSE-NEXT: #APP 4025 ; BROADWELL-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] 4026 ; BROADWELL-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] 4027 ; BROADWELL-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] 4028 ; BROADWELL-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] 4029 ; BROADWELL-SSE-NEXT: #NO_APP 4030 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4031 ; 4032 ; BROADWELL-LABEL: test_prefetch: 4033 ; BROADWELL: # %bb.0: 4034 ; BROADWELL-NEXT: #APP 4035 ; BROADWELL-NEXT: prefetchnta (%rdi) # sched: [5:0.50] 4036 ; BROADWELL-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] 4037 ; BROADWELL-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] 4038 ; BROADWELL-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] 4039 ; BROADWELL-NEXT: #NO_APP 4040 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4041 ; 4042 ; SKYLAKE-SSE-LABEL: test_prefetch: 4043 ; SKYLAKE-SSE: # %bb.0: 4044 ; SKYLAKE-SSE-NEXT: #APP 4045 ; SKYLAKE-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] 4046 ; SKYLAKE-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] 4047 ; SKYLAKE-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] 4048 ; SKYLAKE-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] 4049 ; SKYLAKE-SSE-NEXT: #NO_APP 4050 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4051 ; 4052 ; SKYLAKE-LABEL: test_prefetch: 4053 ; SKYLAKE: # %bb.0: 4054 ; SKYLAKE-NEXT: #APP 4055 ; SKYLAKE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] 4056 ; SKYLAKE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] 4057 ; SKYLAKE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] 4058 ; SKYLAKE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] 4059 ; SKYLAKE-NEXT: #NO_APP 4060 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4061 ; 4062 ; SKX-SSE-LABEL: test_prefetch: 4063 ; SKX-SSE: # %bb.0: 4064 ; SKX-SSE-NEXT: #APP 4065 ; SKX-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] 4066 ; SKX-SSE-NEXT: prefetcht0 (%rdi) # 
sched: [5:0.50] 4067 ; SKX-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] 4068 ; SKX-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] 4069 ; SKX-SSE-NEXT: #NO_APP 4070 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4071 ; 4072 ; SKX-LABEL: test_prefetch: 4073 ; SKX: # %bb.0: 4074 ; SKX-NEXT: #APP 4075 ; SKX-NEXT: prefetchnta (%rdi) # sched: [5:0.50] 4076 ; SKX-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] 4077 ; SKX-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] 4078 ; SKX-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] 4079 ; SKX-NEXT: #NO_APP 4080 ; SKX-NEXT: retq # sched: [7:1.00] 4081 ; 4082 ; BTVER2-SSE-LABEL: test_prefetch: 4083 ; BTVER2-SSE: # %bb.0: 4084 ; BTVER2-SSE-NEXT: #APP 4085 ; BTVER2-SSE-NEXT: prefetchnta (%rdi) # sched: [5:1.00] 4086 ; BTVER2-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:1.00] 4087 ; BTVER2-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:1.00] 4088 ; BTVER2-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:1.00] 4089 ; BTVER2-SSE-NEXT: #NO_APP 4090 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4091 ; 4092 ; BTVER2-LABEL: test_prefetch: 4093 ; BTVER2: # %bb.0: 4094 ; BTVER2-NEXT: #APP 4095 ; BTVER2-NEXT: prefetchnta (%rdi) # sched: [5:1.00] 4096 ; BTVER2-NEXT: prefetcht0 (%rdi) # sched: [5:1.00] 4097 ; BTVER2-NEXT: prefetcht1 (%rdi) # sched: [5:1.00] 4098 ; BTVER2-NEXT: prefetcht2 (%rdi) # sched: [5:1.00] 4099 ; BTVER2-NEXT: #NO_APP 4100 ; BTVER2-NEXT: retq # sched: [4:1.00] 4101 ; 4102 ; ZNVER1-SSE-LABEL: test_prefetch: 4103 ; ZNVER1-SSE: # %bb.0: 4104 ; ZNVER1-SSE-NEXT: #APP 4105 ; ZNVER1-SSE-NEXT: prefetchnta (%rdi) # sched: [8:0.50] 4106 ; ZNVER1-SSE-NEXT: prefetcht0 (%rdi) # sched: [8:0.50] 4107 ; ZNVER1-SSE-NEXT: prefetcht1 (%rdi) # sched: [8:0.50] 4108 ; ZNVER1-SSE-NEXT: prefetcht2 (%rdi) # sched: [8:0.50] 4109 ; ZNVER1-SSE-NEXT: #NO_APP 4110 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4111 ; 4112 ; ZNVER1-LABEL: test_prefetch: 4113 ; ZNVER1: # %bb.0: 4114 ; ZNVER1-NEXT: #APP 4115 ; ZNVER1-NEXT: prefetchnta (%rdi) # sched: [8:0.50] 4116 ; ZNVER1-NEXT: prefetcht0 (%rdi) # sched: 
[8:0.50] 4117 ; ZNVER1-NEXT: prefetcht1 (%rdi) # sched: [8:0.50] 4118 ; ZNVER1-NEXT: prefetcht2 (%rdi) # sched: [8:0.50] 4119 ; ZNVER1-NEXT: #NO_APP 4120 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4121 call void asm sideeffect "prefetchnta $0 \0A\09 prefetcht0 $0 \0A\09 prefetcht1 $0 \0A\09 prefetcht2 $0", "*m"(i8 *%a0) 4122 ret void 4123 } 4124 4125 define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { 4126 ; GENERIC-LABEL: test_rcpps: 4127 ; GENERIC: # %bb.0: 4128 ; GENERIC-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00] 4129 ; GENERIC-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00] 4130 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4131 ; GENERIC-NEXT: retq # sched: [1:1.00] 4132 ; 4133 ; ATOM-LABEL: test_rcpps: 4134 ; ATOM: # %bb.0: 4135 ; ATOM-NEXT: rcpps (%rdi), %xmm1 # sched: [10:5.00] 4136 ; ATOM-NEXT: rcpps %xmm0, %xmm0 # sched: [9:4.50] 4137 ; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00] 4138 ; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] 4139 ; ATOM-NEXT: retq # sched: [79:39.50] 4140 ; 4141 ; SLM-LABEL: test_rcpps: 4142 ; SLM: # %bb.0: 4143 ; SLM-NEXT: rcpps (%rdi), %xmm1 # sched: [8:1.00] 4144 ; SLM-NEXT: rcpps %xmm0, %xmm0 # sched: [5:1.00] 4145 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 4146 ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] 4147 ; SLM-NEXT: retq # sched: [4:1.00] 4148 ; 4149 ; SANDY-SSE-LABEL: test_rcpps: 4150 ; SANDY-SSE: # %bb.0: 4151 ; SANDY-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00] 4152 ; SANDY-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00] 4153 ; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4154 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4155 ; 4156 ; SANDY-LABEL: test_rcpps: 4157 ; SANDY: # %bb.0: 4158 ; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] 4159 ; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00] 4160 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4161 ; SANDY-NEXT: retq # sched: [1:1.00] 4162 ; 4163 ; HASWELL-SSE-LABEL: test_rcpps: 4164 ; HASWELL-SSE: # %bb.0: 4165 ; 
HASWELL-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00] 4166 ; HASWELL-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00] 4167 ; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4168 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4169 ; 4170 ; HASWELL-LABEL: test_rcpps: 4171 ; HASWELL: # %bb.0: 4172 ; HASWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] 4173 ; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00] 4174 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4175 ; HASWELL-NEXT: retq # sched: [7:1.00] 4176 ; 4177 ; BROADWELL-SSE-LABEL: test_rcpps: 4178 ; BROADWELL-SSE: # %bb.0: 4179 ; BROADWELL-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00] 4180 ; BROADWELL-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00] 4181 ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4182 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4183 ; 4184 ; BROADWELL-LABEL: test_rcpps: 4185 ; BROADWELL: # %bb.0: 4186 ; BROADWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] 4187 ; BROADWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00] 4188 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4189 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4190 ; 4191 ; SKYLAKE-SSE-LABEL: test_rcpps: 4192 ; SKYLAKE-SSE: # %bb.0: 4193 ; SKYLAKE-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [4:1.00] 4194 ; SKYLAKE-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00] 4195 ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 4196 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4197 ; 4198 ; SKYLAKE-LABEL: test_rcpps: 4199 ; SKYLAKE: # %bb.0: 4200 ; SKYLAKE-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00] 4201 ; SKYLAKE-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00] 4202 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4203 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4204 ; 4205 ; SKX-SSE-LABEL: test_rcpps: 4206 ; SKX-SSE: # %bb.0: 4207 ; SKX-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [4:1.00] 4208 ; SKX-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00] 4209 ; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: 
[4:0.50] 4210 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4211 ; 4212 ; SKX-LABEL: test_rcpps: 4213 ; SKX: # %bb.0: 4214 ; SKX-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00] 4215 ; SKX-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00] 4216 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4217 ; SKX-NEXT: retq # sched: [7:1.00] 4218 ; 4219 ; BTVER2-SSE-LABEL: test_rcpps: 4220 ; BTVER2-SSE: # %bb.0: 4221 ; BTVER2-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [2:1.00] 4222 ; BTVER2-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [7:1.00] 4223 ; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4224 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4225 ; 4226 ; BTVER2-LABEL: test_rcpps: 4227 ; BTVER2: # %bb.0: 4228 ; BTVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [7:1.00] 4229 ; BTVER2-NEXT: vrcpps %xmm0, %xmm0 # sched: [2:1.00] 4230 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4231 ; BTVER2-NEXT: retq # sched: [4:1.00] 4232 ; 4233 ; ZNVER1-SSE-LABEL: test_rcpps: 4234 ; ZNVER1-SSE: # %bb.0: 4235 ; ZNVER1-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:0.50] 4236 ; ZNVER1-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [12:0.50] 4237 ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4238 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4239 ; 4240 ; ZNVER1-LABEL: test_rcpps: 4241 ; ZNVER1: # %bb.0: 4242 ; ZNVER1-NEXT: vrcpps (%rdi), %xmm1 # sched: [12:0.50] 4243 ; ZNVER1-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:0.50] 4244 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4245 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4246 %1 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) 4247 %2 = load <4 x float>, <4 x float> *%a1, align 16 4248 %3 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %2) 4249 %4 = fadd <4 x float> %1, %3 4250 ret <4 x float> %4 4251 } 4252 declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone 4253 4254 ; TODO - rcpss_m 4255 4256 define <4 x float> @test_rcpss(float %a0, float *%a1) { 4257 ; GENERIC-LABEL: test_rcpss: 4258 ; GENERIC: # %bb.0: 4259 ; 
GENERIC-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00] 4260 ; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] 4261 ; GENERIC-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00] 4262 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4263 ; GENERIC-NEXT: retq # sched: [1:1.00] 4264 ; 4265 ; ATOM-LABEL: test_rcpss: 4266 ; ATOM: # %bb.0: 4267 ; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00] 4268 ; ATOM-NEXT: rcpss %xmm0, %xmm0 # sched: [4:4.00] 4269 ; ATOM-NEXT: rcpss %xmm1, %xmm1 # sched: [4:4.00] 4270 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] 4271 ; ATOM-NEXT: retq # sched: [79:39.50] 4272 ; 4273 ; SLM-LABEL: test_rcpss: 4274 ; SLM: # %bb.0: 4275 ; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00] 4276 ; SLM-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00] 4277 ; SLM-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00] 4278 ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4279 ; SLM-NEXT: retq # sched: [4:1.00] 4280 ; 4281 ; SANDY-SSE-LABEL: test_rcpss: 4282 ; SANDY-SSE: # %bb.0: 4283 ; SANDY-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00] 4284 ; SANDY-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] 4285 ; SANDY-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00] 4286 ; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4287 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4288 ; 4289 ; SANDY-LABEL: test_rcpss: 4290 ; SANDY: # %bb.0: 4291 ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] 4292 ; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] 4293 ; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] 4294 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4295 ; SANDY-NEXT: retq # sched: [1:1.00] 4296 ; 4297 ; HASWELL-SSE-LABEL: test_rcpss: 4298 ; HASWELL-SSE: # %bb.0: 4299 ; HASWELL-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00] 4300 ; HASWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 4301 ; HASWELL-SSE-NEXT: rcpss %xmm1, %xmm1 # 
sched: [5:1.00] 4302 ; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4303 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4304 ; 4305 ; HASWELL-LABEL: test_rcpss: 4306 ; HASWELL: # %bb.0: 4307 ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] 4308 ; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 4309 ; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] 4310 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4311 ; HASWELL-NEXT: retq # sched: [7:1.00] 4312 ; 4313 ; BROADWELL-SSE-LABEL: test_rcpss: 4314 ; BROADWELL-SSE: # %bb.0: 4315 ; BROADWELL-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00] 4316 ; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 4317 ; BROADWELL-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00] 4318 ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4319 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4320 ; 4321 ; BROADWELL-LABEL: test_rcpss: 4322 ; BROADWELL: # %bb.0: 4323 ; BROADWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] 4324 ; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 4325 ; BROADWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] 4326 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4327 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4328 ; 4329 ; SKYLAKE-SSE-LABEL: test_rcpss: 4330 ; SKYLAKE-SSE: # %bb.0: 4331 ; SKYLAKE-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [4:1.00] 4332 ; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 4333 ; SKYLAKE-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [4:1.00] 4334 ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 4335 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4336 ; 4337 ; SKYLAKE-LABEL: test_rcpss: 4338 ; SKYLAKE: # %bb.0: 4339 ; SKYLAKE-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] 4340 ; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 4341 ; SKYLAKE-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] 
4342 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4343 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4344 ; 4345 ; SKX-SSE-LABEL: test_rcpss: 4346 ; SKX-SSE: # %bb.0: 4347 ; SKX-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [4:1.00] 4348 ; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 4349 ; SKX-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [4:1.00] 4350 ; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 4351 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4352 ; 4353 ; SKX-LABEL: test_rcpss: 4354 ; SKX: # %bb.0: 4355 ; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] 4356 ; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 4357 ; SKX-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] 4358 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4359 ; SKX-NEXT: retq # sched: [7:1.00] 4360 ; 4361 ; BTVER2-SSE-LABEL: test_rcpss: 4362 ; BTVER2-SSE: # %bb.0: 4363 ; BTVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] 4364 ; BTVER2-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [2:1.00] 4365 ; BTVER2-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [2:1.00] 4366 ; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4367 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4368 ; 4369 ; BTVER2-LABEL: test_rcpss: 4370 ; BTVER2: # %bb.0: 4371 ; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] 4372 ; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [2:1.00] 4373 ; BTVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [2:1.00] 4374 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4375 ; BTVER2-NEXT: retq # sched: [4:1.00] 4376 ; 4377 ; ZNVER1-SSE-LABEL: test_rcpss: 4378 ; ZNVER1-SSE: # %bb.0: 4379 ; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] 4380 ; ZNVER1-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:0.50] 4381 ; ZNVER1-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:0.50] 4382 ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4383 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4384 ; 
4385 ; ZNVER1-LABEL: test_rcpss: 4386 ; ZNVER1: # %bb.0: 4387 ; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] 4388 ; ZNVER1-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:0.50] 4389 ; ZNVER1-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:0.50] 4390 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4391 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4392 %1 = insertelement <4 x float> undef, float %a0, i32 0 4393 %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %1) 4394 %3 = load float, float *%a1, align 4 4395 %4 = insertelement <4 x float> undef, float %3, i32 0 4396 %5 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4) 4397 %6 = fadd <4 x float> %2, %5 4398 ret <4 x float> %6 4399 } 4400 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone 4401 4402 define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { 4403 ; GENERIC-LABEL: test_rsqrtps: 4404 ; GENERIC: # %bb.0: 4405 ; GENERIC-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00] 4406 ; GENERIC-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00] 4407 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4408 ; GENERIC-NEXT: retq # sched: [1:1.00] 4409 ; 4410 ; ATOM-LABEL: test_rsqrtps: 4411 ; ATOM: # %bb.0: 4412 ; ATOM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [10:5.00] 4413 ; ATOM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [9:4.50] 4414 ; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00] 4415 ; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] 4416 ; ATOM-NEXT: retq # sched: [79:39.50] 4417 ; 4418 ; SLM-LABEL: test_rsqrtps: 4419 ; SLM: # %bb.0: 4420 ; SLM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [8:1.00] 4421 ; SLM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [5:1.00] 4422 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 4423 ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] 4424 ; SLM-NEXT: retq # sched: [4:1.00] 4425 ; 4426 ; SANDY-SSE-LABEL: test_rsqrtps: 4427 ; SANDY-SSE: # %bb.0: 4428 ; SANDY-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00] 4429 ; SANDY-SSE-NEXT: rsqrtps (%rdi), 
%xmm0 # sched: [11:1.00] 4430 ; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4431 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4432 ; 4433 ; SANDY-LABEL: test_rsqrtps: 4434 ; SANDY: # %bb.0: 4435 ; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] 4436 ; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00] 4437 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4438 ; SANDY-NEXT: retq # sched: [1:1.00] 4439 ; 4440 ; HASWELL-SSE-LABEL: test_rsqrtps: 4441 ; HASWELL-SSE: # %bb.0: 4442 ; HASWELL-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00] 4443 ; HASWELL-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00] 4444 ; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4445 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4446 ; 4447 ; HASWELL-LABEL: test_rsqrtps: 4448 ; HASWELL: # %bb.0: 4449 ; HASWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] 4450 ; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00] 4451 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4452 ; HASWELL-NEXT: retq # sched: [7:1.00] 4453 ; 4454 ; BROADWELL-SSE-LABEL: test_rsqrtps: 4455 ; BROADWELL-SSE: # %bb.0: 4456 ; BROADWELL-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00] 4457 ; BROADWELL-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00] 4458 ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4459 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4460 ; 4461 ; BROADWELL-LABEL: test_rsqrtps: 4462 ; BROADWELL: # %bb.0: 4463 ; BROADWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] 4464 ; BROADWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00] 4465 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4466 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4467 ; 4468 ; SKYLAKE-SSE-LABEL: test_rsqrtps: 4469 ; SKYLAKE-SSE: # %bb.0: 4470 ; SKYLAKE-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [4:1.00] 4471 ; SKYLAKE-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00] 4472 ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 4473 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 
4474 ; 4475 ; SKYLAKE-LABEL: test_rsqrtps: 4476 ; SKYLAKE: # %bb.0: 4477 ; SKYLAKE-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00] 4478 ; SKYLAKE-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00] 4479 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4480 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4481 ; 4482 ; SKX-SSE-LABEL: test_rsqrtps: 4483 ; SKX-SSE: # %bb.0: 4484 ; SKX-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [4:1.00] 4485 ; SKX-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00] 4486 ; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 4487 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4488 ; 4489 ; SKX-LABEL: test_rsqrtps: 4490 ; SKX: # %bb.0: 4491 ; SKX-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00] 4492 ; SKX-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00] 4493 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4494 ; SKX-NEXT: retq # sched: [7:1.00] 4495 ; 4496 ; BTVER2-SSE-LABEL: test_rsqrtps: 4497 ; BTVER2-SSE: # %bb.0: 4498 ; BTVER2-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [2:1.00] 4499 ; BTVER2-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [7:1.00] 4500 ; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4501 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4502 ; 4503 ; BTVER2-LABEL: test_rsqrtps: 4504 ; BTVER2: # %bb.0: 4505 ; BTVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [7:1.00] 4506 ; BTVER2-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [2:1.00] 4507 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4508 ; BTVER2-NEXT: retq # sched: [4:1.00] 4509 ; 4510 ; ZNVER1-SSE-LABEL: test_rsqrtps: 4511 ; ZNVER1-SSE: # %bb.0: 4512 ; ZNVER1-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:0.50] 4513 ; ZNVER1-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [12:0.50] 4514 ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4515 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4516 ; 4517 ; ZNVER1-LABEL: test_rsqrtps: 4518 ; ZNVER1: # %bb.0: 4519 ; ZNVER1-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [12:0.50] 4520 ; ZNVER1-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:0.50] 4521 ; ZNVER1-NEXT: 
vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4522 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4523 %1 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) 4524 %2 = load <4 x float>, <4 x float> *%a1, align 16 4525 %3 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %2) 4526 %4 = fadd <4 x float> %1, %3 4527 ret <4 x float> %4 4528 } 4529 declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone 4530 4531 ; TODO - rsqrtss_m 4532 4533 define <4 x float> @test_rsqrtss(float %a0, float *%a1) { 4534 ; GENERIC-LABEL: test_rsqrtss: 4535 ; GENERIC: # %bb.0: 4536 ; GENERIC-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00] 4537 ; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] 4538 ; GENERIC-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00] 4539 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4540 ; GENERIC-NEXT: retq # sched: [1:1.00] 4541 ; 4542 ; ATOM-LABEL: test_rsqrtss: 4543 ; ATOM: # %bb.0: 4544 ; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00] 4545 ; ATOM-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:4.00] 4546 ; ATOM-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:4.00] 4547 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] 4548 ; ATOM-NEXT: retq # sched: [79:39.50] 4549 ; 4550 ; SLM-LABEL: test_rsqrtss: 4551 ; SLM: # %bb.0: 4552 ; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00] 4553 ; SLM-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00] 4554 ; SLM-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00] 4555 ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4556 ; SLM-NEXT: retq # sched: [4:1.00] 4557 ; 4558 ; SANDY-SSE-LABEL: test_rsqrtss: 4559 ; SANDY-SSE: # %bb.0: 4560 ; SANDY-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00] 4561 ; SANDY-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] 4562 ; SANDY-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00] 4563 ; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4564 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4565 ; 4566 ; SANDY-LABEL: 
test_rsqrtss: 4567 ; SANDY: # %bb.0: 4568 ; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] 4569 ; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] 4570 ; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] 4571 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4572 ; SANDY-NEXT: retq # sched: [1:1.00] 4573 ; 4574 ; HASWELL-SSE-LABEL: test_rsqrtss: 4575 ; HASWELL-SSE: # %bb.0: 4576 ; HASWELL-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00] 4577 ; HASWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 4578 ; HASWELL-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00] 4579 ; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4580 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4581 ; 4582 ; HASWELL-LABEL: test_rsqrtss: 4583 ; HASWELL: # %bb.0: 4584 ; HASWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] 4585 ; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 4586 ; HASWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] 4587 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4588 ; HASWELL-NEXT: retq # sched: [7:1.00] 4589 ; 4590 ; BROADWELL-SSE-LABEL: test_rsqrtss: 4591 ; BROADWELL-SSE: # %bb.0: 4592 ; BROADWELL-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00] 4593 ; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 4594 ; BROADWELL-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00] 4595 ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4596 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4597 ; 4598 ; BROADWELL-LABEL: test_rsqrtss: 4599 ; BROADWELL: # %bb.0: 4600 ; BROADWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] 4601 ; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 4602 ; BROADWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] 4603 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4604 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4605 ; 4606 ; 
SKYLAKE-SSE-LABEL: test_rsqrtss: 4607 ; SKYLAKE-SSE: # %bb.0: 4608 ; SKYLAKE-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:1.00] 4609 ; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 4610 ; SKYLAKE-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:1.00] 4611 ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 4612 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4613 ; 4614 ; SKYLAKE-LABEL: test_rsqrtss: 4615 ; SKYLAKE: # %bb.0: 4616 ; SKYLAKE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] 4617 ; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 4618 ; SKYLAKE-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] 4619 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4620 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4621 ; 4622 ; SKX-SSE-LABEL: test_rsqrtss: 4623 ; SKX-SSE: # %bb.0: 4624 ; SKX-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:1.00] 4625 ; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 4626 ; SKX-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:1.00] 4627 ; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 4628 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4629 ; 4630 ; SKX-LABEL: test_rsqrtss: 4631 ; SKX: # %bb.0: 4632 ; SKX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] 4633 ; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 4634 ; SKX-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] 4635 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4636 ; SKX-NEXT: retq # sched: [7:1.00] 4637 ; 4638 ; BTVER2-SSE-LABEL: test_rsqrtss: 4639 ; BTVER2-SSE: # %bb.0: 4640 ; BTVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] 4641 ; BTVER2-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [2:1.00] 4642 ; BTVER2-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [2:1.00] 4643 ; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4644 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4645 ; 4646 ; BTVER2-LABEL: test_rsqrtss: 4647 ; BTVER2: # %bb.0: 4648 ; BTVER2-NEXT: 
vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] 4649 ; BTVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [2:1.00] 4650 ; BTVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [2:1.00] 4651 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4652 ; BTVER2-NEXT: retq # sched: [4:1.00] 4653 ; 4654 ; ZNVER1-SSE-LABEL: test_rsqrtss: 4655 ; ZNVER1-SSE: # %bb.0: 4656 ; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] 4657 ; ZNVER1-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:0.50] 4658 ; ZNVER1-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:0.50] 4659 ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4660 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4661 ; 4662 ; ZNVER1-LABEL: test_rsqrtss: 4663 ; ZNVER1: # %bb.0: 4664 ; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] 4665 ; ZNVER1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:0.50] 4666 ; ZNVER1-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:0.50] 4667 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4668 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4669 %1 = insertelement <4 x float> undef, float %a0, i32 0 4670 %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %1) 4671 %3 = load float, float *%a1, align 4 4672 %4 = insertelement <4 x float> undef, float %3, i32 0 4673 %5 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4) 4674 %6 = fadd <4 x float> %2, %5 4675 ret <4 x float> %6 4676 } 4677 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone 4678 4679 define void @test_sfence() { 4680 ; GENERIC-LABEL: test_sfence: 4681 ; GENERIC: # %bb.0: 4682 ; GENERIC-NEXT: sfence # sched: [1:1.00] 4683 ; GENERIC-NEXT: retq # sched: [1:1.00] 4684 ; 4685 ; ATOM-LABEL: test_sfence: 4686 ; ATOM: # %bb.0: 4687 ; ATOM-NEXT: sfence # sched: [1:1.00] 4688 ; ATOM-NEXT: nop # sched: [1:0.50] 4689 ; ATOM-NEXT: nop # sched: [1:0.50] 4690 ; ATOM-NEXT: nop # sched: [1:0.50] 4691 ; ATOM-NEXT: nop # sched: [1:0.50] 4692 ; ATOM-NEXT: nop # 
sched: [1:0.50] 4693 ; ATOM-NEXT: nop # sched: [1:0.50] 4694 ; ATOM-NEXT: retq # sched: [79:39.50] 4695 ; 4696 ; SLM-LABEL: test_sfence: 4697 ; SLM: # %bb.0: 4698 ; SLM-NEXT: sfence # sched: [1:1.00] 4699 ; SLM-NEXT: retq # sched: [4:1.00] 4700 ; 4701 ; SANDY-SSE-LABEL: test_sfence: 4702 ; SANDY-SSE: # %bb.0: 4703 ; SANDY-SSE-NEXT: sfence # sched: [1:1.00] 4704 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4705 ; 4706 ; SANDY-LABEL: test_sfence: 4707 ; SANDY: # %bb.0: 4708 ; SANDY-NEXT: sfence # sched: [1:1.00] 4709 ; SANDY-NEXT: retq # sched: [1:1.00] 4710 ; 4711 ; HASWELL-SSE-LABEL: test_sfence: 4712 ; HASWELL-SSE: # %bb.0: 4713 ; HASWELL-SSE-NEXT: sfence # sched: [2:0.33] 4714 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4715 ; 4716 ; HASWELL-LABEL: test_sfence: 4717 ; HASWELL: # %bb.0: 4718 ; HASWELL-NEXT: sfence # sched: [2:0.33] 4719 ; HASWELL-NEXT: retq # sched: [7:1.00] 4720 ; 4721 ; BROADWELL-SSE-LABEL: test_sfence: 4722 ; BROADWELL-SSE: # %bb.0: 4723 ; BROADWELL-SSE-NEXT: sfence # sched: [2:0.33] 4724 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4725 ; 4726 ; BROADWELL-LABEL: test_sfence: 4727 ; BROADWELL: # %bb.0: 4728 ; BROADWELL-NEXT: sfence # sched: [2:0.33] 4729 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4730 ; 4731 ; SKYLAKE-SSE-LABEL: test_sfence: 4732 ; SKYLAKE-SSE: # %bb.0: 4733 ; SKYLAKE-SSE-NEXT: sfence # sched: [2:0.33] 4734 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4735 ; 4736 ; SKYLAKE-LABEL: test_sfence: 4737 ; SKYLAKE: # %bb.0: 4738 ; SKYLAKE-NEXT: sfence # sched: [2:0.33] 4739 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4740 ; 4741 ; SKX-SSE-LABEL: test_sfence: 4742 ; SKX-SSE: # %bb.0: 4743 ; SKX-SSE-NEXT: sfence # sched: [2:0.33] 4744 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4745 ; 4746 ; SKX-LABEL: test_sfence: 4747 ; SKX: # %bb.0: 4748 ; SKX-NEXT: sfence # sched: [2:0.33] 4749 ; SKX-NEXT: retq # sched: [7:1.00] 4750 ; 4751 ; BTVER2-SSE-LABEL: test_sfence: 4752 ; BTVER2-SSE: # %bb.0: 4753 ; BTVER2-SSE-NEXT: sfence # sched: [1:1.00] 4754 ; 
BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4755 ; 4756 ; BTVER2-LABEL: test_sfence: 4757 ; BTVER2: # %bb.0: 4758 ; BTVER2-NEXT: sfence # sched: [1:1.00] 4759 ; BTVER2-NEXT: retq # sched: [4:1.00] 4760 ; 4761 ; ZNVER1-SSE-LABEL: test_sfence: 4762 ; ZNVER1-SSE: # %bb.0: 4763 ; ZNVER1-SSE-NEXT: sfence # sched: [1:0.50] 4764 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4765 ; 4766 ; ZNVER1-LABEL: test_sfence: 4767 ; ZNVER1: # %bb.0: 4768 ; ZNVER1-NEXT: sfence # sched: [1:0.50] 4769 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4770 call void @llvm.x86.sse.sfence() 4771 ret void 4772 } 4773 declare void @llvm.x86.sse.sfence() nounwind readnone 4774 4775 define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) nounwind { 4776 ; GENERIC-LABEL: test_shufps: 4777 ; GENERIC: # %bb.0: 4778 ; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] 4779 ; GENERIC-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] 4780 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4781 ; GENERIC-NEXT: retq # sched: [1:1.00] 4782 ; 4783 ; ATOM-LABEL: test_shufps: 4784 ; ATOM: # %bb.0: 4785 ; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] 4786 ; ATOM-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [1:1.00] 4787 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] 4788 ; ATOM-NEXT: retq # sched: [79:39.50] 4789 ; 4790 ; SLM-LABEL: test_shufps: 4791 ; SLM: # %bb.0: 4792 ; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] 4793 ; SLM-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [4:1.00] 4794 ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4795 ; SLM-NEXT: retq # sched: [4:1.00] 4796 ; 4797 ; SANDY-SSE-LABEL: test_shufps: 4798 ; SANDY-SSE: # %bb.0: 4799 ; SANDY-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] 4800 ; SANDY-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] 4801 ; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4802 ; SANDY-SSE-NEXT: retq # 
sched: [1:1.00] 4803 ; 4804 ; SANDY-LABEL: test_shufps: 4805 ; SANDY: # %bb.0: 4806 ; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] 4807 ; SANDY-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] 4808 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4809 ; SANDY-NEXT: retq # sched: [1:1.00] 4810 ; 4811 ; HASWELL-SSE-LABEL: test_shufps: 4812 ; HASWELL-SSE: # %bb.0: 4813 ; HASWELL-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] 4814 ; HASWELL-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] 4815 ; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4816 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4817 ; 4818 ; HASWELL-LABEL: test_shufps: 4819 ; HASWELL: # %bb.0: 4820 ; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] 4821 ; HASWELL-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] 4822 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4823 ; HASWELL-NEXT: retq # sched: [7:1.00] 4824 ; 4825 ; BROADWELL-SSE-LABEL: test_shufps: 4826 ; BROADWELL-SSE: # %bb.0: 4827 ; BROADWELL-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] 4828 ; BROADWELL-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00] 4829 ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4830 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4831 ; 4832 ; BROADWELL-LABEL: test_shufps: 4833 ; BROADWELL: # %bb.0: 4834 ; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] 4835 ; BROADWELL-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00] 4836 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4837 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4838 ; 4839 ; SKYLAKE-SSE-LABEL: test_shufps: 4840 ; SKYLAKE-SSE: # %bb.0: 4841 ; SKYLAKE-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] 4842 ; SKYLAKE-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] 4843 ; 
SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 4844 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4845 ; 4846 ; SKYLAKE-LABEL: test_shufps: 4847 ; SKYLAKE: # %bb.0: 4848 ; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] 4849 ; SKYLAKE-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] 4850 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4851 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4852 ; 4853 ; SKX-SSE-LABEL: test_shufps: 4854 ; SKX-SSE: # %bb.0: 4855 ; SKX-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] 4856 ; SKX-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] 4857 ; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 4858 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4859 ; 4860 ; SKX-LABEL: test_shufps: 4861 ; SKX: # %bb.0: 4862 ; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] 4863 ; SKX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] 4864 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4865 ; SKX-NEXT: retq # sched: [7:1.00] 4866 ; 4867 ; BTVER2-SSE-LABEL: test_shufps: 4868 ; BTVER2-SSE: # %bb.0: 4869 ; BTVER2-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50] 4870 ; BTVER2-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00] 4871 ; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4872 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4873 ; 4874 ; BTVER2-LABEL: test_shufps: 4875 ; BTVER2: # %bb.0: 4876 ; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50] 4877 ; BTVER2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00] 4878 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4879 ; BTVER2-NEXT: retq # sched: [4:1.00] 4880 ; 4881 ; ZNVER1-SSE-LABEL: test_shufps: 4882 ; ZNVER1-SSE: # %bb.0: 4883 ; ZNVER1-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50] 4884 ; ZNVER1-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [8:0.50] 
4885 ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4886 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4887 ; 4888 ; ZNVER1-LABEL: test_shufps: 4889 ; ZNVER1: # %bb.0: 4890 ; ZNVER1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50] 4891 ; ZNVER1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [8:0.50] 4892 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4893 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4894 %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4> 4895 %2 = load <4 x float>, <4 x float> *%a2, align 16 4896 %3 = shufflevector <4 x float> %a1, <4 x float> %2, <4 x i32> <i32 0, i32 3, i32 4, i32 4> 4897 %4 = fadd <4 x float> %1, %3 4898 ret <4 x float> %4 4899 } 4900 4901 define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { 4902 ; GENERIC-LABEL: test_sqrtps: 4903 ; GENERIC: # %bb.0: 4904 ; GENERIC-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:14.00] 4905 ; GENERIC-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:14.00] 4906 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4907 ; GENERIC-NEXT: retq # sched: [1:1.00] 4908 ; 4909 ; ATOM-LABEL: test_sqrtps: 4910 ; ATOM: # %bb.0: 4911 ; ATOM-NEXT: sqrtps %xmm0, %xmm1 # sched: [70:35.00] 4912 ; ATOM-NEXT: sqrtps (%rdi), %xmm0 # sched: [70:35.00] 4913 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] 4914 ; ATOM-NEXT: retq # sched: [79:39.50] 4915 ; 4916 ; SLM-LABEL: test_sqrtps: 4917 ; SLM: # %bb.0: 4918 ; SLM-NEXT: sqrtps (%rdi), %xmm1 # sched: [44:40.00] 4919 ; SLM-NEXT: sqrtps %xmm0, %xmm0 # sched: [41:40.00] 4920 ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 4921 ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] 4922 ; SLM-NEXT: retq # sched: [4:1.00] 4923 ; 4924 ; SANDY-SSE-LABEL: test_sqrtps: 4925 ; SANDY-SSE: # %bb.0: 4926 ; SANDY-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:14.00] 4927 ; SANDY-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:14.00] 4928 ; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4929 ; SANDY-SSE-NEXT: 
retq # sched: [1:1.00] 4930 ; 4931 ; SANDY-LABEL: test_sqrtps: 4932 ; SANDY: # %bb.0: 4933 ; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:14.00] 4934 ; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:14.00] 4935 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4936 ; SANDY-NEXT: retq # sched: [1:1.00] 4937 ; 4938 ; HASWELL-SSE-LABEL: test_sqrtps: 4939 ; HASWELL-SSE: # %bb.0: 4940 ; HASWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [11:7.00] 4941 ; HASWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [17:7.00] 4942 ; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4943 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4944 ; 4945 ; HASWELL-LABEL: test_sqrtps: 4946 ; HASWELL: # %bb.0: 4947 ; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [11:7.00] 4948 ; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [17:7.00] 4949 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4950 ; HASWELL-NEXT: retq # sched: [7:1.00] 4951 ; 4952 ; BROADWELL-SSE-LABEL: test_sqrtps: 4953 ; BROADWELL-SSE: # %bb.0: 4954 ; BROADWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [11:7.00] 4955 ; BROADWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [16:7.00] 4956 ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4957 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4958 ; 4959 ; BROADWELL-LABEL: test_sqrtps: 4960 ; BROADWELL: # %bb.0: 4961 ; BROADWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [11:7.00] 4962 ; BROADWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [16:7.00] 4963 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4964 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4965 ; 4966 ; SKYLAKE-SSE-LABEL: test_sqrtps: 4967 ; SKYLAKE-SSE: # %bb.0: 4968 ; SKYLAKE-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [12:3.00] 4969 ; SKYLAKE-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:3.00] 4970 ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 4971 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4972 ; 4973 ; SKYLAKE-LABEL: test_sqrtps: 4974 ; SKYLAKE: # %bb.0: 4975 ; SKYLAKE-NEXT: vsqrtps %xmm0, %xmm0 # sched: 
[12:3.00] 4976 ; SKYLAKE-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:3.00] 4977 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4978 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4979 ; 4980 ; SKX-SSE-LABEL: test_sqrtps: 4981 ; SKX-SSE: # %bb.0: 4982 ; SKX-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [12:3.00] 4983 ; SKX-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:3.00] 4984 ; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 4985 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4986 ; 4987 ; SKX-LABEL: test_sqrtps: 4988 ; SKX: # %bb.0: 4989 ; SKX-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:3.00] 4990 ; SKX-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:3.00] 4991 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4992 ; SKX-NEXT: retq # sched: [7:1.00] 4993 ; 4994 ; BTVER2-SSE-LABEL: test_sqrtps: 4995 ; BTVER2-SSE: # %bb.0: 4996 ; BTVER2-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [21:21.00] 4997 ; BTVER2-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [26:21.00] 4998 ; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 4999 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 5000 ; 5001 ; BTVER2-LABEL: test_sqrtps: 5002 ; BTVER2: # %bb.0: 5003 ; BTVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [26:21.00] 5004 ; BTVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [21:21.00] 5005 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5006 ; BTVER2-NEXT: retq # sched: [4:1.00] 5007 ; 5008 ; ZNVER1-SSE-LABEL: test_sqrtps: 5009 ; ZNVER1-SSE: # %bb.0: 5010 ; ZNVER1-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [20:20.00] 5011 ; ZNVER1-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [27:20.00] 5012 ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 5013 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 5014 ; 5015 ; ZNVER1-LABEL: test_sqrtps: 5016 ; ZNVER1: # %bb.0: 5017 ; ZNVER1-NEXT: vsqrtps (%rdi), %xmm1 # sched: [27:20.00] 5018 ; ZNVER1-NEXT: vsqrtps %xmm0, %xmm0 # sched: [20:20.00] 5019 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5020 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5021 %1 = call <4 x float> 
@llvm.x86.sse.sqrt.ps(<4 x float> %a0) 5022 %2 = load <4 x float>, <4 x float> *%a1, align 16 5023 %3 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %2) 5024 %4 = fadd <4 x float> %1, %3 5025 ret <4 x float> %4 5026 } 5027 declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone 5028 5029 ; TODO - sqrtss_m 5030 5031 define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { 5032 ; GENERIC-LABEL: test_sqrtss: 5033 ; GENERIC: # %bb.0: 5034 ; GENERIC-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:14.00] 5035 ; GENERIC-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] 5036 ; GENERIC-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:14.00] 5037 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 5038 ; GENERIC-NEXT: retq # sched: [1:1.00] 5039 ; 5040 ; ATOM-LABEL: test_sqrtss: 5041 ; ATOM: # %bb.0: 5042 ; ATOM-NEXT: movaps (%rdi), %xmm1 # sched: [1:1.00] 5043 ; ATOM-NEXT: sqrtss %xmm0, %xmm0 # sched: [34:17.00] 5044 ; ATOM-NEXT: sqrtss %xmm1, %xmm1 # sched: [34:17.00] 5045 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] 5046 ; ATOM-NEXT: retq # sched: [79:39.50] 5047 ; 5048 ; SLM-LABEL: test_sqrtss: 5049 ; SLM: # %bb.0: 5050 ; SLM-NEXT: movaps (%rdi), %xmm1 # sched: [3:1.00] 5051 ; SLM-NEXT: sqrtss %xmm0, %xmm0 # sched: [20:20.00] 5052 ; SLM-NEXT: sqrtss %xmm1, %xmm1 # sched: [20:20.00] 5053 ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 5054 ; SLM-NEXT: retq # sched: [4:1.00] 5055 ; 5056 ; SANDY-SSE-LABEL: test_sqrtss: 5057 ; SANDY-SSE: # %bb.0: 5058 ; SANDY-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:14.00] 5059 ; SANDY-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] 5060 ; SANDY-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:14.00] 5061 ; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 5062 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 5063 ; 5064 ; SANDY-LABEL: test_sqrtss: 5065 ; SANDY: # %bb.0: 5066 ; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00] 5067 ; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] 5068 ; SANDY-NEXT: vsqrtss %xmm1, 
%xmm1, %xmm1 # sched: [14:14.00] 5069 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5070 ; SANDY-NEXT: retq # sched: [1:1.00] 5071 ; 5072 ; HASWELL-SSE-LABEL: test_sqrtss: 5073 ; HASWELL-SSE: # %bb.0: 5074 ; HASWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [11:7.00] 5075 ; HASWELL-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] 5076 ; HASWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [11:7.00] 5077 ; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 5078 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 5079 ; 5080 ; HASWELL-LABEL: test_sqrtss: 5081 ; HASWELL: # %bb.0: 5082 ; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [11:7.00] 5083 ; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] 5084 ; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [11:7.00] 5085 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5086 ; HASWELL-NEXT: retq # sched: [7:1.00] 5087 ; 5088 ; BROADWELL-SSE-LABEL: test_sqrtss: 5089 ; BROADWELL-SSE: # %bb.0: 5090 ; BROADWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [11:4.00] 5091 ; BROADWELL-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:0.50] 5092 ; BROADWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [11:4.00] 5093 ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 5094 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 5095 ; 5096 ; BROADWELL-LABEL: test_sqrtss: 5097 ; BROADWELL: # %bb.0: 5098 ; BROADWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [11:4.00] 5099 ; BROADWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:0.50] 5100 ; BROADWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [11:4.00] 5101 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5102 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5103 ; 5104 ; SKYLAKE-SSE-LABEL: test_sqrtss: 5105 ; SKYLAKE-SSE: # %bb.0: 5106 ; SKYLAKE-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [12:3.00] 5107 ; SKYLAKE-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] 5108 ; SKYLAKE-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [12:3.00] 5109 ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 
5110 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 5111 ; 5112 ; SKYLAKE-LABEL: test_sqrtss: 5113 ; SKYLAKE: # %bb.0: 5114 ; SKYLAKE-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00] 5115 ; SKYLAKE-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] 5116 ; SKYLAKE-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:3.00] 5117 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 5118 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5119 ; 5120 ; SKX-SSE-LABEL: test_sqrtss: 5121 ; SKX-SSE: # %bb.0: 5122 ; SKX-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [12:3.00] 5123 ; SKX-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] 5124 ; SKX-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [12:3.00] 5125 ; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 5126 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 5127 ; 5128 ; SKX-LABEL: test_sqrtss: 5129 ; SKX: # %bb.0: 5130 ; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00] 5131 ; SKX-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] 5132 ; SKX-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:3.00] 5133 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 5134 ; SKX-NEXT: retq # sched: [7:1.00] 5135 ; 5136 ; BTVER2-SSE-LABEL: test_sqrtss: 5137 ; BTVER2-SSE: # %bb.0: 5138 ; BTVER2-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:1.00] 5139 ; BTVER2-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [21:21.00] 5140 ; BTVER2-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [21:21.00] 5141 ; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 5142 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 5143 ; 5144 ; BTVER2-LABEL: test_sqrtss: 5145 ; BTVER2: # %bb.0: 5146 ; BTVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:1.00] 5147 ; BTVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [21:21.00] 5148 ; BTVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [21:21.00] 5149 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5150 ; BTVER2-NEXT: retq # sched: [4:1.00] 5151 ; 5152 ; ZNVER1-SSE-LABEL: test_sqrtss: 5153 ; ZNVER1-SSE: # %bb.0: 5154 ; ZNVER1-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [8:0.50] 5155 
; ZNVER1-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [20:20.00] 5156 ; ZNVER1-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [20:20.00] 5157 ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 5158 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 5159 ; 5160 ; ZNVER1-LABEL: test_sqrtss: 5161 ; ZNVER1: # %bb.0: 5162 ; ZNVER1-NEXT: vmovaps (%rdi), %xmm1 # sched: [8:0.50] 5163 ; ZNVER1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [20:20.00] 5164 ; ZNVER1-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [20:20.00] 5165 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5166 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5167 %1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) 5168 %2 = load <4 x float>, <4 x float> *%a1, align 16 5169 %3 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %2) 5170 %4 = fadd <4 x float> %1, %3 5171 ret <4 x float> %4 5172 } 5173 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone 5174 5175 define i32 @test_stmxcsr() { 5176 ; GENERIC-LABEL: test_stmxcsr: 5177 ; GENERIC: # %bb.0: 5178 ; GENERIC-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] 5179 ; GENERIC-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] 5180 ; GENERIC-NEXT: retq # sched: [1:1.00] 5181 ; 5182 ; ATOM-LABEL: test_stmxcsr: 5183 ; ATOM: # %bb.0: 5184 ; ATOM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [15:7.50] 5185 ; ATOM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:1.00] 5186 ; ATOM-NEXT: retq # sched: [79:39.50] 5187 ; 5188 ; SLM-LABEL: test_stmxcsr: 5189 ; SLM: # %bb.0: 5190 ; SLM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] 5191 ; SLM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00] 5192 ; SLM-NEXT: retq # sched: [4:1.00] 5193 ; 5194 ; SANDY-SSE-LABEL: test_stmxcsr: 5195 ; SANDY-SSE: # %bb.0: 5196 ; SANDY-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] 5197 ; SANDY-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] 5198 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 5199 ; 5200 ; SANDY-LABEL: test_stmxcsr: 5201 ; SANDY: # %bb.0: 5202 ; 
SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] 5203 ; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] 5204 ; SANDY-NEXT: retq # sched: [1:1.00] 5205 ; 5206 ; HASWELL-SSE-LABEL: test_stmxcsr: 5207 ; HASWELL-SSE: # %bb.0: 5208 ; HASWELL-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] 5209 ; HASWELL-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] 5210 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 5211 ; 5212 ; HASWELL-LABEL: test_stmxcsr: 5213 ; HASWELL: # %bb.0: 5214 ; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] 5215 ; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] 5216 ; HASWELL-NEXT: retq # sched: [7:1.00] 5217 ; 5218 ; BROADWELL-SSE-LABEL: test_stmxcsr: 5219 ; BROADWELL-SSE: # %bb.0: 5220 ; BROADWELL-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] 5221 ; BROADWELL-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] 5222 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 5223 ; 5224 ; BROADWELL-LABEL: test_stmxcsr: 5225 ; BROADWELL: # %bb.0: 5226 ; BROADWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] 5227 ; BROADWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] 5228 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5229 ; 5230 ; SKYLAKE-SSE-LABEL: test_stmxcsr: 5231 ; SKYLAKE-SSE: # %bb.0: 5232 ; SKYLAKE-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] 5233 ; SKYLAKE-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] 5234 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 5235 ; 5236 ; SKYLAKE-LABEL: test_stmxcsr: 5237 ; SKYLAKE: # %bb.0: 5238 ; SKYLAKE-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] 5239 ; SKYLAKE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] 5240 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5241 ; 5242 ; SKX-SSE-LABEL: test_stmxcsr: 5243 ; SKX-SSE: # %bb.0: 5244 ; SKX-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] 5245 ; SKX-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] 5246 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 5247 ; 5248 ; SKX-LABEL: 
test_stmxcsr: 5249 ; SKX: # %bb.0: 5250 ; SKX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] 5251 ; SKX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] 5252 ; SKX-NEXT: retq # sched: [7:1.00] 5253 ; 5254 ; BTVER2-SSE-LABEL: test_stmxcsr: 5255 ; BTVER2-SSE: # %bb.0: 5256 ; BTVER2-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] 5257 ; BTVER2-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00] 5258 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 5259 ; 5260 ; BTVER2-LABEL: test_stmxcsr: 5261 ; BTVER2: # %bb.0: 5262 ; BTVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] 5263 ; BTVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00] 5264 ; BTVER2-NEXT: retq # sched: [4:1.00] 5265 ; 5266 ; ZNVER1-SSE-LABEL: test_stmxcsr: 5267 ; ZNVER1-SSE: # %bb.0: 5268 ; ZNVER1-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [100:0.25] 5269 ; ZNVER1-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [8:0.50] 5270 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 5271 ; 5272 ; ZNVER1-LABEL: test_stmxcsr: 5273 ; ZNVER1: # %bb.0: 5274 ; ZNVER1-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [100:0.25] 5275 ; ZNVER1-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [8:0.50] 5276 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5277 %1 = alloca i32, align 4 5278 %2 = bitcast i32* %1 to i8* 5279 call void @llvm.x86.sse.stmxcsr(i8* %2) 5280 %3 = load i32, i32* %1, align 4 5281 ret i32 %3 5282 } 5283 declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone 5284 5285 define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { 5286 ; GENERIC-LABEL: test_subps: 5287 ; GENERIC: # %bb.0: 5288 ; GENERIC-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] 5289 ; GENERIC-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00] 5290 ; GENERIC-NEXT: retq # sched: [1:1.00] 5291 ; 5292 ; ATOM-LABEL: test_subps: 5293 ; ATOM: # %bb.0: 5294 ; ATOM-NEXT: subps %xmm1, %xmm0 # sched: [5:5.00] 5295 ; ATOM-NEXT: subps (%rdi), %xmm0 # sched: [5:5.00] 5296 ; ATOM-NEXT: retq # sched: [79:39.50] 5297 ; 5298 ; SLM-LABEL: 
test_subps: 5299 ; SLM: # %bb.0: 5300 ; SLM-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] 5301 ; SLM-NEXT: subps (%rdi), %xmm0 # sched: [6:1.00] 5302 ; SLM-NEXT: retq # sched: [4:1.00] 5303 ; 5304 ; SANDY-SSE-LABEL: test_subps: 5305 ; SANDY-SSE: # %bb.0: 5306 ; SANDY-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] 5307 ; SANDY-SSE-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00] 5308 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 5309 ; 5310 ; SANDY-LABEL: test_subps: 5311 ; SANDY: # %bb.0: 5312 ; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5313 ; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 5314 ; SANDY-NEXT: retq # sched: [1:1.00] 5315 ; 5316 ; HASWELL-SSE-LABEL: test_subps: 5317 ; HASWELL-SSE: # %bb.0: 5318 ; HASWELL-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] 5319 ; HASWELL-SSE-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00] 5320 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 5321 ; 5322 ; HASWELL-LABEL: test_subps: 5323 ; HASWELL: # %bb.0: 5324 ; HASWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5325 ; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 5326 ; HASWELL-NEXT: retq # sched: [7:1.00] 5327 ; 5328 ; BROADWELL-SSE-LABEL: test_subps: 5329 ; BROADWELL-SSE: # %bb.0: 5330 ; BROADWELL-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] 5331 ; BROADWELL-SSE-NEXT: subps (%rdi), %xmm0 # sched: [8:1.00] 5332 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 5333 ; 5334 ; BROADWELL-LABEL: test_subps: 5335 ; BROADWELL: # %bb.0: 5336 ; BROADWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5337 ; BROADWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 5338 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5339 ; 5340 ; SKYLAKE-SSE-LABEL: test_subps: 5341 ; SKYLAKE-SSE: # %bb.0: 5342 ; SKYLAKE-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.50] 5343 ; SKYLAKE-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:0.50] 5344 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 5345 ; 5346 ; SKYLAKE-LABEL: test_subps: 5347 ; SKYLAKE: # %bb.0: 5348 ; SKYLAKE-NEXT: vsubps 
%xmm1, %xmm0, %xmm0 # sched: [4:0.50] 5349 ; SKYLAKE-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 5350 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5351 ; 5352 ; SKX-SSE-LABEL: test_subps: 5353 ; SKX-SSE: # %bb.0: 5354 ; SKX-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.50] 5355 ; SKX-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:0.50] 5356 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 5357 ; 5358 ; SKX-LABEL: test_subps: 5359 ; SKX: # %bb.0: 5360 ; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 5361 ; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 5362 ; SKX-NEXT: retq # sched: [7:1.00] 5363 ; 5364 ; BTVER2-SSE-LABEL: test_subps: 5365 ; BTVER2-SSE: # %bb.0: 5366 ; BTVER2-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] 5367 ; BTVER2-SSE-NEXT: subps (%rdi), %xmm0 # sched: [8:1.00] 5368 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 5369 ; 5370 ; BTVER2-LABEL: test_subps: 5371 ; BTVER2: # %bb.0: 5372 ; BTVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5373 ; BTVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 5374 ; BTVER2-NEXT: retq # sched: [4:1.00] 5375 ; 5376 ; ZNVER1-SSE-LABEL: test_subps: 5377 ; ZNVER1-SSE: # %bb.0: 5378 ; ZNVER1-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] 5379 ; ZNVER1-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:1.00] 5380 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 5381 ; 5382 ; ZNVER1-LABEL: test_subps: 5383 ; ZNVER1: # %bb.0: 5384 ; ZNVER1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5385 ; ZNVER1-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 5386 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5387 %1 = fsub <4 x float> %a0, %a1 5388 %2 = load <4 x float>, <4 x float> *%a2, align 16 5389 %3 = fsub <4 x float> %1, %2 5390 ret <4 x float> %3 5391 } 5392 5393 define float @test_subss(float %a0, float %a1, float *%a2) { 5394 ; GENERIC-LABEL: test_subss: 5395 ; GENERIC: # %bb.0: 5396 ; GENERIC-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] 5397 ; GENERIC-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00] 5398 ; GENERIC-NEXT: retq # 
sched: [1:1.00] 5399 ; 5400 ; ATOM-LABEL: test_subss: 5401 ; ATOM: # %bb.0: 5402 ; ATOM-NEXT: subss %xmm1, %xmm0 # sched: [5:5.00] 5403 ; ATOM-NEXT: subss (%rdi), %xmm0 # sched: [5:5.00] 5404 ; ATOM-NEXT: retq # sched: [79:39.50] 5405 ; 5406 ; SLM-LABEL: test_subss: 5407 ; SLM: # %bb.0: 5408 ; SLM-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] 5409 ; SLM-NEXT: subss (%rdi), %xmm0 # sched: [6:1.00] 5410 ; SLM-NEXT: retq # sched: [4:1.00] 5411 ; 5412 ; SANDY-SSE-LABEL: test_subss: 5413 ; SANDY-SSE: # %bb.0: 5414 ; SANDY-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] 5415 ; SANDY-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00] 5416 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 5417 ; 5418 ; SANDY-LABEL: test_subss: 5419 ; SANDY: # %bb.0: 5420 ; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5421 ; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 5422 ; SANDY-NEXT: retq # sched: [1:1.00] 5423 ; 5424 ; HASWELL-SSE-LABEL: test_subss: 5425 ; HASWELL-SSE: # %bb.0: 5426 ; HASWELL-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] 5427 ; HASWELL-SSE-NEXT: subss (%rdi), %xmm0 # sched: [8:1.00] 5428 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 5429 ; 5430 ; HASWELL-LABEL: test_subss: 5431 ; HASWELL: # %bb.0: 5432 ; HASWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5433 ; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 5434 ; HASWELL-NEXT: retq # sched: [7:1.00] 5435 ; 5436 ; BROADWELL-SSE-LABEL: test_subss: 5437 ; BROADWELL-SSE: # %bb.0: 5438 ; BROADWELL-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] 5439 ; BROADWELL-SSE-NEXT: subss (%rdi), %xmm0 # sched: [8:1.00] 5440 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 5441 ; 5442 ; BROADWELL-LABEL: test_subss: 5443 ; BROADWELL: # %bb.0: 5444 ; BROADWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5445 ; BROADWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 5446 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5447 ; 5448 ; SKYLAKE-SSE-LABEL: test_subss: 5449 ; SKYLAKE-SSE: # %bb.0: 5450 ; 
SKYLAKE-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.50] 5451 ; SKYLAKE-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:0.50] 5452 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 5453 ; 5454 ; SKYLAKE-LABEL: test_subss: 5455 ; SKYLAKE: # %bb.0: 5456 ; SKYLAKE-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 5457 ; SKYLAKE-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 5458 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5459 ; 5460 ; SKX-SSE-LABEL: test_subss: 5461 ; SKX-SSE: # %bb.0: 5462 ; SKX-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.50] 5463 ; SKX-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:0.50] 5464 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 5465 ; 5466 ; SKX-LABEL: test_subss: 5467 ; SKX: # %bb.0: 5468 ; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 5469 ; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 5470 ; SKX-NEXT: retq # sched: [7:1.00] 5471 ; 5472 ; BTVER2-SSE-LABEL: test_subss: 5473 ; BTVER2-SSE: # %bb.0: 5474 ; BTVER2-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] 5475 ; BTVER2-SSE-NEXT: subss (%rdi), %xmm0 # sched: [8:1.00] 5476 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 5477 ; 5478 ; BTVER2-LABEL: test_subss: 5479 ; BTVER2: # %bb.0: 5480 ; BTVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5481 ; BTVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 5482 ; BTVER2-NEXT: retq # sched: [4:1.00] 5483 ; 5484 ; ZNVER1-SSE-LABEL: test_subss: 5485 ; ZNVER1-SSE: # %bb.0: 5486 ; ZNVER1-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] 5487 ; ZNVER1-SSE-NEXT: subss (%rdi), %xmm0 # sched: [10:1.00] 5488 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 5489 ; 5490 ; ZNVER1-LABEL: test_subss: 5491 ; ZNVER1: # %bb.0: 5492 ; ZNVER1-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5493 ; ZNVER1-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 5494 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5495 %1 = fsub float %a0, %a1 5496 %2 = load float, float *%a2, align 4 5497 %3 = fsub float %1, %2 5498 ret float %3 5499 } 5500 5501 define i32 @test_ucomiss(<4 x float> %a0, 
<4 x float> %a1, <4 x float> *%a2) { 5502 ; GENERIC-LABEL: test_ucomiss: 5503 ; GENERIC: # %bb.0: 5504 ; GENERIC-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00] 5505 ; GENERIC-NEXT: setnp %al # sched: [1:0.50] 5506 ; GENERIC-NEXT: sete %cl # sched: [1:0.50] 5507 ; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33] 5508 ; GENERIC-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00] 5509 ; GENERIC-NEXT: setnp %al # sched: [1:0.50] 5510 ; GENERIC-NEXT: sete %dl # sched: [1:0.50] 5511 ; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33] 5512 ; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33] 5513 ; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33] 5514 ; GENERIC-NEXT: retq # sched: [1:1.00] 5515 ; 5516 ; ATOM-LABEL: test_ucomiss: 5517 ; ATOM: # %bb.0: 5518 ; ATOM-NEXT: ucomiss %xmm1, %xmm0 # sched: [9:4.50] 5519 ; ATOM-NEXT: setnp %al # sched: [1:0.50] 5520 ; ATOM-NEXT: sete %cl # sched: [1:0.50] 5521 ; ATOM-NEXT: andb %al, %cl # sched: [1:0.50] 5522 ; ATOM-NEXT: ucomiss (%rdi), %xmm0 # sched: [10:5.00] 5523 ; ATOM-NEXT: setnp %al # sched: [1:0.50] 5524 ; ATOM-NEXT: sete %dl # sched: [1:0.50] 5525 ; ATOM-NEXT: andb %al, %dl # sched: [1:0.50] 5526 ; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50] 5527 ; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00] 5528 ; ATOM-NEXT: retq # sched: [79:39.50] 5529 ; 5530 ; SLM-LABEL: test_ucomiss: 5531 ; SLM: # %bb.0: 5532 ; SLM-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00] 5533 ; SLM-NEXT: setnp %al # sched: [1:0.50] 5534 ; SLM-NEXT: sete %cl # sched: [1:0.50] 5535 ; SLM-NEXT: andb %al, %cl # sched: [1:0.50] 5536 ; SLM-NEXT: ucomiss (%rdi), %xmm0 # sched: [6:1.00] 5537 ; SLM-NEXT: setnp %al # sched: [1:0.50] 5538 ; SLM-NEXT: sete %dl # sched: [1:0.50] 5539 ; SLM-NEXT: andb %al, %dl # sched: [1:0.50] 5540 ; SLM-NEXT: orb %cl, %dl # sched: [1:0.50] 5541 ; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50] 5542 ; SLM-NEXT: retq # sched: [4:1.00] 5543 ; 5544 ; SANDY-SSE-LABEL: test_ucomiss: 5545 ; SANDY-SSE: # %bb.0: 5546 ; SANDY-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00] 
5547 ; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] 5548 ; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50] 5549 ; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] 5550 ; SANDY-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00] 5551 ; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] 5552 ; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50] 5553 ; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33] 5554 ; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] 5555 ; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] 5556 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 5557 ; 5558 ; SANDY-LABEL: test_ucomiss: 5559 ; SANDY: # %bb.0: 5560 ; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] 5561 ; SANDY-NEXT: setnp %al # sched: [1:0.50] 5562 ; SANDY-NEXT: sete %cl # sched: [1:0.50] 5563 ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] 5564 ; SANDY-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00] 5565 ; SANDY-NEXT: setnp %al # sched: [1:0.50] 5566 ; SANDY-NEXT: sete %dl # sched: [1:0.50] 5567 ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] 5568 ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] 5569 ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] 5570 ; SANDY-NEXT: retq # sched: [1:1.00] 5571 ; 5572 ; HASWELL-SSE-LABEL: test_ucomiss: 5573 ; HASWELL-SSE: # %bb.0: 5574 ; HASWELL-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00] 5575 ; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] 5576 ; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50] 5577 ; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 5578 ; HASWELL-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00] 5579 ; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] 5580 ; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50] 5581 ; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] 5582 ; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] 5583 ; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 5584 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 5585 ; 5586 ; HASWELL-LABEL: test_ucomiss: 5587 ; HASWELL: # %bb.0: 5588 ; HASWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] 5589 ; HASWELL-NEXT: setnp 
%al # sched: [1:0.50] 5590 ; HASWELL-NEXT: sete %cl # sched: [1:0.50] 5591 ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] 5592 ; HASWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00] 5593 ; HASWELL-NEXT: setnp %al # sched: [1:0.50] 5594 ; HASWELL-NEXT: sete %dl # sched: [1:0.50] 5595 ; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] 5596 ; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] 5597 ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] 5598 ; HASWELL-NEXT: retq # sched: [7:1.00] 5599 ; 5600 ; BROADWELL-SSE-LABEL: test_ucomiss: 5601 ; BROADWELL-SSE: # %bb.0: 5602 ; BROADWELL-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00] 5603 ; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] 5604 ; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50] 5605 ; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 5606 ; BROADWELL-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00] 5607 ; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] 5608 ; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50] 5609 ; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] 5610 ; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] 5611 ; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 5612 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 5613 ; 5614 ; BROADWELL-LABEL: test_ucomiss: 5615 ; BROADWELL: # %bb.0: 5616 ; BROADWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] 5617 ; BROADWELL-NEXT: setnp %al # sched: [1:0.50] 5618 ; BROADWELL-NEXT: sete %cl # sched: [1:0.50] 5619 ; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] 5620 ; BROADWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00] 5621 ; BROADWELL-NEXT: setnp %al # sched: [1:0.50] 5622 ; BROADWELL-NEXT: sete %dl # sched: [1:0.50] 5623 ; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] 5624 ; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] 5625 ; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] 5626 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5627 ; 5628 ; SKYLAKE-SSE-LABEL: test_ucomiss: 5629 ; SKYLAKE-SSE: # %bb.0: 5630 ; SKYLAKE-SSE-NEXT: ucomiss %xmm1, %xmm0 # 
sched: [2:1.00] 5631 ; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] 5632 ; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50] 5633 ; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 5634 ; SKYLAKE-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [7:1.00] 5635 ; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] 5636 ; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50] 5637 ; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25] 5638 ; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] 5639 ; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 5640 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 5641 ; 5642 ; SKYLAKE-LABEL: test_ucomiss: 5643 ; SKYLAKE: # %bb.0: 5644 ; SKYLAKE-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] 5645 ; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] 5646 ; SKYLAKE-NEXT: sete %cl # sched: [1:0.50] 5647 ; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25] 5648 ; SKYLAKE-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00] 5649 ; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] 5650 ; SKYLAKE-NEXT: sete %dl # sched: [1:0.50] 5651 ; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25] 5652 ; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25] 5653 ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 5654 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5655 ; 5656 ; SKX-SSE-LABEL: test_ucomiss: 5657 ; SKX-SSE: # %bb.0: 5658 ; SKX-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00] 5659 ; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] 5660 ; SKX-SSE-NEXT: sete %cl # sched: [1:0.50] 5661 ; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 5662 ; SKX-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [7:1.00] 5663 ; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] 5664 ; SKX-SSE-NEXT: sete %dl # sched: [1:0.50] 5665 ; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25] 5666 ; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] 5667 ; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 5668 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 5669 ; 5670 ; SKX-LABEL: test_ucomiss: 5671 ; SKX: # %bb.0: 5672 ; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] 5673 ; SKX-NEXT: setnp %al # 
sched: [1:0.50] 5674 ; SKX-NEXT: sete %cl # sched: [1:0.50] 5675 ; SKX-NEXT: andb %al, %cl # sched: [1:0.25] 5676 ; SKX-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00] 5677 ; SKX-NEXT: setnp %al # sched: [1:0.50] 5678 ; SKX-NEXT: sete %dl # sched: [1:0.50] 5679 ; SKX-NEXT: andb %al, %dl # sched: [1:0.25] 5680 ; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] 5681 ; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] 5682 ; SKX-NEXT: retq # sched: [7:1.00] 5683 ; 5684 ; BTVER2-SSE-LABEL: test_ucomiss: 5685 ; BTVER2-SSE: # %bb.0: 5686 ; BTVER2-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00] 5687 ; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] 5688 ; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50] 5689 ; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] 5690 ; BTVER2-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00] 5691 ; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] 5692 ; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50] 5693 ; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] 5694 ; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] 5695 ; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] 5696 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 5697 ; 5698 ; BTVER2-LABEL: test_ucomiss: 5699 ; BTVER2: # %bb.0: 5700 ; BTVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] 5701 ; BTVER2-NEXT: setnp %al # sched: [1:0.50] 5702 ; BTVER2-NEXT: sete %cl # sched: [1:0.50] 5703 ; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50] 5704 ; BTVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00] 5705 ; BTVER2-NEXT: setnp %al # sched: [1:0.50] 5706 ; BTVER2-NEXT: sete %dl # sched: [1:0.50] 5707 ; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50] 5708 ; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50] 5709 ; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] 5710 ; BTVER2-NEXT: retq # sched: [4:1.00] 5711 ; 5712 ; ZNVER1-SSE-LABEL: test_ucomiss: 5713 ; ZNVER1-SSE: # %bb.0: 5714 ; ZNVER1-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00] 5715 ; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] 5716 ; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25] 5717 
; ZNVER1-SSE-NEXT:    andb %al, %cl # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    ucomiss (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT:    setnp %al # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    sete %dl # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    andb %al, %dl # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    orb %cl, %dl # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_ucomiss:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vucomiss %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    setnp %al # sched: [1:0.25]
; ZNVER1-NEXT:    sete %cl # sched: [1:0.25]
; ZNVER1-NEXT:    andb %al, %cl # sched: [1:0.25]
; ZNVER1-NEXT:    vucomiss (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT:    setnp %al # sched: [1:0.25]
; ZNVER1-NEXT:    sete %dl # sched: [1:0.25]
; ZNVER1-NEXT:    andb %al, %dl # sched: [1:0.25]
; ZNVER1-NEXT:    orb %cl, %dl # sched: [1:0.25]
; ZNVER1-NEXT:    movzbl %dl, %eax # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
  %2 = load <4 x float>, <4 x float> *%a2, align 4
  %3 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %2)
  %4 = or i32 %1, %3
  ret i32 %4
}
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone

; Scheduling test for (v)unpckhps.
; The first shufflevector interleaves lanes 2 and 3 of %a0 and %a1 (mask
; <2,6,3,7>). The second applies the same mask to %a1 and a 16-byte-aligned
; load from %a2; the expected output shows that load as the instruction's
; memory operand. The fadd consumes both shuffles, so both feed the returned
; value.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing the
; sched values by hand.
define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_unpckhps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; GENERIC-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_unpckhps:
; ATOM:       # %bb.0:
; ATOM-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; ATOM-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00]
; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_unpckhps:
; SLM:       # %bb.0:
; SLM-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SLM-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [4:1.00]
; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_unpckhps:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SANDY-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_unpckhps:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SANDY-NEXT:    vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_unpckhps:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; HASWELL-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_unpckhps:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; HASWELL-NEXT:    vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_unpckhps:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; BROADWELL-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_unpckhps:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; BROADWELL-NEXT:    vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_unpckhps:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SKYLAKE-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_unpckhps:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SKYLAKE-NEXT:    vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_unpckhps:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SKX-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_unpckhps:
; SKX:       # %bb.0:
; SKX-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SKX-NEXT:    vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_unpckhps:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
; BTVER2-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_unpckhps:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
; BTVER2-NEXT:    vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_unpckhps:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
; ZNVER1-SSE-NEXT:    unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_unpckhps:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
; ZNVER1-NEXT:    vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = shufflevector <4 x float> %a1, <4 x float> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %4 = fadd <4 x float> %1, %3
  ret <4 x float> %4
}

; Scheduling test for (v)unpcklps.
; Same shape as test_unpckhps, but the shufflevector mask <0,4,1,5> interleaves
; lanes 0 and 1: once for %a0 with %a1, and once for %a1 with a 16-byte-aligned
; load from %a2, which the expected output shows as a memory operand. The fadd
; consumes both shuffles.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing the sched values by hand.
define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_unpcklps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; GENERIC-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
; GENERIC-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_unpcklps:
; ATOM:       # %bb.0:
; ATOM-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; ATOM-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00]
; ATOM-NEXT:    addps %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_unpcklps:
; SLM:       # %bb.0:
; SLM-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; SLM-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [4:1.00]
; SLM-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_unpcklps:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; SANDY-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
; SANDY-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_unpcklps:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; SANDY-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
; SANDY-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_unpcklps:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; HASWELL-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
; HASWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_unpcklps:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; HASWELL-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
; HASWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_unpcklps:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; BROADWELL-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
; BROADWELL-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_unpcklps:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; BROADWELL-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
; BROADWELL-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_unpcklps:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; SKYLAKE-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
; SKYLAKE-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_unpcklps:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; SKYLAKE-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
; SKYLAKE-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_unpcklps:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; SKX-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
; SKX-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_unpcklps:
; SKX:       # %bb.0:
; SKX-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; SKX-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_unpcklps:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
; BTVER2-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
; BTVER2-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_unpcklps:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
; BTVER2-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
; BTVER2-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_unpcklps:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
; ZNVER1-SSE-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
; ZNVER1-SSE-NEXT:    addps %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_unpcklps:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
; ZNVER1-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
; ZNVER1-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %2 = load <4 x float>, <4 x float> *%a2, align 16
  %3 = shufflevector <4 x float> %a1, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %4 = fadd <4 x float> %1, %3
  ret <4 x float> %4
}

; Scheduling test for (v)xorps.
; Both float vectors are bitcast to <4 x i32> and xor'ed, then the result is
; xor'ed with a 16-byte-aligned load from %a2 and bitcast back to <4 x float>.
; The expected output has one register-register and one register-memory
; xorps/vxorps. The Atom expectations additionally contain four nop
; instructions ahead of retq.
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing the sched values by hand.
define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; GENERIC-LABEL: test_xorps:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    xorps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    xorps (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_xorps:
; ATOM:       # %bb.0:
; ATOM-NEXT:    xorps %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT:    xorps (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_xorps:
; SLM:       # %bb.0:
; SLM-NEXT:    xorps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    xorps (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_xorps:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [7:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_xorps:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_xorps:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [7:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_xorps:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_xorps:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [6:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_xorps:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_xorps:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [7:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_xorps:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_xorps:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_xorps:
; SKX:       # %bb.0:
; SKX-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_xorps:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_xorps:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_xorps:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    xorps %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    xorps (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_xorps:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = bitcast <4 x float> %a0 to <4 x i32>
  %2 = bitcast <4 x float> %a1 to <4 x i32>
  %3 = xor <4 x i32> %1, %2
  %4 = load <4 x float>, <4 x float> *%a2, align 16
  %5 = bitcast <4 x float> %4 to <4 x i32>
  %6 = xor <4 x i32> %3, %5
  %7 = bitcast <4 x i32> %6 to <4 x float>
  ret <4 x float> %7
}

; 'WriteZero' and 'WriteNop' class instructions.

; Scheduling test for a nop plus a zeroing xorps/vxorps.
; The body is a side-effecting inline-asm "nop" followed by a return of
; zeroinitializer. The expected output shows the asm nop bracketed by
; #APP/#NO_APP and the zero vector materialised with xorps/vxorps of %xmm0
; with itself. The relative order of the two differs per CPU in the
; expectations: some schedule the xorps ahead of the asm block, others after.
; The Atom expectations additionally contain four nop instructions ahead of
; retq.
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing the
; sched values by hand.
define <4 x float> @test_fnop() nounwind {
; GENERIC-LABEL: test_fnop:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    #APP
; GENERIC-NEXT:    nop # sched: [1:0.25]
; GENERIC-NEXT:    #NO_APP
; GENERIC-NEXT:    xorps %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_fnop:
; ATOM:       # %bb.0:
; ATOM-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.50]
; ATOM-NEXT:    #APP
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    #NO_APP
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_fnop:
; SLM:       # %bb.0:
; SLM-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    #APP
; SLM-NEXT:    nop # sched: [1:0.50]
; SLM-NEXT:    #NO_APP
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_fnop:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    #APP
; SANDY-SSE-NEXT:    nop # sched: [1:0.25]
; SANDY-SSE-NEXT:    #NO_APP
; SANDY-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_fnop:
; SANDY:       # %bb.0:
; SANDY-NEXT:    #APP
; SANDY-NEXT:    nop # sched: [1:0.25]
; SANDY-NEXT:    #NO_APP
; SANDY-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_fnop:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    #APP
; HASWELL-SSE-NEXT:    nop # sched: [1:0.25]
; HASWELL-SSE-NEXT:    #NO_APP
; HASWELL-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_fnop:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    #APP
; HASWELL-NEXT:    nop # sched: [1:0.25]
; HASWELL-NEXT:    #NO_APP
; HASWELL-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_fnop:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    #APP
; BROADWELL-SSE-NEXT:    nop # sched: [1:0.25]
; BROADWELL-SSE-NEXT:    #NO_APP
; BROADWELL-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_fnop:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    #APP
; BROADWELL-NEXT:    nop # sched: [1:0.25]
; BROADWELL-NEXT:    #NO_APP
; BROADWELL-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_fnop:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    #APP
; SKYLAKE-SSE-NEXT:    nop # sched: [1:0.17]
; SKYLAKE-SSE-NEXT:    #NO_APP
; SKYLAKE-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_fnop:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    #APP
; SKYLAKE-NEXT:    nop # sched: [1:0.17]
; SKYLAKE-NEXT:    #NO_APP
; SKYLAKE-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_fnop:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    #APP
; SKX-SSE-NEXT:    nop # sched: [1:0.17]
; SKX-SSE-NEXT:    #NO_APP
; SKX-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_fnop:
; SKX:       # %bb.0:
; SKX-NEXT:    #APP
; SKX-NEXT:    nop # sched: [1:0.17]
; SKX-NEXT:    #NO_APP
; SKX-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_fnop:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [0:0.50]
; BTVER2-SSE-NEXT:    #APP
; BTVER2-SSE-NEXT:    nop # sched: [1:0.50]
; BTVER2-SSE-NEXT:    #NO_APP
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_fnop:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [0:0.50]
; BTVER2-NEXT:    #APP
; BTVER2-NEXT:    nop # sched: [1:0.50]
; BTVER2-NEXT:    #NO_APP
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_fnop:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    xorps %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    #APP
; ZNVER1-SSE-NEXT:    nop # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    #NO_APP
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_fnop:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    #APP
; ZNVER1-NEXT:    nop # sched: [1:0.25]
; ZNVER1-NEXT:    #NO_APP
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  tail call void asm sideeffect "nop", ""() nounwind
  ret <4 x float> zeroinitializer
}

; NOTE(review): !0 is not referenced by any function visible in this part of
; the file; presumably it is attached to an instruction in an earlier test.
; Confirm before removing or renumbering it.
!0 = !{i32 1}