; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1

; Prefix scheme used by the assertions below: the sandybridge through znver1
; CPUs are each run twice. The run with -mattr=-sse3 uses the prefix carrying
; the "-SSE" suffix and its expected output shows the legacy mnemonics
; (addpd, andpd, ...); the run with -mattr=-avx2 uses the plain prefix and its
; expected output shows the "v"-prefixed three-operand mnemonics (vaddpd,
; vandpd, ...). ivybridge shares the sandybridge prefixes. The trailing
; "# sched" annotation on every expected instruction is the text printed by
; -print-schedule; presumably [latency:reciprocal throughput] -- confirm
; against the llc documentation. Do not edit the expected values by hand;
; regenerate them with the script named on the first line.

; Packed double add: one fadd of the two register arguments, then a second
; fadd whose other operand is a <2 x double> loaded from %a2 (align 16), so
; the expected output contains both a register-register and a memory-operand
; form of the add.
define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_addpd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    addpd (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_addpd:
; ATOM:       # %bb.0:
; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
; ATOM-NEXT:    addpd (%rdi), %xmm0 # sched: [7:3.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_addpd:
; SLM:       # %bb.0:
; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    addpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_addpd:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_addpd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_addpd:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [9:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_addpd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_addpd:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_addpd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_addpd:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [10:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_addpd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_addpd:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_addpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_addpd:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_addpd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_addpd:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT:    addpd (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_addpd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = fadd <2 x double> %a0, %a1
  %2 = load <2 x double>, <2 x double> *%a2, align 16
  %3 = fadd <2 x double> %1, %2
  ret <2 x double> %3
}

; Scalar double add: same shape as test_addpd but on plain doubles, with the
; second operand loaded from %a2 (align 8).
define double @test_addsd(double %a0, double %a1, double *%a2) {
; GENERIC-LABEL: test_addsd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    addsd (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_addsd:
; ATOM:       # %bb.0:
; ATOM-NEXT:    addsd %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    addsd (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_addsd:
; SLM:       # %bb.0:
; SLM-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    addsd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_addsd:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_addsd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_addsd:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [8:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_addsd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_addsd:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_addsd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_addsd:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [9:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_addsd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_addsd:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_addsd:
; SKX:       # %bb.0:
; SKX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_addsd:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [8:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_addsd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_addsd:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    addsd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT:    addsd (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_addsd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    vaddsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = fadd double %a0, %a1
  %2 = load double, double *%a2, align 8
  %3 = fadd double %1, %2
  ret double %3
}

; Packed bitwise AND: the two arguments and a <2 x double> loaded from %a2 are
; ANDed together through <4 x i32> bitcasts, and the result is then fadd'ed
; with %a1. NOTE(review): the trailing fadd presumably exists to keep the
; logic op in the floating-point domain so that andpd (rather than an integer
; or single-precision form) is selected -- confirm.
define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_andpd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    andpd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    andpd (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_andpd:
; ATOM:       # %bb.0:
; ATOM-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT:    andpd (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_andpd:
; SLM:       # %bb.0:
; SLM-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    andpd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_andpd:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:1.00]
; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_andpd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_andpd:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:1.00]
; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_andpd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_andpd:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [6:1.00]
; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_andpd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_andpd:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:0.50]
; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_andpd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_andpd:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_andpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_andpd:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_andpd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_andpd:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    andpd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    andpd (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_andpd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vandpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = bitcast <2 x double> %a0 to <4 x i32>
  %2 = bitcast <2 x double> %a1 to <4 x i32>
  %3 = and <4 x i32> %1, %2
  %4 = load <2 x double>, <2 x double> *%a2, align 16
  %5 = bitcast <2 x double> %4 to <4 x i32>
  %6 = and <4 x i32> %3, %5
  %7 = bitcast <4 x i32> %6 to <2 x double>
  %8 = fadd <2 x double> %a1, %7
  ret <2 x double> %8
}

; Packed bitwise AND-NOT: each stage inverts one operand with an xor against
; all-ones and ANDs it with the other (first %a0 inverted against %a1, then
; that result inverted against the <2 x double> loaded from %a2), all through
; <4 x i32> bitcasts. The result is fadd'ed with %a1, as in test_andpd.
define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_andnotpd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_andnotpd:
; ATOM:       # %bb.0:
; ATOM-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT:    andnpd (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT:    addpd %xmm1, %xmm0 # sched: [6:3.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_andnotpd:
; SLM:       # %bb.0:
; SLM-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    andnpd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_andnotpd:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:1.00]
; SANDY-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_andnotpd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_andnotpd:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:1.00]
; HASWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_andnotpd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_andnotpd:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [6:1.00]
; BROADWELL-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_andnotpd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_andnotpd:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:0.50]
; SKYLAKE-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_andnotpd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_andnotpd:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_andnotpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_andnotpd:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_andnotpd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_andnotpd:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    andnpd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    andnpd (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT:    addpd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_andnotpd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vandnpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = bitcast <2 x double> %a0 to <4 x i32>
  %2 = bitcast <2 x double> %a1 to <4 x i32>
  %3 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
  %4 = and <4 x i32> %3, %2
  %5 = load <2 x double>, <2 x double> *%a2, align 16
  %6 = bitcast <2 x double> %5 to <4 x i32>
  %7 = xor <4 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1>
  %8 = and <4 x i32> %6, %7
  %9 = bitcast <4 x i32> %8 to <2 x double>
  %10 = fadd <2 x double> %a1, %9
  ret <2 x double> %10
}

; Cache-line flush: a single tail call to the llvm.x86.sse2.clflush intrinsic
; on %p. The expected instruction is spelled clflush under every prefix
; (there is no "v" form in the plain-prefix runs), and the atom expectations
; additionally contain six nop instructions between clflush and retq.
define void @test_clflush(i8* %p){
; GENERIC-LABEL: test_clflush:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    clflush (%rdi) # sched: [5:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_clflush:
; ATOM:       # %bb.0:
; ATOM-NEXT:    clflush (%rdi) # sched: [1:1.00]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_clflush:
; SLM:       # %bb.0:
; SLM-NEXT:    clflush (%rdi) # sched: [3:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_clflush:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    clflush (%rdi) # sched: [5:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_clflush:
; SANDY:       # %bb.0:
; SANDY-NEXT:    clflush (%rdi) # sched: [5:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_clflush:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    clflush (%rdi) # sched: [2:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_clflush:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    clflush (%rdi) # sched: [2:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_clflush:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    clflush (%rdi) # sched: [2:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_clflush:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    clflush (%rdi) # sched: [2:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_clflush:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    clflush (%rdi) # sched: [2:1.00]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_clflush:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    clflush (%rdi) # sched: [2:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_clflush:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    clflush (%rdi) # sched: [2:1.00]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_clflush:
; SKX:       # %bb.0:
; SKX-NEXT:    clflush (%rdi) # sched: [2:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_clflush:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    clflush (%rdi) # sched: [5:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_clflush:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    clflush (%rdi) # sched: [5:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_clflush:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    clflush (%rdi) # sched: [8:0.50]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_clflush:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    clflush (%rdi) # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  tail call void @llvm.x86.sse2.clflush(i8* %p)
  ret void
}
declare void @llvm.x86.sse2.clflush(i8*) nounwind

; Packed double compare: %a0 is compared for ordered-equal (fcmp oeq) against
; %a1 and against a <2 x double> loaded from %a2 (align 16); the two <2 x i1>
; results are ORed, sign-extended to <2 x i64> and bitcast back to
; <2 x double>. The expected output is two cmpeqpd forms joined by an orpd.
define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_cmppd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
; GENERIC-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cmppd:
; ATOM:       # %bb.0:
; ATOM-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [6:3.00]
; ATOM-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [7:3.50]
; ATOM-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_cmppd:
; SLM:       # %bb.0:
; SLM-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
; SLM-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cmppd:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cmppd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; SANDY-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cmppd:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
; HASWELL-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cmppd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; HASWELL-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cmppd:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cmppd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; BROADWELL-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cmppd:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
; SKYLAKE-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cmppd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
; SKYLAKE-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cmppd:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [4:0.50]
; SKX-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_cmppd:
; SKX:       # %bb.0:
; SKX-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
; SKX-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cmppd:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [2:1.00]
; BTVER2-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [7:1.00]
; BTVER2-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cmppd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
; BTVER2-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cmppd:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
; ZNVER1-SSE-NEXT:    cmpeqpd (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT:    orpd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cmppd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; ZNVER1-NEXT:    vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT:    vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = fcmp oeq <2 x double> %a0, %a1
  %2 = load <2 x double>, <2 x double> *%a2, align 16
  %3 = fcmp oeq <2 x double> %a0, %2
  %4 = or <2 x i1> %1, %3
  %5 = sext <2 x i1> %4 to <2 x i64>
  %6 = bitcast <2 x i64> %5 to <2 x double>
  ret <2 x double> %6
}

; Scalar double compare via the llvm.x86.sse2.cmp.sd intrinsic with immediate
; i8 0 (the expected output spells this cmpeqsd). The scalar arguments are
; placed in lane 0 of otherwise-undef vectors; the first compare result is
; compared again against a double loaded from %a2 (align 8), and lane 0 of the
; final vector is returned.
define double @test_cmpsd(double %a0, double %a1, double *%a2) {
; GENERIC-LABEL: test_cmpsd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [9:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_cmpsd:
; ATOM:       # %bb.0:
; ATOM-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_cmpsd:
; SLM:       # %bb.0:
; SLM-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_cmpsd:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [9:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_cmpsd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_cmpsd:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [8:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_cmpsd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_cmpsd:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [8:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_cmpsd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_cmpsd:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_cmpsd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_cmpsd:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_cmpsd:
; SKX:       # %bb.0:
; SKX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_cmpsd:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [2:1.00]
; BTVER2-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [7:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_cmpsd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_cmpsd:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT:    cmpeqsd (%rdi), %xmm0 # sched: [10:1.00]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_cmpsd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = insertelement <2 x double> undef, double %a0, i32 0
  %2 = insertelement <2 x double> undef, double %a1, i32 0
  %3 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %1, <2 x double> %2, i8 0)
  %4 = load double, double *%a2, align 8
  %5 = insertelement <2 x double> undef, double %4, i32 0
  %6 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %3, <2 x double> %5, i8 0)
  %7 = extractelement <2 x double> %6, i32 0
  ret double %7
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone

define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_comisd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    comisd %xmm1, %xmm0 # sched: [2:1.00]
; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
; GENERIC-NEXT:    sete %cl # sched: [1:0.50]
; GENERIC-NEXT:    andb %al, %cl # sched: [1:0.33]
; GENERIC-NEXT:    comisd (%rdi), %xmm0 # sched: [8:1.00]
; GENERIC-NEXT:    setnp %al # sched: [1:0.50]
; GENERIC-NEXT:    sete %dl # sched: [1:0.50]
; GENERIC-NEXT:    andb %al, %dl # sched: [1:0.33]
; GENERIC-NEXT:    orb %cl, %dl # sched: [1:0.33]
; GENERIC-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_comisd:
; ATOM:       # %bb.0:
; ATOM-NEXT:    comisd %xmm1, %xmm0 # sched: [9:4.50]
; ATOM-NEXT:    setnp %al # sched: [1:0.50]
; ATOM-NEXT:    sete %cl # sched: [1:0.50]
; ATOM-NEXT:    andb %al, %cl # sched: [1:0.50]
; ATOM-NEXT:    comisd (%rdi), %xmm0 # sched: [10:5.00]
; ATOM-NEXT:    setnp %al # sched: [1:0.50]
; ATOM-NEXT:    sete %dl # sched: [1:0.50]
; ATOM-NEXT:    andb %al, %dl # sched: [1:0.50]
; ATOM-NEXT:    orb %cl, %dl # sched: [1:0.50]
; ATOM-NEXT:    movzbl %dl, %eax # sched: [1:1.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_comisd:
; SLM:       # %bb.0:
; SLM-NEXT:    comisd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT:    setnp %al # sched: [1:0.50]
; SLM-NEXT:    sete %cl # sched: [1:0.50]
; SLM-NEXT:    andb %al, %cl # sched: [1:0.50]
; SLM-NEXT:    comisd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT:    setnp %al # sched: [1:0.50]
; SLM-NEXT:    sete %dl # sched: [1:0.50]
; SLM-NEXT:    andb %al, %dl # sched: [1:0.50]
; SLM-NEXT:    orb %cl, %dl # sched: [1:0.50]
; SLM-NEXT:    movzbl %dl, %eax # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_comisd:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    comisd %xmm1, %xmm0 # sched: [2:1.00]
; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
; SANDY-SSE-NEXT:    sete %cl # sched: [1:0.50]
; SANDY-SSE-NEXT:    andb %al, %cl # sched: [1:0.33]
; SANDY-SSE-NEXT:    comisd (%rdi), %xmm0 # sched: [8:1.00]
; SANDY-SSE-NEXT:    setnp %al # sched: [1:0.50]
; SANDY-SSE-NEXT:    sete %dl # sched: [1:0.50]
; SANDY-SSE-NEXT:    andb %al, %dl # sched: [1:0.33]
; SANDY-SSE-NEXT:    orb %cl, %dl # sched: [1:0.33]
; SANDY-SSE-NEXT:    movzbl %dl, %eax # sched: [1:0.33]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_comisd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vcomisd %xmm1, %xmm0 # sched: [2:1.00]
; SANDY-NEXT:    setnp %al # sched: [1:0.50]
; SANDY-NEXT:    sete %cl # sched: [1:0.50]
; SANDY-NEXT:    andb %al, %cl # sched: [1:0.33]
; SANDY-NEXT:    vcomisd (%rdi),
%xmm0 # sched: [8:1.00] 901 ; SANDY-NEXT: setnp %al # sched: [1:0.50] 902 ; SANDY-NEXT: sete %dl # sched: [1:0.50] 903 ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] 904 ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] 905 ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] 906 ; SANDY-NEXT: retq # sched: [1:1.00] 907 ; 908 ; HASWELL-SSE-LABEL: test_comisd: 909 ; HASWELL-SSE: # %bb.0: 910 ; HASWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] 911 ; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] 912 ; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50] 913 ; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 914 ; HASWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] 915 ; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] 916 ; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50] 917 ; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] 918 ; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] 919 ; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 920 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 921 ; 922 ; HASWELL-LABEL: test_comisd: 923 ; HASWELL: # %bb.0: 924 ; HASWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] 925 ; HASWELL-NEXT: setnp %al # sched: [1:0.50] 926 ; HASWELL-NEXT: sete %cl # sched: [1:0.50] 927 ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] 928 ; HASWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00] 929 ; HASWELL-NEXT: setnp %al # sched: [1:0.50] 930 ; HASWELL-NEXT: sete %dl # sched: [1:0.50] 931 ; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] 932 ; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] 933 ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] 934 ; HASWELL-NEXT: retq # sched: [7:1.00] 935 ; 936 ; BROADWELL-SSE-LABEL: test_comisd: 937 ; BROADWELL-SSE: # %bb.0: 938 ; BROADWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] 939 ; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] 940 ; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50] 941 ; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 942 ; BROADWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] 943 ; 
BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] 944 ; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50] 945 ; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] 946 ; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] 947 ; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 948 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 949 ; 950 ; BROADWELL-LABEL: test_comisd: 951 ; BROADWELL: # %bb.0: 952 ; BROADWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] 953 ; BROADWELL-NEXT: setnp %al # sched: [1:0.50] 954 ; BROADWELL-NEXT: sete %cl # sched: [1:0.50] 955 ; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] 956 ; BROADWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00] 957 ; BROADWELL-NEXT: setnp %al # sched: [1:0.50] 958 ; BROADWELL-NEXT: sete %dl # sched: [1:0.50] 959 ; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] 960 ; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] 961 ; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] 962 ; BROADWELL-NEXT: retq # sched: [7:1.00] 963 ; 964 ; SKYLAKE-SSE-LABEL: test_comisd: 965 ; SKYLAKE-SSE: # %bb.0: 966 ; SKYLAKE-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] 967 ; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] 968 ; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50] 969 ; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 970 ; SKYLAKE-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00] 971 ; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] 972 ; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50] 973 ; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25] 974 ; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] 975 ; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 976 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 977 ; 978 ; SKYLAKE-LABEL: test_comisd: 979 ; SKYLAKE: # %bb.0: 980 ; SKYLAKE-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00] 981 ; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] 982 ; SKYLAKE-NEXT: sete %cl # sched: [1:0.50] 983 ; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25] 984 ; SKYLAKE-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00] 985 ; 
SKYLAKE-NEXT: setnp %al # sched: [1:0.50] 986 ; SKYLAKE-NEXT: sete %dl # sched: [1:0.50] 987 ; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25] 988 ; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25] 989 ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 990 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 991 ; 992 ; SKX-SSE-LABEL: test_comisd: 993 ; SKX-SSE: # %bb.0: 994 ; SKX-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] 995 ; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] 996 ; SKX-SSE-NEXT: sete %cl # sched: [1:0.50] 997 ; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 998 ; SKX-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00] 999 ; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] 1000 ; SKX-SSE-NEXT: sete %dl # sched: [1:0.50] 1001 ; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25] 1002 ; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] 1003 ; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 1004 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1005 ; 1006 ; SKX-LABEL: test_comisd: 1007 ; SKX: # %bb.0: 1008 ; SKX-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00] 1009 ; SKX-NEXT: setnp %al # sched: [1:0.50] 1010 ; SKX-NEXT: sete %cl # sched: [1:0.50] 1011 ; SKX-NEXT: andb %al, %cl # sched: [1:0.25] 1012 ; SKX-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00] 1013 ; SKX-NEXT: setnp %al # sched: [1:0.50] 1014 ; SKX-NEXT: sete %dl # sched: [1:0.50] 1015 ; SKX-NEXT: andb %al, %dl # sched: [1:0.25] 1016 ; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] 1017 ; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] 1018 ; SKX-NEXT: retq # sched: [7:1.00] 1019 ; 1020 ; BTVER2-SSE-LABEL: test_comisd: 1021 ; BTVER2-SSE: # %bb.0: 1022 ; BTVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] 1023 ; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] 1024 ; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50] 1025 ; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] 1026 ; BTVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] 1027 ; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] 1028 ; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50] 1029 ; BTVER2-SSE-NEXT: andb 
%al, %dl # sched: [1:0.50] 1030 ; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] 1031 ; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] 1032 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1033 ; 1034 ; BTVER2-LABEL: test_comisd: 1035 ; BTVER2: # %bb.0: 1036 ; BTVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] 1037 ; BTVER2-NEXT: setnp %al # sched: [1:0.50] 1038 ; BTVER2-NEXT: sete %cl # sched: [1:0.50] 1039 ; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50] 1040 ; BTVER2-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00] 1041 ; BTVER2-NEXT: setnp %al # sched: [1:0.50] 1042 ; BTVER2-NEXT: sete %dl # sched: [1:0.50] 1043 ; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50] 1044 ; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50] 1045 ; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] 1046 ; BTVER2-NEXT: retq # sched: [4:1.00] 1047 ; 1048 ; ZNVER1-SSE-LABEL: test_comisd: 1049 ; ZNVER1-SSE: # %bb.0: 1050 ; ZNVER1-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] 1051 ; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] 1052 ; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25] 1053 ; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 1054 ; ZNVER1-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [10:1.00] 1055 ; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] 1056 ; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25] 1057 ; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25] 1058 ; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] 1059 ; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 1060 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1061 ; 1062 ; ZNVER1-LABEL: test_comisd: 1063 ; ZNVER1: # %bb.0: 1064 ; ZNVER1-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] 1065 ; ZNVER1-NEXT: setnp %al # sched: [1:0.25] 1066 ; ZNVER1-NEXT: sete %cl # sched: [1:0.25] 1067 ; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25] 1068 ; ZNVER1-NEXT: vcomisd (%rdi), %xmm0 # sched: [10:1.00] 1069 ; ZNVER1-NEXT: setnp %al # sched: [1:0.25] 1070 ; ZNVER1-NEXT: sete %dl # sched: [1:0.25] 1071 ; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25] 1072 ; ZNVER1-NEXT: orb 
%cl, %dl # sched: [1:0.25] 1073 ; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25] 1074 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1075 %1 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) 1076 %2 = load <2 x double>, <2 x double> *%a2, align 8 1077 %3 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %2) 1078 %4 = or i32 %1, %3 1079 ret i32 %4 1080 } 1081 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone 1082 1083 define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { 1084 ; GENERIC-LABEL: test_cvtdq2pd: 1085 ; GENERIC: # %bb.0: 1086 ; GENERIC-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00] 1087 ; GENERIC-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00] 1088 ; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 1089 ; GENERIC-NEXT: retq # sched: [1:1.00] 1090 ; 1091 ; ATOM-LABEL: test_cvtdq2pd: 1092 ; ATOM: # %bb.0: 1093 ; ATOM-NEXT: cvtdq2pd (%rdi), %xmm1 # sched: [8:4.00] 1094 ; ATOM-NEXT: cvtdq2pd %xmm0, %xmm0 # sched: [7:3.50] 1095 ; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00] 1096 ; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] 1097 ; ATOM-NEXT: retq # sched: [79:39.50] 1098 ; 1099 ; SLM-LABEL: test_cvtdq2pd: 1100 ; SLM: # %bb.0: 1101 ; SLM-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50] 1102 ; SLM-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [7:1.00] 1103 ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 1104 ; SLM-NEXT: retq # sched: [4:1.00] 1105 ; 1106 ; SANDY-SSE-LABEL: test_cvtdq2pd: 1107 ; SANDY-SSE: # %bb.0: 1108 ; SANDY-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00] 1109 ; SANDY-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00] 1110 ; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 1111 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1112 ; 1113 ; SANDY-LABEL: test_cvtdq2pd: 1114 ; SANDY: # %bb.0: 1115 ; SANDY-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] 1116 ; SANDY-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00] 1117 ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: 
[3:1.00] 1118 ; SANDY-NEXT: retq # sched: [1:1.00] 1119 ; 1120 ; HASWELL-SSE-LABEL: test_cvtdq2pd: 1121 ; HASWELL-SSE: # %bb.0: 1122 ; HASWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00] 1123 ; HASWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00] 1124 ; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 1125 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1126 ; 1127 ; HASWELL-LABEL: test_cvtdq2pd: 1128 ; HASWELL: # %bb.0: 1129 ; HASWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] 1130 ; HASWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00] 1131 ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1132 ; HASWELL-NEXT: retq # sched: [7:1.00] 1133 ; 1134 ; BROADWELL-SSE-LABEL: test_cvtdq2pd: 1135 ; BROADWELL-SSE: # %bb.0: 1136 ; BROADWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm1 # sched: [9:1.00] 1137 ; BROADWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] 1138 ; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 1139 ; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] 1140 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1141 ; 1142 ; BROADWELL-LABEL: test_cvtdq2pd: 1143 ; BROADWELL: # %bb.0: 1144 ; BROADWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [9:1.00] 1145 ; BROADWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] 1146 ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1147 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1148 ; 1149 ; SKYLAKE-SSE-LABEL: test_cvtdq2pd: 1150 ; SKYLAKE-SSE: # %bb.0: 1151 ; SKYLAKE-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00] 1152 ; SKYLAKE-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00] 1153 ; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] 1154 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1155 ; 1156 ; SKYLAKE-LABEL: test_cvtdq2pd: 1157 ; SKYLAKE: # %bb.0: 1158 ; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] 1159 ; SKYLAKE-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00] 1160 ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1161 ; SKYLAKE-NEXT: retq # 
sched: [7:1.00] 1162 ; 1163 ; SKX-SSE-LABEL: test_cvtdq2pd: 1164 ; SKX-SSE: # %bb.0: 1165 ; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50] 1166 ; SKX-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00] 1167 ; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] 1168 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1169 ; 1170 ; SKX-LABEL: test_cvtdq2pd: 1171 ; SKX: # %bb.0: 1172 ; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50] 1173 ; SKX-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00] 1174 ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1175 ; SKX-NEXT: retq # sched: [7:1.00] 1176 ; 1177 ; BTVER2-SSE-LABEL: test_cvtdq2pd: 1178 ; BTVER2-SSE: # %bb.0: 1179 ; BTVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [3:1.00] 1180 ; BTVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [8:1.00] 1181 ; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 1182 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1183 ; 1184 ; BTVER2-LABEL: test_cvtdq2pd: 1185 ; BTVER2: # %bb.0: 1186 ; BTVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00] 1187 ; BTVER2-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [3:1.00] 1188 ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1189 ; BTVER2-NEXT: retq # sched: [4:1.00] 1190 ; 1191 ; ZNVER1-SSE-LABEL: test_cvtdq2pd: 1192 ; ZNVER1-SSE: # %bb.0: 1193 ; ZNVER1-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00] 1194 ; ZNVER1-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [12:1.00] 1195 ; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 1196 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1197 ; 1198 ; ZNVER1-LABEL: test_cvtdq2pd: 1199 ; ZNVER1: # %bb.0: 1200 ; ZNVER1-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [12:1.00] 1201 ; ZNVER1-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] 1202 ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1203 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1204 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 1205 %2 = sitofp <2 x i32> %1 to <2 x double> 1206 %3 = load <4 x i32>, <4 x i32>*%a1, align 16 1207 %4 
= shufflevector <4 x i32> %3, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 1208 %5 = sitofp <2 x i32> %4 to <2 x double> 1209 %6 = fadd <2 x double> %2, %5 1210 ret <2 x double> %6 1211 } 1212 1213 define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { 1214 ; GENERIC-LABEL: test_cvtdq2ps: 1215 ; GENERIC: # %bb.0: 1216 ; GENERIC-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] 1217 ; GENERIC-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] 1218 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1219 ; GENERIC-NEXT: retq # sched: [1:1.00] 1220 ; 1221 ; ATOM-LABEL: test_cvtdq2ps: 1222 ; ATOM: # %bb.0: 1223 ; ATOM-NEXT: cvtdq2ps (%rdi), %xmm1 # sched: [7:3.50] 1224 ; ATOM-NEXT: cvtdq2ps %xmm0, %xmm0 # sched: [6:3.00] 1225 ; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00] 1226 ; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] 1227 ; ATOM-NEXT: retq # sched: [79:39.50] 1228 ; 1229 ; SLM-LABEL: test_cvtdq2ps: 1230 ; SLM: # %bb.0: 1231 ; SLM-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50] 1232 ; SLM-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [7:1.00] 1233 ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1234 ; SLM-NEXT: retq # sched: [4:1.00] 1235 ; 1236 ; SANDY-SSE-LABEL: test_cvtdq2ps: 1237 ; SANDY-SSE: # %bb.0: 1238 ; SANDY-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] 1239 ; SANDY-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] 1240 ; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1241 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1242 ; 1243 ; SANDY-LABEL: test_cvtdq2ps: 1244 ; SANDY: # %bb.0: 1245 ; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] 1246 ; SANDY-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00] 1247 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1248 ; SANDY-NEXT: retq # sched: [1:1.00] 1249 ; 1250 ; HASWELL-SSE-LABEL: test_cvtdq2ps: 1251 ; HASWELL-SSE: # %bb.0: 1252 ; HASWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] 1253 ; HASWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] 1254 ; HASWELL-SSE-NEXT: addps %xmm1, 
%xmm0 # sched: [3:1.00] 1255 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1256 ; 1257 ; HASWELL-LABEL: test_cvtdq2ps: 1258 ; HASWELL: # %bb.0: 1259 ; HASWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] 1260 ; HASWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00] 1261 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1262 ; HASWELL-NEXT: retq # sched: [7:1.00] 1263 ; 1264 ; BROADWELL-SSE-LABEL: test_cvtdq2ps: 1265 ; BROADWELL-SSE: # %bb.0: 1266 ; BROADWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] 1267 ; BROADWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00] 1268 ; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1269 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1270 ; 1271 ; BROADWELL-LABEL: test_cvtdq2ps: 1272 ; BROADWELL: # %bb.0: 1273 ; BROADWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] 1274 ; BROADWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00] 1275 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1276 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1277 ; 1278 ; SKYLAKE-SSE-LABEL: test_cvtdq2ps: 1279 ; SKYLAKE-SSE: # %bb.0: 1280 ; SKYLAKE-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50] 1281 ; SKYLAKE-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50] 1282 ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 1283 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1284 ; 1285 ; SKYLAKE-LABEL: test_cvtdq2ps: 1286 ; SKYLAKE: # %bb.0: 1287 ; SKYLAKE-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] 1288 ; SKYLAKE-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50] 1289 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1290 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1291 ; 1292 ; SKX-SSE-LABEL: test_cvtdq2ps: 1293 ; SKX-SSE: # %bb.0: 1294 ; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50] 1295 ; SKX-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50] 1296 ; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 1297 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1298 ; 1299 ; SKX-LABEL: test_cvtdq2ps: 1300 ; SKX: # 
%bb.0: 1301 ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] 1302 ; SKX-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50] 1303 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1304 ; SKX-NEXT: retq # sched: [7:1.00] 1305 ; 1306 ; BTVER2-SSE-LABEL: test_cvtdq2ps: 1307 ; BTVER2-SSE: # %bb.0: 1308 ; BTVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] 1309 ; BTVER2-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00] 1310 ; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1311 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1312 ; 1313 ; BTVER2-LABEL: test_cvtdq2ps: 1314 ; BTVER2: # %bb.0: 1315 ; BTVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00] 1316 ; BTVER2-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] 1317 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1318 ; BTVER2-NEXT: retq # sched: [4:1.00] 1319 ; 1320 ; ZNVER1-SSE-LABEL: test_cvtdq2ps: 1321 ; ZNVER1-SSE: # %bb.0: 1322 ; ZNVER1-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [5:1.00] 1323 ; ZNVER1-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [12:1.00] 1324 ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1325 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1326 ; 1327 ; ZNVER1-LABEL: test_cvtdq2ps: 1328 ; ZNVER1: # %bb.0: 1329 ; ZNVER1-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [12:1.00] 1330 ; ZNVER1-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [5:1.00] 1331 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1332 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1333 %1 = sitofp <4 x i32> %a0 to <4 x float> 1334 %2 = load <4 x i32>, <4 x i32>*%a1, align 16 1335 %3 = sitofp <4 x i32> %2 to <4 x float> 1336 %4 = fadd <4 x float> %1, %3 1337 ret <4 x float> %4 1338 } 1339 1340 define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { 1341 ; GENERIC-LABEL: test_cvtpd2dq: 1342 ; GENERIC: # %bb.0: 1343 ; GENERIC-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00] 1344 ; GENERIC-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00] 1345 ; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 1346 ; 
GENERIC-NEXT: retq # sched: [1:1.00] 1347 ; 1348 ; ATOM-LABEL: test_cvtpd2dq: 1349 ; ATOM: # %bb.0: 1350 ; ATOM-NEXT: cvtpd2dq (%rdi), %xmm1 # sched: [8:4.00] 1351 ; ATOM-NEXT: cvtpd2dq %xmm0, %xmm0 # sched: [7:3.50] 1352 ; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] 1353 ; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 1354 ; ATOM-NEXT: retq # sched: [79:39.50] 1355 ; 1356 ; SLM-LABEL: test_cvtpd2dq: 1357 ; SLM: # %bb.0: 1358 ; SLM-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:0.50] 1359 ; SLM-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [7:1.00] 1360 ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 1361 ; SLM-NEXT: retq # sched: [4:1.00] 1362 ; 1363 ; SANDY-SSE-LABEL: test_cvtpd2dq: 1364 ; SANDY-SSE: # %bb.0: 1365 ; SANDY-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00] 1366 ; SANDY-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00] 1367 ; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 1368 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1369 ; 1370 ; SANDY-LABEL: test_cvtpd2dq: 1371 ; SANDY: # %bb.0: 1372 ; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] 1373 ; SANDY-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [10:1.00] 1374 ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1375 ; SANDY-NEXT: retq # sched: [1:1.00] 1376 ; 1377 ; HASWELL-SSE-LABEL: test_cvtpd2dq: 1378 ; HASWELL-SSE: # %bb.0: 1379 ; HASWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00] 1380 ; HASWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00] 1381 ; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 1382 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1383 ; 1384 ; HASWELL-LABEL: test_cvtpd2dq: 1385 ; HASWELL: # %bb.0: 1386 ; HASWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] 1387 ; HASWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] 1388 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1389 ; HASWELL-NEXT: retq # sched: [7:1.00] 1390 ; 1391 ; BROADWELL-SSE-LABEL: test_cvtpd2dq: 1392 ; BROADWELL-SSE: # %bb.0: 1393 ; BROADWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm1 # 
sched: [9:1.00] 1394 ; BROADWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] 1395 ; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] 1396 ; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] 1397 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1398 ; 1399 ; BROADWELL-LABEL: test_cvtpd2dq: 1400 ; BROADWELL: # %bb.0: 1401 ; BROADWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] 1402 ; BROADWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] 1403 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1404 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1405 ; 1406 ; SKYLAKE-SSE-LABEL: test_cvtpd2dq: 1407 ; SKYLAKE-SSE: # %bb.0: 1408 ; SKYLAKE-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00] 1409 ; SKYLAKE-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00] 1410 ; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 1411 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1412 ; 1413 ; SKYLAKE-LABEL: test_cvtpd2dq: 1414 ; SKYLAKE: # %bb.0: 1415 ; SKYLAKE-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] 1416 ; SKYLAKE-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] 1417 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1418 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1419 ; 1420 ; SKX-SSE-LABEL: test_cvtpd2dq: 1421 ; SKX-SSE: # %bb.0: 1422 ; SKX-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00] 1423 ; SKX-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00] 1424 ; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 1425 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1426 ; 1427 ; SKX-LABEL: test_cvtpd2dq: 1428 ; SKX: # %bb.0: 1429 ; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] 1430 ; SKX-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:0.50] 1431 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1432 ; SKX-NEXT: retq # sched: [7:1.00] 1433 ; 1434 ; BTVER2-SSE-LABEL: test_cvtpd2dq: 1435 ; BTVER2-SSE: # %bb.0: 1436 ; BTVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [3:1.00] 1437 ; BTVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [8:1.00] 1438 ; 
BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 1439 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1440 ; 1441 ; BTVER2-LABEL: test_cvtpd2dq: 1442 ; BTVER2: # %bb.0: 1443 ; BTVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] 1444 ; BTVER2-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [3:1.00] 1445 ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1446 ; BTVER2-NEXT: retq # sched: [4:1.00] 1447 ; 1448 ; ZNVER1-SSE-LABEL: test_cvtpd2dq: 1449 ; ZNVER1-SSE: # %bb.0: 1450 ; ZNVER1-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00] 1451 ; ZNVER1-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [12:1.00] 1452 ; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] 1453 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1454 ; 1455 ; ZNVER1-LABEL: test_cvtpd2dq: 1456 ; ZNVER1: # %bb.0: 1457 ; ZNVER1-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [12:1.00] 1458 ; ZNVER1-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] 1459 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 1460 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1461 %1 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) 1462 %2 = load <2 x double>, <2 x double> *%a1, align 16 1463 %3 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %2) 1464 %4 = add <4 x i32> %1, %3 1465 ret <4 x i32> %4 1466 } 1467 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone 1468 1469 define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { 1470 ; GENERIC-LABEL: test_cvtpd2ps: 1471 ; GENERIC: # %bb.0: 1472 ; GENERIC-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] 1473 ; GENERIC-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00] 1474 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1475 ; GENERIC-NEXT: retq # sched: [1:1.00] 1476 ; 1477 ; ATOM-LABEL: test_cvtpd2ps: 1478 ; ATOM: # %bb.0: 1479 ; ATOM-NEXT: cvtpd2ps (%rdi), %xmm1 # sched: [8:4.00] 1480 ; ATOM-NEXT: cvtpd2ps %xmm0, %xmm0 # sched: [7:3.50] 1481 ; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00] 1482 ; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: 
[1:0.50] 1483 ; ATOM-NEXT: retq # sched: [79:39.50] 1484 ; 1485 ; SLM-LABEL: test_cvtpd2ps: 1486 ; SLM: # %bb.0: 1487 ; SLM-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:0.50] 1488 ; SLM-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [7:1.00] 1489 ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1490 ; SLM-NEXT: retq # sched: [4:1.00] 1491 ; 1492 ; SANDY-SSE-LABEL: test_cvtpd2ps: 1493 ; SANDY-SSE: # %bb.0: 1494 ; SANDY-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] 1495 ; SANDY-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00] 1496 ; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1497 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1498 ; 1499 ; SANDY-LABEL: test_cvtpd2ps: 1500 ; SANDY: # %bb.0: 1501 ; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] 1502 ; SANDY-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [10:1.00] 1503 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1504 ; SANDY-NEXT: retq # sched: [1:1.00] 1505 ; 1506 ; HASWELL-SSE-LABEL: test_cvtpd2ps: 1507 ; HASWELL-SSE: # %bb.0: 1508 ; HASWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] 1509 ; HASWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00] 1510 ; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1511 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1512 ; 1513 ; HASWELL-LABEL: test_cvtpd2ps: 1514 ; HASWELL: # %bb.0: 1515 ; HASWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] 1516 ; HASWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] 1517 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1518 ; HASWELL-NEXT: retq # sched: [7:1.00] 1519 ; 1520 ; BROADWELL-SSE-LABEL: test_cvtpd2ps: 1521 ; BROADWELL-SSE: # %bb.0: 1522 ; BROADWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm1 # sched: [9:1.00] 1523 ; BROADWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] 1524 ; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] 1525 ; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] 1526 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1527 ; 1528 ; BROADWELL-LABEL: test_cvtpd2ps: 1529 ; 
BROADWELL: # %bb.0: 1530 ; BROADWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] 1531 ; BROADWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] 1532 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1533 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1534 ; 1535 ; SKYLAKE-SSE-LABEL: test_cvtpd2ps: 1536 ; SKYLAKE-SSE: # %bb.0: 1537 ; SKYLAKE-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00] 1538 ; SKYLAKE-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] 1539 ; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 1540 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1541 ; 1542 ; SKYLAKE-LABEL: test_cvtpd2ps: 1543 ; SKYLAKE: # %bb.0: 1544 ; SKYLAKE-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00] 1545 ; SKYLAKE-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] 1546 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1547 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1548 ; 1549 ; SKX-SSE-LABEL: test_cvtpd2ps: 1550 ; SKX-SSE: # %bb.0: 1551 ; SKX-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00] 1552 ; SKX-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] 1553 ; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] 1554 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1555 ; 1556 ; SKX-LABEL: test_cvtpd2ps: 1557 ; SKX: # %bb.0: 1558 ; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00] 1559 ; SKX-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] 1560 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1561 ; SKX-NEXT: retq # sched: [7:1.00] 1562 ; 1563 ; BTVER2-SSE-LABEL: test_cvtpd2ps: 1564 ; BTVER2-SSE: # %bb.0: 1565 ; BTVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [3:1.00] 1566 ; BTVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [8:1.00] 1567 ; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1568 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1569 ; 1570 ; BTVER2-LABEL: test_cvtpd2ps: 1571 ; BTVER2: # %bb.0: 1572 ; BTVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] 1573 ; BTVER2-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [3:1.00] 1574 ; BTVER2-NEXT: vaddps 
%xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1575 ; BTVER2-NEXT: retq # sched: [4:1.00] 1576 ; 1577 ; ZNVER1-SSE-LABEL: test_cvtpd2ps: 1578 ; ZNVER1-SSE: # %bb.0: 1579 ; ZNVER1-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] 1580 ; ZNVER1-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] 1581 ; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 1582 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1583 ; 1584 ; ZNVER1-LABEL: test_cvtpd2ps: 1585 ; ZNVER1: # %bb.0: 1586 ; ZNVER1-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [11:1.00] 1587 ; ZNVER1-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] 1588 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1589 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1590 %1 = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) 1591 %2 = load <2 x double>, <2 x double> *%a1, align 16 1592 %3 = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %2) 1593 %4 = fadd <4 x float> %1, %3 1594 ret <4 x float> %4 1595 } 1596 declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone 1597 1598 define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { 1599 ; GENERIC-LABEL: test_cvtps2dq: 1600 ; GENERIC: # %bb.0: 1601 ; GENERIC-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] 1602 ; GENERIC-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] 1603 ; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 1604 ; GENERIC-NEXT: retq # sched: [1:1.00] 1605 ; 1606 ; ATOM-LABEL: test_cvtps2dq: 1607 ; ATOM: # %bb.0: 1608 ; ATOM-NEXT: cvtps2dq (%rdi), %xmm1 # sched: [7:3.50] 1609 ; ATOM-NEXT: cvtps2dq %xmm0, %xmm0 # sched: [6:3.00] 1610 ; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] 1611 ; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 1612 ; ATOM-NEXT: retq # sched: [79:39.50] 1613 ; 1614 ; SLM-LABEL: test_cvtps2dq: 1615 ; SLM: # %bb.0: 1616 ; SLM-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50] 1617 ; SLM-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [7:1.00] 1618 ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 1619 ; SLM-NEXT: retq # sched: 
[4:1.00] 1620 ; 1621 ; SANDY-SSE-LABEL: test_cvtps2dq: 1622 ; SANDY-SSE: # %bb.0: 1623 ; SANDY-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] 1624 ; SANDY-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] 1625 ; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 1626 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1627 ; 1628 ; SANDY-LABEL: test_cvtps2dq: 1629 ; SANDY: # %bb.0: 1630 ; SANDY-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] 1631 ; SANDY-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00] 1632 ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1633 ; SANDY-NEXT: retq # sched: [1:1.00] 1634 ; 1635 ; HASWELL-SSE-LABEL: test_cvtps2dq: 1636 ; HASWELL-SSE: # %bb.0: 1637 ; HASWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] 1638 ; HASWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] 1639 ; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 1640 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1641 ; 1642 ; HASWELL-LABEL: test_cvtps2dq: 1643 ; HASWELL: # %bb.0: 1644 ; HASWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] 1645 ; HASWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00] 1646 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1647 ; HASWELL-NEXT: retq # sched: [7:1.00] 1648 ; 1649 ; BROADWELL-SSE-LABEL: test_cvtps2dq: 1650 ; BROADWELL-SSE: # %bb.0: 1651 ; BROADWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] 1652 ; BROADWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00] 1653 ; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 1654 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1655 ; 1656 ; BROADWELL-LABEL: test_cvtps2dq: 1657 ; BROADWELL: # %bb.0: 1658 ; BROADWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] 1659 ; BROADWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00] 1660 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1661 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1662 ; 1663 ; SKYLAKE-SSE-LABEL: test_cvtps2dq: 1664 ; SKYLAKE-SSE: # %bb.0: 1665 ; SKYLAKE-SSE-NEXT: cvtps2dq %xmm0, %xmm1 
# sched: [4:0.50] 1666 ; SKYLAKE-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50] 1667 ; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 1668 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1669 ; 1670 ; SKYLAKE-LABEL: test_cvtps2dq: 1671 ; SKYLAKE: # %bb.0: 1672 ; SKYLAKE-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50] 1673 ; SKYLAKE-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50] 1674 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1675 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1676 ; 1677 ; SKX-SSE-LABEL: test_cvtps2dq: 1678 ; SKX-SSE: # %bb.0: 1679 ; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50] 1680 ; SKX-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50] 1681 ; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 1682 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1683 ; 1684 ; SKX-LABEL: test_cvtps2dq: 1685 ; SKX: # %bb.0: 1686 ; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50] 1687 ; SKX-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50] 1688 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1689 ; SKX-NEXT: retq # sched: [7:1.00] 1690 ; 1691 ; BTVER2-SSE-LABEL: test_cvtps2dq: 1692 ; BTVER2-SSE: # %bb.0: 1693 ; BTVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] 1694 ; BTVER2-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00] 1695 ; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 1696 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1697 ; 1698 ; BTVER2-LABEL: test_cvtps2dq: 1699 ; BTVER2: # %bb.0: 1700 ; BTVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00] 1701 ; BTVER2-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] 1702 ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1703 ; BTVER2-NEXT: retq # sched: [4:1.00] 1704 ; 1705 ; ZNVER1-SSE-LABEL: test_cvtps2dq: 1706 ; ZNVER1-SSE: # %bb.0: 1707 ; ZNVER1-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [5:1.00] 1708 ; ZNVER1-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [12:1.00] 1709 ; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] 1710 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 
1711 ; 1712 ; ZNVER1-LABEL: test_cvtps2dq: 1713 ; ZNVER1: # %bb.0: 1714 ; ZNVER1-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [12:1.00] 1715 ; ZNVER1-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [5:1.00] 1716 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 1717 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1718 %1 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) 1719 %2 = load <4 x float>, <4 x float> *%a1, align 16 1720 %3 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %2) 1721 %4 = add <4 x i32> %1, %3 1722 ret <4 x i32> %4 1723 } 1724 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone 1725 1726 define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { 1727 ; GENERIC-LABEL: test_cvtps2pd: 1728 ; GENERIC: # %bb.0: 1729 ; GENERIC-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] 1730 ; GENERIC-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00] 1731 ; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 1732 ; GENERIC-NEXT: retq # sched: [1:1.00] 1733 ; 1734 ; ATOM-LABEL: test_cvtps2pd: 1735 ; ATOM: # %bb.0: 1736 ; ATOM-NEXT: cvtps2pd (%rdi), %xmm1 # sched: [8:4.00] 1737 ; ATOM-NEXT: cvtps2pd %xmm0, %xmm0 # sched: [7:3.50] 1738 ; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00] 1739 ; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] 1740 ; ATOM-NEXT: retq # sched: [79:39.50] 1741 ; 1742 ; SLM-LABEL: test_cvtps2pd: 1743 ; SLM: # %bb.0: 1744 ; SLM-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [4:0.50] 1745 ; SLM-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00] 1746 ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 1747 ; SLM-NEXT: retq # sched: [4:1.00] 1748 ; 1749 ; SANDY-SSE-LABEL: test_cvtps2pd: 1750 ; SANDY-SSE: # %bb.0: 1751 ; SANDY-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] 1752 ; SANDY-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00] 1753 ; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 1754 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1755 ; 1756 ; SANDY-LABEL: test_cvtps2pd: 1757 ; SANDY: # %bb.0: 1758 ; SANDY-NEXT: vcvtps2pd 
%xmm0, %xmm0 # sched: [2:1.00] 1759 ; SANDY-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00] 1760 ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1761 ; SANDY-NEXT: retq # sched: [1:1.00] 1762 ; 1763 ; HASWELL-SSE-LABEL: test_cvtps2pd: 1764 ; HASWELL-SSE: # %bb.0: 1765 ; HASWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] 1766 ; HASWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00] 1767 ; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 1768 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1769 ; 1770 ; HASWELL-LABEL: test_cvtps2pd: 1771 ; HASWELL: # %bb.0: 1772 ; HASWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] 1773 ; HASWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [6:1.00] 1774 ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1775 ; HASWELL-NEXT: retq # sched: [7:1.00] 1776 ; 1777 ; BROADWELL-SSE-LABEL: test_cvtps2pd: 1778 ; BROADWELL-SSE: # %bb.0: 1779 ; BROADWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] 1780 ; BROADWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00] 1781 ; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 1782 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1783 ; 1784 ; BROADWELL-LABEL: test_cvtps2pd: 1785 ; BROADWELL: # %bb.0: 1786 ; BROADWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] 1787 ; BROADWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [6:1.00] 1788 ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1789 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1790 ; 1791 ; SKYLAKE-SSE-LABEL: test_cvtps2pd: 1792 ; SKYLAKE-SSE: # %bb.0: 1793 ; SKYLAKE-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00] 1794 ; SKYLAKE-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50] 1795 ; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] 1796 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1797 ; 1798 ; SKYLAKE-LABEL: test_cvtps2pd: 1799 ; SKYLAKE: # %bb.0: 1800 ; SKYLAKE-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00] 1801 ; SKYLAKE-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50] 1802 ; SKYLAKE-NEXT: 
vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1803 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1804 ; 1805 ; SKX-SSE-LABEL: test_cvtps2pd: 1806 ; SKX-SSE: # %bb.0: 1807 ; SKX-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00] 1808 ; SKX-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50] 1809 ; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] 1810 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1811 ; 1812 ; SKX-LABEL: test_cvtps2pd: 1813 ; SKX: # %bb.0: 1814 ; SKX-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00] 1815 ; SKX-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50] 1816 ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 1817 ; SKX-NEXT: retq # sched: [7:1.00] 1818 ; 1819 ; BTVER2-SSE-LABEL: test_cvtps2pd: 1820 ; BTVER2-SSE: # %bb.0: 1821 ; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] 1822 ; BTVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00] 1823 ; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 1824 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1825 ; 1826 ; BTVER2-LABEL: test_cvtps2pd: 1827 ; BTVER2: # %bb.0: 1828 ; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00] 1829 ; BTVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] 1830 ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1831 ; BTVER2-NEXT: retq # sched: [4:1.00] 1832 ; 1833 ; ZNVER1-SSE-LABEL: test_cvtps2pd: 1834 ; ZNVER1-SSE: # %bb.0: 1835 ; ZNVER1-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [3:1.00] 1836 ; ZNVER1-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [10:1.00] 1837 ; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 1838 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1839 ; 1840 ; ZNVER1-LABEL: test_cvtps2pd: 1841 ; ZNVER1: # %bb.0: 1842 ; ZNVER1-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [10:1.00] 1843 ; ZNVER1-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00] 1844 ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 1845 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1846 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1> 1847 %2 = fpext <2 x float> 
%1 to <2 x double> 1848 %3 = load <4 x float>, <4 x float> *%a1, align 16 1849 %4 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 0, i32 1> 1850 %5 = fpext <2 x float> %4 to <2 x double> 1851 %6 = fadd <2 x double> %2, %5 1852 ret <2 x double> %6 1853 } 1854 1855 define i32 @test_cvtsd2si(double %a0, double *%a1) { 1856 ; GENERIC-LABEL: test_cvtsd2si: 1857 ; GENERIC: # %bb.0: 1858 ; GENERIC-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00] 1859 ; GENERIC-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] 1860 ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] 1861 ; GENERIC-NEXT: retq # sched: [1:1.00] 1862 ; 1863 ; ATOM-LABEL: test_cvtsd2si: 1864 ; ATOM: # %bb.0: 1865 ; ATOM-NEXT: cvtsd2si (%rdi), %eax # sched: [9:4.50] 1866 ; ATOM-NEXT: cvtsd2si %xmm0, %ecx # sched: [8:4.00] 1867 ; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50] 1868 ; ATOM-NEXT: retq # sched: [79:39.50] 1869 ; 1870 ; SLM-LABEL: test_cvtsd2si: 1871 ; SLM: # %bb.0: 1872 ; SLM-NEXT: cvtsd2si (%rdi), %eax # sched: [7:1.00] 1873 ; SLM-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:0.50] 1874 ; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50] 1875 ; SLM-NEXT: retq # sched: [4:1.00] 1876 ; 1877 ; SANDY-SSE-LABEL: test_cvtsd2si: 1878 ; SANDY-SSE: # %bb.0: 1879 ; SANDY-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00] 1880 ; SANDY-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] 1881 ; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] 1882 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1883 ; 1884 ; SANDY-LABEL: test_cvtsd2si: 1885 ; SANDY: # %bb.0: 1886 ; SANDY-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00] 1887 ; SANDY-NEXT: vcvtsd2si (%rdi), %eax # sched: [10:1.00] 1888 ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] 1889 ; SANDY-NEXT: retq # sched: [1:1.00] 1890 ; 1891 ; HASWELL-SSE-LABEL: test_cvtsd2si: 1892 ; HASWELL-SSE: # %bb.0: 1893 ; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00] 1894 ; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] 1895 ; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: 
[1:0.25] 1896 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1897 ; 1898 ; HASWELL-LABEL: test_cvtsd2si: 1899 ; HASWELL: # %bb.0: 1900 ; HASWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00] 1901 ; HASWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [9:1.00] 1902 ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] 1903 ; HASWELL-NEXT: retq # sched: [7:1.00] 1904 ; 1905 ; BROADWELL-SSE-LABEL: test_cvtsd2si: 1906 ; BROADWELL-SSE: # %bb.0: 1907 ; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] 1908 ; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00] 1909 ; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1910 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1911 ; 1912 ; BROADWELL-LABEL: test_cvtsd2si: 1913 ; BROADWELL: # %bb.0: 1914 ; BROADWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [9:1.00] 1915 ; BROADWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00] 1916 ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] 1917 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1918 ; 1919 ; SKYLAKE-SSE-LABEL: test_cvtsd2si: 1920 ; SKYLAKE-SSE: # %bb.0: 1921 ; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00] 1922 ; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00] 1923 ; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1924 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1925 ; 1926 ; SKYLAKE-LABEL: test_cvtsd2si: 1927 ; SKYLAKE: # %bb.0: 1928 ; SKYLAKE-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00] 1929 ; SKYLAKE-NEXT: vcvtsd2si (%rdi), %eax # sched: [11:1.00] 1930 ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1931 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1932 ; 1933 ; SKX-SSE-LABEL: test_cvtsd2si: 1934 ; SKX-SSE: # %bb.0: 1935 ; SKX-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00] 1936 ; SKX-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00] 1937 ; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1938 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1939 ; 1940 ; SKX-LABEL: test_cvtsd2si: 1941 ; SKX: # %bb.0: 1942 ; SKX-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00] 1943 ; 
SKX-NEXT: vcvtsd2si (%rdi), %eax # sched: [11:1.00] 1944 ; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] 1945 ; SKX-NEXT: retq # sched: [7:1.00] 1946 ; 1947 ; BTVER2-SSE-LABEL: test_cvtsd2si: 1948 ; BTVER2-SSE: # %bb.0: 1949 ; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [12:1.00] 1950 ; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [7:1.00] 1951 ; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] 1952 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1953 ; 1954 ; BTVER2-LABEL: test_cvtsd2si: 1955 ; BTVER2: # %bb.0: 1956 ; BTVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [12:1.00] 1957 ; BTVER2-NEXT: vcvtsd2si %xmm0, %ecx # sched: [7:1.00] 1958 ; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] 1959 ; BTVER2-NEXT: retq # sched: [4:1.00] 1960 ; 1961 ; ZNVER1-SSE-LABEL: test_cvtsd2si: 1962 ; ZNVER1-SSE: # %bb.0: 1963 ; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [12:1.00] 1964 ; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00] 1965 ; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 1966 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1967 ; 1968 ; ZNVER1-LABEL: test_cvtsd2si: 1969 ; ZNVER1: # %bb.0: 1970 ; ZNVER1-NEXT: vcvtsd2si (%rdi), %eax # sched: [12:1.00] 1971 ; ZNVER1-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00] 1972 ; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] 1973 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1974 %1 = insertelement <2 x double> undef, double %a0, i32 0 1975 %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %1) 1976 %3 = load double, double *%a1, align 8 1977 %4 = insertelement <2 x double> undef, double %3, i32 0 1978 %5 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %4) 1979 %6 = add i32 %2, %5 1980 ret i32 %6 1981 } 1982 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone 1983 1984 define i64 @test_cvtsd2siq(double %a0, double *%a1) { 1985 ; GENERIC-LABEL: test_cvtsd2siq: 1986 ; GENERIC: # %bb.0: 1987 ; GENERIC-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00] 1988 ; GENERIC-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] 1989 ; 
GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] 1990 ; GENERIC-NEXT: retq # sched: [1:1.00] 1991 ; 1992 ; ATOM-LABEL: test_cvtsd2siq: 1993 ; ATOM: # %bb.0: 1994 ; ATOM-NEXT: cvtsd2si (%rdi), %rax # sched: [9:4.50] 1995 ; ATOM-NEXT: cvtsd2si %xmm0, %rcx # sched: [8:4.00] 1996 ; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50] 1997 ; ATOM-NEXT: retq # sched: [79:39.50] 1998 ; 1999 ; SLM-LABEL: test_cvtsd2siq: 2000 ; SLM: # %bb.0: 2001 ; SLM-NEXT: cvtsd2si (%rdi), %rax # sched: [7:1.00] 2002 ; SLM-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:0.50] 2003 ; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50] 2004 ; SLM-NEXT: retq # sched: [4:1.00] 2005 ; 2006 ; SANDY-SSE-LABEL: test_cvtsd2siq: 2007 ; SANDY-SSE: # %bb.0: 2008 ; SANDY-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00] 2009 ; SANDY-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] 2010 ; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] 2011 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2012 ; 2013 ; SANDY-LABEL: test_cvtsd2siq: 2014 ; SANDY: # %bb.0: 2015 ; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00] 2016 ; SANDY-NEXT: vcvtsd2si (%rdi), %rax # sched: [10:1.00] 2017 ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] 2018 ; SANDY-NEXT: retq # sched: [1:1.00] 2019 ; 2020 ; HASWELL-SSE-LABEL: test_cvtsd2siq: 2021 ; HASWELL-SSE: # %bb.0: 2022 ; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00] 2023 ; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] 2024 ; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 2025 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2026 ; 2027 ; HASWELL-LABEL: test_cvtsd2siq: 2028 ; HASWELL: # %bb.0: 2029 ; HASWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00] 2030 ; HASWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [9:1.00] 2031 ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] 2032 ; HASWELL-NEXT: retq # sched: [7:1.00] 2033 ; 2034 ; BROADWELL-SSE-LABEL: test_cvtsd2siq: 2035 ; BROADWELL-SSE: # %bb.0: 2036 ; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] 2037 ; BROADWELL-SSE-NEXT: 
cvtsd2si %xmm0, %rcx # sched: [4:1.00] 2038 ; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 2039 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2040 ; 2041 ; BROADWELL-LABEL: test_cvtsd2siq: 2042 ; BROADWELL: # %bb.0: 2043 ; BROADWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [9:1.00] 2044 ; BROADWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00] 2045 ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] 2046 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2047 ; 2048 ; SKYLAKE-SSE-LABEL: test_cvtsd2siq: 2049 ; SKYLAKE-SSE: # %bb.0: 2050 ; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00] 2051 ; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00] 2052 ; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 2053 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2054 ; 2055 ; SKYLAKE-LABEL: test_cvtsd2siq: 2056 ; SKYLAKE: # %bb.0: 2057 ; SKYLAKE-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00] 2058 ; SKYLAKE-NEXT: vcvtsd2si (%rdi), %rax # sched: [11:1.00] 2059 ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] 2060 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2061 ; 2062 ; SKX-SSE-LABEL: test_cvtsd2siq: 2063 ; SKX-SSE: # %bb.0: 2064 ; SKX-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00] 2065 ; SKX-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00] 2066 ; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 2067 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2068 ; 2069 ; SKX-LABEL: test_cvtsd2siq: 2070 ; SKX: # %bb.0: 2071 ; SKX-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00] 2072 ; SKX-NEXT: vcvtsd2si (%rdi), %rax # sched: [11:1.00] 2073 ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] 2074 ; SKX-NEXT: retq # sched: [7:1.00] 2075 ; 2076 ; BTVER2-SSE-LABEL: test_cvtsd2siq: 2077 ; BTVER2-SSE: # %bb.0: 2078 ; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [12:1.00] 2079 ; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [7:1.00] 2080 ; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] 2081 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2082 ; 2083 ; BTVER2-LABEL: test_cvtsd2siq: 2084 ; BTVER2: # %bb.0: 
2085 ; BTVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [12:1.00] 2086 ; BTVER2-NEXT: vcvtsd2si %xmm0, %rcx # sched: [7:1.00] 2087 ; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] 2088 ; BTVER2-NEXT: retq # sched: [4:1.00] 2089 ; 2090 ; ZNVER1-SSE-LABEL: test_cvtsd2siq: 2091 ; ZNVER1-SSE: # %bb.0: 2092 ; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [12:1.00] 2093 ; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00] 2094 ; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 2095 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2096 ; 2097 ; ZNVER1-LABEL: test_cvtsd2siq: 2098 ; ZNVER1: # %bb.0: 2099 ; ZNVER1-NEXT: vcvtsd2si (%rdi), %rax # sched: [12:1.00] 2100 ; ZNVER1-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00] 2101 ; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] 2102 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2103 %1 = insertelement <2 x double> undef, double %a0, i32 0 2104 %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %1) 2105 %3 = load double, double *%a1, align 8 2106 %4 = insertelement <2 x double> undef, double %3, i32 0 2107 %5 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %4) 2108 %6 = add i64 %2, %5 2109 ret i64 %6 2110 } 2111 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone 2112 2113 define float @test_cvtsd2ss(double %a0, double *%a1) { 2114 ; GENERIC-LABEL: test_cvtsd2ss: 2115 ; GENERIC: # %bb.0: 2116 ; GENERIC-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] 2117 ; GENERIC-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] 2118 ; GENERIC-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] 2119 ; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 2120 ; GENERIC-NEXT: retq # sched: [1:1.00] 2121 ; 2122 ; ATOM-LABEL: test_cvtsd2ss: 2123 ; ATOM: # %bb.0: 2124 ; ATOM-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00] 2125 ; ATOM-NEXT: cvtsd2ss %xmm0, %xmm2 # sched: [6:3.00] 2126 ; ATOM-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.50] 2127 ; ATOM-NEXT: cvtsd2ss %xmm1, %xmm0 # sched: [6:3.00] 2128 ; ATOM-NEXT: addss %xmm2, %xmm0 # sched: 
[5:5.00] 2129 ; ATOM-NEXT: retq # sched: [79:39.50] 2130 ; 2131 ; SLM-LABEL: test_cvtsd2ss: 2132 ; SLM: # %bb.0: 2133 ; SLM-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:0.50] 2134 ; SLM-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [3:1.00] 2135 ; SLM-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:0.50] 2136 ; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 2137 ; SLM-NEXT: retq # sched: [4:1.00] 2138 ; 2139 ; SANDY-SSE-LABEL: test_cvtsd2ss: 2140 ; SANDY-SSE: # %bb.0: 2141 ; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] 2142 ; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] 2143 ; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] 2144 ; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 2145 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2146 ; 2147 ; SANDY-LABEL: test_cvtsd2ss: 2148 ; SANDY: # %bb.0: 2149 ; SANDY-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] 2150 ; SANDY-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] 2151 ; SANDY-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] 2152 ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2153 ; SANDY-NEXT: retq # sched: [1:1.00] 2154 ; 2155 ; HASWELL-SSE-LABEL: test_cvtsd2ss: 2156 ; HASWELL-SSE: # %bb.0: 2157 ; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] 2158 ; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] 2159 ; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] 2160 ; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 2161 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2162 ; 2163 ; HASWELL-LABEL: test_cvtsd2ss: 2164 ; HASWELL: # %bb.0: 2165 ; HASWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] 2166 ; HASWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] 2167 ; HASWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] 2168 ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2169 ; HASWELL-NEXT: retq # sched: [7:1.00] 2170 ; 2171 ; BROADWELL-SSE-LABEL: test_cvtsd2ss: 2172 ; BROADWELL-SSE: # 
%bb.0: 2173 ; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] 2174 ; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] 2175 ; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] 2176 ; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 2177 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2178 ; 2179 ; BROADWELL-LABEL: test_cvtsd2ss: 2180 ; BROADWELL: # %bb.0: 2181 ; BROADWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] 2182 ; BROADWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] 2183 ; BROADWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] 2184 ; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2185 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2186 ; 2187 ; SKYLAKE-SSE-LABEL: test_cvtsd2ss: 2188 ; SKYLAKE-SSE: # %bb.0: 2189 ; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00] 2190 ; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] 2191 ; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00] 2192 ; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] 2193 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2194 ; 2195 ; SKYLAKE-LABEL: test_cvtsd2ss: 2196 ; SKYLAKE: # %bb.0: 2197 ; SKYLAKE-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] 2198 ; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] 2199 ; SKYLAKE-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] 2200 ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2201 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2202 ; 2203 ; SKX-SSE-LABEL: test_cvtsd2ss: 2204 ; SKX-SSE: # %bb.0: 2205 ; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00] 2206 ; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] 2207 ; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00] 2208 ; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] 2209 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2210 ; 2211 ; SKX-LABEL: test_cvtsd2ss: 2212 ; SKX: # %bb.0: 2213 ; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: 
[5:1.00] 2214 ; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] 2215 ; SKX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] 2216 ; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2217 ; SKX-NEXT: retq # sched: [7:1.00] 2218 ; 2219 ; BTVER2-SSE-LABEL: test_cvtsd2ss: 2220 ; BTVER2-SSE: # %bb.0: 2221 ; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [7:2.00] 2222 ; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] 2223 ; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [7:2.00] 2224 ; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 2225 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2226 ; 2227 ; BTVER2-LABEL: test_cvtsd2ss: 2228 ; BTVER2: # %bb.0: 2229 ; BTVER2-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [7:2.00] 2230 ; BTVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] 2231 ; BTVER2-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [7:2.00] 2232 ; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2233 ; BTVER2-NEXT: retq # sched: [4:1.00] 2234 ; 2235 ; ZNVER1-SSE-LABEL: test_cvtsd2ss: 2236 ; ZNVER1-SSE: # %bb.0: 2237 ; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] 2238 ; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50] 2239 ; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] 2240 ; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] 2241 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2242 ; 2243 ; ZNVER1-LABEL: test_cvtsd2ss: 2244 ; ZNVER1: # %bb.0: 2245 ; ZNVER1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] 2246 ; ZNVER1-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] 2247 ; ZNVER1-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] 2248 ; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2249 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2250 %1 = fptrunc double %a0 to float 2251 %2 = load double, double *%a1, align 8 2252 %3 = fptrunc double %2 to float 2253 %4 = fadd float %1, %3 2254 ret float %4 2255 } 2256 2257 define double @test_cvtsi2sd(i32 %a0, 
i32 *%a1) { 2258 ; GENERIC-LABEL: test_cvtsi2sd: 2259 ; GENERIC: # %bb.0: 2260 ; GENERIC-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] 2261 ; GENERIC-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] 2262 ; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2263 ; GENERIC-NEXT: retq # sched: [1:1.00] 2264 ; 2265 ; ATOM-LABEL: test_cvtsi2sd: 2266 ; ATOM: # %bb.0: 2267 ; ATOM-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [7:3.50] 2268 ; ATOM-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [6:3.00] 2269 ; ATOM-NEXT: addsd %xmm1, %xmm0 # sched: [5:5.00] 2270 ; ATOM-NEXT: retq # sched: [79:39.50] 2271 ; 2272 ; SLM-LABEL: test_cvtsi2sd: 2273 ; SLM: # %bb.0: 2274 ; SLM-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [7:1.00] 2275 ; SLM-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:0.50] 2276 ; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2277 ; SLM-NEXT: retq # sched: [4:1.00] 2278 ; 2279 ; SANDY-SSE-LABEL: test_cvtsi2sd: 2280 ; SANDY-SSE: # %bb.0: 2281 ; SANDY-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] 2282 ; SANDY-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] 2283 ; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2284 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2285 ; 2286 ; SANDY-LABEL: test_cvtsi2sd: 2287 ; SANDY: # %bb.0: 2288 ; SANDY-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] 2289 ; SANDY-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 2290 ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2291 ; SANDY-NEXT: retq # sched: [1:1.00] 2292 ; 2293 ; HASWELL-SSE-LABEL: test_cvtsi2sd: 2294 ; HASWELL-SSE: # %bb.0: 2295 ; HASWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] 2296 ; HASWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] 2297 ; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2298 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2299 ; 2300 ; HASWELL-LABEL: test_cvtsi2sd: 2301 ; HASWELL: # %bb.0: 2302 ; HASWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] 2303 ; HASWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 2304 ; 
HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2305 ; HASWELL-NEXT: retq # sched: [7:1.00] 2306 ; 2307 ; BROADWELL-SSE-LABEL: test_cvtsi2sd: 2308 ; BROADWELL-SSE: # %bb.0: 2309 ; BROADWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] 2310 ; BROADWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] 2311 ; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2312 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2313 ; 2314 ; BROADWELL-LABEL: test_cvtsi2sd: 2315 ; BROADWELL: # %bb.0: 2316 ; BROADWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] 2317 ; BROADWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 2318 ; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2319 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2320 ; 2321 ; SKYLAKE-SSE-LABEL: test_cvtsi2sd: 2322 ; SKYLAKE-SSE: # %bb.0: 2323 ; SKYLAKE-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] 2324 ; SKYLAKE-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] 2325 ; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] 2326 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2327 ; 2328 ; SKYLAKE-LABEL: test_cvtsi2sd: 2329 ; SKYLAKE: # %bb.0: 2330 ; SKYLAKE-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] 2331 ; SKYLAKE-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 2332 ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2333 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2334 ; 2335 ; SKX-SSE-LABEL: test_cvtsi2sd: 2336 ; SKX-SSE: # %bb.0: 2337 ; SKX-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] 2338 ; SKX-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] 2339 ; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] 2340 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2341 ; 2342 ; SKX-LABEL: test_cvtsi2sd: 2343 ; SKX: # %bb.0: 2344 ; SKX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] 2345 ; SKX-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 2346 ; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2347 ; SKX-NEXT: retq # sched: [7:1.00] 2348 ; 
2349 ; BTVER2-SSE-LABEL: test_cvtsi2sd: 2350 ; BTVER2-SSE: # %bb.0: 2351 ; BTVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [14:1.00] 2352 ; BTVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [9:1.00] 2353 ; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2354 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2355 ; 2356 ; BTVER2-LABEL: test_cvtsi2sd: 2357 ; BTVER2: # %bb.0: 2358 ; BTVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [9:1.00] 2359 ; BTVER2-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [14:1.00] 2360 ; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2361 ; BTVER2-NEXT: retq # sched: [4:1.00] 2362 ; 2363 ; ZNVER1-SSE-LABEL: test_cvtsi2sd: 2364 ; ZNVER1-SSE: # %bb.0: 2365 ; ZNVER1-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [12:1.00] 2366 ; ZNVER1-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] 2367 ; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2368 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2369 ; 2370 ; ZNVER1-LABEL: test_cvtsi2sd: 2371 ; ZNVER1: # %bb.0: 2372 ; ZNVER1-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] 2373 ; ZNVER1-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [12:1.00] 2374 ; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2375 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2376 %1 = sitofp i32 %a0 to double 2377 %2 = load i32, i32 *%a1, align 8 2378 %3 = sitofp i32 %2 to double 2379 %4 = fadd double %1, %3 2380 ret double %4 2381 } 2382 2383 define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { 2384 ; GENERIC-LABEL: test_cvtsi2sdq: 2385 ; GENERIC: # %bb.0: 2386 ; GENERIC-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] 2387 ; GENERIC-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] 2388 ; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2389 ; GENERIC-NEXT: retq # sched: [1:1.00] 2390 ; 2391 ; ATOM-LABEL: test_cvtsi2sdq: 2392 ; ATOM: # %bb.0: 2393 ; ATOM-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [7:3.50] 2394 ; ATOM-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [6:3.00] 2395 ; ATOM-NEXT: addsd %xmm1, %xmm0 # sched: [5:5.00] 
2396 ; ATOM-NEXT: retq # sched: [79:39.50] 2397 ; 2398 ; SLM-LABEL: test_cvtsi2sdq: 2399 ; SLM: # %bb.0: 2400 ; SLM-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [7:1.00] 2401 ; SLM-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:0.50] 2402 ; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2403 ; SLM-NEXT: retq # sched: [4:1.00] 2404 ; 2405 ; SANDY-SSE-LABEL: test_cvtsi2sdq: 2406 ; SANDY-SSE: # %bb.0: 2407 ; SANDY-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] 2408 ; SANDY-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] 2409 ; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2410 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2411 ; 2412 ; SANDY-LABEL: test_cvtsi2sdq: 2413 ; SANDY: # %bb.0: 2414 ; SANDY-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] 2415 ; SANDY-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 2416 ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2417 ; SANDY-NEXT: retq # sched: [1:1.00] 2418 ; 2419 ; HASWELL-SSE-LABEL: test_cvtsi2sdq: 2420 ; HASWELL-SSE: # %bb.0: 2421 ; HASWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] 2422 ; HASWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] 2423 ; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2424 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2425 ; 2426 ; HASWELL-LABEL: test_cvtsi2sdq: 2427 ; HASWELL: # %bb.0: 2428 ; HASWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] 2429 ; HASWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 2430 ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2431 ; HASWELL-NEXT: retq # sched: [7:1.00] 2432 ; 2433 ; BROADWELL-SSE-LABEL: test_cvtsi2sdq: 2434 ; BROADWELL-SSE: # %bb.0: 2435 ; BROADWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] 2436 ; BROADWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] 2437 ; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2438 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2439 ; 2440 ; BROADWELL-LABEL: test_cvtsi2sdq: 2441 ; BROADWELL: # %bb.0: 2442 ; BROADWELL-NEXT: 
vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] 2443 ; BROADWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 2444 ; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2445 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2446 ; 2447 ; SKYLAKE-SSE-LABEL: test_cvtsi2sdq: 2448 ; SKYLAKE-SSE: # %bb.0: 2449 ; SKYLAKE-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] 2450 ; SKYLAKE-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] 2451 ; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] 2452 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2453 ; 2454 ; SKYLAKE-LABEL: test_cvtsi2sdq: 2455 ; SKYLAKE: # %bb.0: 2456 ; SKYLAKE-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] 2457 ; SKYLAKE-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 2458 ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2459 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2460 ; 2461 ; SKX-SSE-LABEL: test_cvtsi2sdq: 2462 ; SKX-SSE: # %bb.0: 2463 ; SKX-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] 2464 ; SKX-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] 2465 ; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] 2466 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2467 ; 2468 ; SKX-LABEL: test_cvtsi2sdq: 2469 ; SKX: # %bb.0: 2470 ; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] 2471 ; SKX-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] 2472 ; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2473 ; SKX-NEXT: retq # sched: [7:1.00] 2474 ; 2475 ; BTVER2-SSE-LABEL: test_cvtsi2sdq: 2476 ; BTVER2-SSE: # %bb.0: 2477 ; BTVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [14:1.00] 2478 ; BTVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [9:1.00] 2479 ; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2480 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2481 ; 2482 ; BTVER2-LABEL: test_cvtsi2sdq: 2483 ; BTVER2: # %bb.0: 2484 ; BTVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [9:1.00] 2485 ; BTVER2-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [14:1.00] 2486 ; 
BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2487 ; BTVER2-NEXT: retq # sched: [4:1.00] 2488 ; 2489 ; ZNVER1-SSE-LABEL: test_cvtsi2sdq: 2490 ; ZNVER1-SSE: # %bb.0: 2491 ; ZNVER1-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [12:1.00] 2492 ; ZNVER1-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] 2493 ; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2494 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2495 ; 2496 ; ZNVER1-LABEL: test_cvtsi2sdq: 2497 ; ZNVER1: # %bb.0: 2498 ; ZNVER1-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] 2499 ; ZNVER1-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [12:1.00] 2500 ; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2501 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2502 %1 = sitofp i64 %a0 to double 2503 %2 = load i64, i64 *%a1, align 8 2504 %3 = sitofp i64 %2 to double 2505 %4 = fadd double %1, %3 2506 ret double %4 2507 } 2508 2509 ; TODO - cvtss2sd_m: the memory-operand form of cvtss2sd is not covered yet; in every configuration checked below the load is emitted as a separate (v)movss and only the register form is scheduled. 2510 2511 define double @test_cvtss2sd(float %a0, float *%a1) { 2512 ; GENERIC-LABEL: test_cvtss2sd: 2513 ; GENERIC: # %bb.0: 2514 ; GENERIC-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00] 2515 ; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 2516 ; GENERIC-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00] 2517 ; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2518 ; GENERIC-NEXT: retq # sched: [1:1.00] 2519 ; 2520 ; ATOM-LABEL: test_cvtss2sd: 2521 ; ATOM: # %bb.0: 2522 ; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00] 2523 ; ATOM-NEXT: cvtss2sd %xmm0, %xmm2 # sched: [6:3.00] 2524 ; ATOM-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.50] 2525 ; ATOM-NEXT: cvtss2sd %xmm1, %xmm0 # sched: [6:3.00] 2526 ; ATOM-NEXT: addsd %xmm2, %xmm0 # sched: [5:5.00] 2527 ; ATOM-NEXT: retq # sched: [79:39.50] 2528 ; 2529 ; SLM-LABEL: test_cvtss2sd: 2530 ; SLM: # %bb.0: 2531 ; SLM-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:0.50] 2532 ; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00] 2533 ; SLM-NEXT: cvtss2sd %xmm0, %xmm0 #
sched: [4:0.50] 2534 ; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2535 ; SLM-NEXT: retq # sched: [4:1.00] 2536 ; 2537 ; SANDY-SSE-LABEL: test_cvtss2sd: 2538 ; SANDY-SSE: # %bb.0: 2539 ; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00] 2540 ; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] 2541 ; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00] 2542 ; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2543 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2544 ; 2545 ; SANDY-LABEL: test_cvtss2sd: 2546 ; SANDY: # %bb.0: 2547 ; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00] 2548 ; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] 2549 ; SANDY-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] 2550 ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2551 ; SANDY-NEXT: retq # sched: [1:1.00] 2552 ; 2553 ; HASWELL-SSE-LABEL: test_cvtss2sd: 2554 ; HASWELL-SSE: # %bb.0: 2555 ; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00] 2556 ; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 2557 ; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00] 2558 ; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2559 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2560 ; 2561 ; HASWELL-LABEL: test_cvtss2sd: 2562 ; HASWELL: # %bb.0: 2563 ; HASWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00] 2564 ; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 2565 ; HASWELL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00] 2566 ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2567 ; HASWELL-NEXT: retq # sched: [7:1.00] 2568 ; 2569 ; BROADWELL-SSE-LABEL: test_cvtss2sd: 2570 ; BROADWELL-SSE: # %bb.0: 2571 ; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00] 2572 ; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 2573 ; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00] 2574 ; 
BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2575 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2576 ; 2577 ; BROADWELL-LABEL: test_cvtss2sd: 2578 ; BROADWELL: # %bb.0: 2579 ; BROADWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00] 2580 ; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 2581 ; BROADWELL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00] 2582 ; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2583 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2584 ; 2585 ; SKYLAKE-SSE-LABEL: test_cvtss2sd: 2586 ; SKYLAKE-SSE: # %bb.0: 2587 ; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00] 2588 ; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 2589 ; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00] 2590 ; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] 2591 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2592 ; 2593 ; SKYLAKE-LABEL: test_cvtss2sd: 2594 ; SKYLAKE: # %bb.0: 2595 ; SKYLAKE-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] 2596 ; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 2597 ; SKYLAKE-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00] 2598 ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 2599 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2600 ; 2601 ; SKX-SSE-LABEL: test_cvtss2sd: 2602 ; SKX-SSE: # %bb.0: 2603 ; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00] 2604 ; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] 2605 ; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00] 2606 ; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] 2607 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2608 ; 2609 ; SKX-LABEL: test_cvtss2sd: 2610 ; SKX: # %bb.0: 2611 ; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] 2612 ; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] 2613 ; SKX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00] 2614 ; SKX-NEXT: vaddsd %xmm1, 
%xmm0, %xmm0 # sched: [4:0.50] 2615 ; SKX-NEXT: retq # sched: [7:1.00] 2616 ; 2617 ; BTVER2-SSE-LABEL: test_cvtss2sd: 2618 ; BTVER2-SSE: # %bb.0: 2619 ; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [7:2.00] 2620 ; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00] 2621 ; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [7:2.00] 2622 ; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2623 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2624 ; 2625 ; BTVER2-LABEL: test_cvtss2sd: 2626 ; BTVER2: # %bb.0: 2627 ; BTVER2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [7:2.00] 2628 ; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] 2629 ; BTVER2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [7:2.00] 2630 ; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2631 ; BTVER2-NEXT: retq # sched: [4:1.00] 2632 ; 2633 ; ZNVER1-SSE-LABEL: test_cvtss2sd: 2634 ; ZNVER1-SSE: # %bb.0: 2635 ; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:1.00] 2636 ; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50] 2637 ; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:1.00] 2638 ; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] 2639 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2640 ; 2641 ; ZNVER1-LABEL: test_cvtss2sd: 2642 ; ZNVER1: # %bb.0: 2643 ; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] 2644 ; ZNVER1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [4:1.00] 2645 ; ZNVER1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [4:1.00] 2646 ; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 2647 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2648 %1 = fpext float %a0 to double 2649 %2 = load float, float *%a1, align 4 2650 %3 = fpext float %2 to double 2651 %4 = fadd double %1, %3 2652 ret double %4 2653 } 2654 2655 define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { 2656 ; GENERIC-LABEL: test_cvttpd2dq: 2657 ; GENERIC: # %bb.0: 2658 ; GENERIC-NEXT: cvttpd2dq %xmm0, 
%xmm1 # sched: [4:1.00] 2659 ; GENERIC-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00] 2660 ; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 2661 ; GENERIC-NEXT: retq # sched: [1:1.00] 2662 ; 2663 ; ATOM-LABEL: test_cvttpd2dq: 2664 ; ATOM: # %bb.0: 2665 ; ATOM-NEXT: cvttpd2dq (%rdi), %xmm1 # sched: [8:4.00] 2666 ; ATOM-NEXT: cvttpd2dq %xmm0, %xmm0 # sched: [7:3.50] 2667 ; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] 2668 ; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 2669 ; ATOM-NEXT: retq # sched: [79:39.50] 2670 ; 2671 ; SLM-LABEL: test_cvttpd2dq: 2672 ; SLM: # %bb.0: 2673 ; SLM-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:0.50] 2674 ; SLM-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [7:1.00] 2675 ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 2676 ; SLM-NEXT: retq # sched: [4:1.00] 2677 ; 2678 ; SANDY-SSE-LABEL: test_cvttpd2dq: 2679 ; SANDY-SSE: # %bb.0: 2680 ; SANDY-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00] 2681 ; SANDY-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00] 2682 ; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 2683 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2684 ; 2685 ; SANDY-LABEL: test_cvttpd2dq: 2686 ; SANDY: # %bb.0: 2687 ; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] 2688 ; SANDY-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [10:1.00] 2689 ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2690 ; SANDY-NEXT: retq # sched: [1:1.00] 2691 ; 2692 ; HASWELL-SSE-LABEL: test_cvttpd2dq: 2693 ; HASWELL-SSE: # %bb.0: 2694 ; HASWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00] 2695 ; HASWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00] 2696 ; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 2697 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2698 ; 2699 ; HASWELL-LABEL: test_cvttpd2dq: 2700 ; HASWELL: # %bb.0: 2701 ; HASWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] 2702 ; HASWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] 2703 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2704 ; 
HASWELL-NEXT: retq # sched: [7:1.00] 2705 ; 2706 ; BROADWELL-SSE-LABEL: test_cvttpd2dq: 2707 ; BROADWELL-SSE: # %bb.0: 2708 ; BROADWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm1 # sched: [9:1.00] 2709 ; BROADWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] 2710 ; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] 2711 ; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] 2712 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2713 ; 2714 ; BROADWELL-LABEL: test_cvttpd2dq: 2715 ; BROADWELL: # %bb.0: 2716 ; BROADWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] 2717 ; BROADWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] 2718 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2719 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2720 ; 2721 ; SKYLAKE-SSE-LABEL: test_cvttpd2dq: 2722 ; SKYLAKE-SSE: # %bb.0: 2723 ; SKYLAKE-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00] 2724 ; SKYLAKE-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00] 2725 ; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 2726 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2727 ; 2728 ; SKYLAKE-LABEL: test_cvttpd2dq: 2729 ; SKYLAKE: # %bb.0: 2730 ; SKYLAKE-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] 2731 ; SKYLAKE-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] 2732 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 2733 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2734 ; 2735 ; SKX-SSE-LABEL: test_cvttpd2dq: 2736 ; SKX-SSE: # %bb.0: 2737 ; SKX-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00] 2738 ; SKX-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00] 2739 ; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 2740 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2741 ; 2742 ; SKX-LABEL: test_cvttpd2dq: 2743 ; SKX: # %bb.0: 2744 ; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] 2745 ; SKX-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:0.50] 2746 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 2747 ; SKX-NEXT: retq # sched: [7:1.00] 2748 ; 2749 ; BTVER2-SSE-LABEL: 
test_cvttpd2dq: 2750 ; BTVER2-SSE: # %bb.0: 2751 ; BTVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [3:1.00] 2752 ; BTVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [8:1.00] 2753 ; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 2754 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 2755 ; 2756 ; BTVER2-LABEL: test_cvttpd2dq: 2757 ; BTVER2: # %bb.0: 2758 ; BTVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] 2759 ; BTVER2-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [3:1.00] 2760 ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2761 ; BTVER2-NEXT: retq # sched: [4:1.00] 2762 ; 2763 ; ZNVER1-SSE-LABEL: test_cvttpd2dq: 2764 ; ZNVER1-SSE: # %bb.0: 2765 ; ZNVER1-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00] 2766 ; ZNVER1-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [12:1.00] 2767 ; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] 2768 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2769 ; 2770 ; ZNVER1-LABEL: test_cvttpd2dq: 2771 ; ZNVER1: # %bb.0: 2772 ; ZNVER1-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [12:1.00] 2773 ; ZNVER1-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] 2774 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 2775 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2776 %1 = fptosi <2 x double> %a0 to <2 x i32> 2777 %2 = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2778 %3 = load <2 x double>, <2 x double> *%a1, align 16 2779 %4 = fptosi <2 x double> %3 to <2 x i32> 2780 %5 = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2781 %6 = add <4 x i32> %2, %5 2782 ret <4 x i32> %6 2783 } 2784 2785 define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { 2786 ; GENERIC-LABEL: test_cvttps2dq: 2787 ; GENERIC: # %bb.0: 2788 ; GENERIC-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] 2789 ; GENERIC-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] 2790 ; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 2791 ; GENERIC-NEXT: retq # sched: [1:1.00] 2792 ; 2793 ; ATOM-LABEL: 
test_cvttps2dq: 2794 ; ATOM: # %bb.0: 2795 ; ATOM-NEXT: cvttps2dq (%rdi), %xmm1 # sched: [7:3.50] 2796 ; ATOM-NEXT: cvttps2dq %xmm0, %xmm0 # sched: [6:3.00] 2797 ; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] 2798 ; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 2799 ; ATOM-NEXT: retq # sched: [79:39.50] 2800 ; 2801 ; SLM-LABEL: test_cvttps2dq: 2802 ; SLM: # %bb.0: 2803 ; SLM-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50] 2804 ; SLM-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [7:1.00] 2805 ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 2806 ; SLM-NEXT: retq # sched: [4:1.00] 2807 ; 2808 ; SANDY-SSE-LABEL: test_cvttps2dq: 2809 ; SANDY-SSE: # %bb.0: 2810 ; SANDY-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] 2811 ; SANDY-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] 2812 ; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 2813 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2814 ; 2815 ; SANDY-LABEL: test_cvttps2dq: 2816 ; SANDY: # %bb.0: 2817 ; SANDY-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] 2818 ; SANDY-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00] 2819 ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2820 ; SANDY-NEXT: retq # sched: [1:1.00] 2821 ; 2822 ; HASWELL-SSE-LABEL: test_cvttps2dq: 2823 ; HASWELL-SSE: # %bb.0: 2824 ; HASWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] 2825 ; HASWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] 2826 ; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 2827 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2828 ; 2829 ; HASWELL-LABEL: test_cvttps2dq: 2830 ; HASWELL: # %bb.0: 2831 ; HASWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] 2832 ; HASWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00] 2833 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2834 ; HASWELL-NEXT: retq # sched: [7:1.00] 2835 ; 2836 ; BROADWELL-SSE-LABEL: test_cvttps2dq: 2837 ; BROADWELL-SSE: # %bb.0: 2838 ; BROADWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] 2839 ; BROADWELL-SSE-NEXT: 
cvttps2dq (%rdi), %xmm0 # sched: [8:1.00] 2840 ; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 2841 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2842 ; 2843 ; BROADWELL-LABEL: test_cvttps2dq: 2844 ; BROADWELL: # %bb.0: 2845 ; BROADWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] 2846 ; BROADWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00] 2847 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2848 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2849 ; 2850 ; SKYLAKE-SSE-LABEL: test_cvttps2dq: 2851 ; SKYLAKE-SSE: # %bb.0: 2852 ; SKYLAKE-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50] 2853 ; SKYLAKE-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50] 2854 ; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 2855 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2856 ; 2857 ; SKYLAKE-LABEL: test_cvttps2dq: 2858 ; SKYLAKE: # %bb.0: 2859 ; SKYLAKE-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50] 2860 ; SKYLAKE-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50] 2861 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 2862 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2863 ; 2864 ; SKX-SSE-LABEL: test_cvttps2dq: 2865 ; SKX-SSE: # %bb.0: 2866 ; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50] 2867 ; SKX-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50] 2868 ; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 2869 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2870 ; 2871 ; SKX-LABEL: test_cvttps2dq: 2872 ; SKX: # %bb.0: 2873 ; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50] 2874 ; SKX-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50] 2875 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 2876 ; SKX-NEXT: retq # sched: [7:1.00] 2877 ; 2878 ; BTVER2-SSE-LABEL: test_cvttps2dq: 2879 ; BTVER2-SSE: # %bb.0: 2880 ; BTVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] 2881 ; BTVER2-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00] 2882 ; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 2883 ; BTVER2-SSE-NEXT: retq # sched: 
[4:1.00] 2884 ; 2885 ; BTVER2-LABEL: test_cvttps2dq: 2886 ; BTVER2: # %bb.0: 2887 ; BTVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00] 2888 ; BTVER2-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] 2889 ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2890 ; BTVER2-NEXT: retq # sched: [4:1.00] 2891 ; 2892 ; ZNVER1-SSE-LABEL: test_cvttps2dq: 2893 ; ZNVER1-SSE: # %bb.0: 2894 ; ZNVER1-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [5:1.00] 2895 ; ZNVER1-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [12:1.00] 2896 ; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] 2897 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 2898 ; 2899 ; ZNVER1-LABEL: test_cvttps2dq: 2900 ; ZNVER1: # %bb.0: 2901 ; ZNVER1-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [12:1.00] 2902 ; ZNVER1-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [5:1.00] 2903 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 2904 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2905 %1 = fptosi <4 x float> %a0 to <4 x i32> 2906 %2 = load <4 x float>, <4 x float> *%a1, align 16 2907 %3 = fptosi <4 x float> %2 to <4 x i32> 2908 %4 = add <4 x i32> %1, %3 2909 ret <4 x i32> %4 2910 } 2911 2912 define i32 @test_cvttsd2si(double %a0, double *%a1) { 2913 ; GENERIC-LABEL: test_cvttsd2si: 2914 ; GENERIC: # %bb.0: 2915 ; GENERIC-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00] 2916 ; GENERIC-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] 2917 ; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] 2918 ; GENERIC-NEXT: retq # sched: [1:1.00] 2919 ; 2920 ; ATOM-LABEL: test_cvttsd2si: 2921 ; ATOM: # %bb.0: 2922 ; ATOM-NEXT: cvttsd2si (%rdi), %eax # sched: [9:4.50] 2923 ; ATOM-NEXT: cvttsd2si %xmm0, %ecx # sched: [8:4.00] 2924 ; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50] 2925 ; ATOM-NEXT: retq # sched: [79:39.50] 2926 ; 2927 ; SLM-LABEL: test_cvttsd2si: 2928 ; SLM: # %bb.0: 2929 ; SLM-NEXT: cvttsd2si (%rdi), %eax # sched: [7:1.00] 2930 ; SLM-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:0.50] 2931 ; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50] 2932 ; 
SLM-NEXT: retq # sched: [4:1.00] 2933 ; 2934 ; SANDY-SSE-LABEL: test_cvttsd2si: 2935 ; SANDY-SSE: # %bb.0: 2936 ; SANDY-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00] 2937 ; SANDY-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] 2938 ; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] 2939 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 2940 ; 2941 ; SANDY-LABEL: test_cvttsd2si: 2942 ; SANDY: # %bb.0: 2943 ; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00] 2944 ; SANDY-NEXT: vcvttsd2si (%rdi), %eax # sched: [10:1.00] 2945 ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] 2946 ; SANDY-NEXT: retq # sched: [1:1.00] 2947 ; 2948 ; HASWELL-SSE-LABEL: test_cvttsd2si: 2949 ; HASWELL-SSE: # %bb.0: 2950 ; HASWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00] 2951 ; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] 2952 ; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 2953 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 2954 ; 2955 ; HASWELL-LABEL: test_cvttsd2si: 2956 ; HASWELL: # %bb.0: 2957 ; HASWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00] 2958 ; HASWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [9:1.00] 2959 ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] 2960 ; HASWELL-NEXT: retq # sched: [7:1.00] 2961 ; 2962 ; BROADWELL-SSE-LABEL: test_cvttsd2si: 2963 ; BROADWELL-SSE: # %bb.0: 2964 ; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] 2965 ; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00] 2966 ; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 2967 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 2968 ; 2969 ; BROADWELL-LABEL: test_cvttsd2si: 2970 ; BROADWELL: # %bb.0: 2971 ; BROADWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [9:1.00] 2972 ; BROADWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00] 2973 ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] 2974 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2975 ; 2976 ; SKYLAKE-SSE-LABEL: test_cvttsd2si: 2977 ; SKYLAKE-SSE: # %bb.0: 2978 ; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: 
[6:1.00] 2979 ; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00] 2980 ; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 2981 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 2982 ; 2983 ; SKYLAKE-LABEL: test_cvttsd2si: 2984 ; SKYLAKE: # %bb.0: 2985 ; SKYLAKE-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00] 2986 ; SKYLAKE-NEXT: vcvttsd2si (%rdi), %eax # sched: [11:1.00] 2987 ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] 2988 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2989 ; 2990 ; SKX-SSE-LABEL: test_cvttsd2si: 2991 ; SKX-SSE: # %bb.0: 2992 ; SKX-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00] 2993 ; SKX-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00] 2994 ; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 2995 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 2996 ; 2997 ; SKX-LABEL: test_cvttsd2si: 2998 ; SKX: # %bb.0: 2999 ; SKX-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00] 3000 ; SKX-NEXT: vcvttsd2si (%rdi), %eax # sched: [11:1.00] 3001 ; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] 3002 ; SKX-NEXT: retq # sched: [7:1.00] 3003 ; 3004 ; BTVER2-SSE-LABEL: test_cvttsd2si: 3005 ; BTVER2-SSE: # %bb.0: 3006 ; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [12:1.00] 3007 ; BTVER2-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [7:1.00] 3008 ; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] 3009 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3010 ; 3011 ; BTVER2-LABEL: test_cvttsd2si: 3012 ; BTVER2: # %bb.0: 3013 ; BTVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [12:1.00] 3014 ; BTVER2-NEXT: vcvttsd2si %xmm0, %ecx # sched: [7:1.00] 3015 ; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] 3016 ; BTVER2-NEXT: retq # sched: [4:1.00] 3017 ; 3018 ; ZNVER1-SSE-LABEL: test_cvttsd2si: 3019 ; ZNVER1-SSE: # %bb.0: 3020 ; ZNVER1-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [12:1.00] 3021 ; ZNVER1-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00] 3022 ; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] 3023 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3024 ; 3025 ; ZNVER1-LABEL: test_cvttsd2si: 
3026 ; ZNVER1: # %bb.0: 3027 ; ZNVER1-NEXT: vcvttsd2si (%rdi), %eax # sched: [12:1.00] 3028 ; ZNVER1-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00] 3029 ; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] 3030 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3031 %1 = fptosi double %a0 to i32 3032 %2 = load double, double *%a1, align 8 3033 %3 = fptosi double %2 to i32 3034 %4 = add i32 %1, %3 3035 ret i32 %4 3036 } 3037 3038 define i64 @test_cvttsd2siq(double %a0, double *%a1) { 3039 ; GENERIC-LABEL: test_cvttsd2siq: 3040 ; GENERIC: # %bb.0: 3041 ; GENERIC-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00] 3042 ; GENERIC-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] 3043 ; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] 3044 ; GENERIC-NEXT: retq # sched: [1:1.00] 3045 ; 3046 ; ATOM-LABEL: test_cvttsd2siq: 3047 ; ATOM: # %bb.0: 3048 ; ATOM-NEXT: cvttsd2si (%rdi), %rax # sched: [9:4.50] 3049 ; ATOM-NEXT: cvttsd2si %xmm0, %rcx # sched: [8:4.00] 3050 ; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50] 3051 ; ATOM-NEXT: retq # sched: [79:39.50] 3052 ; 3053 ; SLM-LABEL: test_cvttsd2siq: 3054 ; SLM: # %bb.0: 3055 ; SLM-NEXT: cvttsd2si (%rdi), %rax # sched: [7:1.00] 3056 ; SLM-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:0.50] 3057 ; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50] 3058 ; SLM-NEXT: retq # sched: [4:1.00] 3059 ; 3060 ; SANDY-SSE-LABEL: test_cvttsd2siq: 3061 ; SANDY-SSE: # %bb.0: 3062 ; SANDY-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00] 3063 ; SANDY-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] 3064 ; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] 3065 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3066 ; 3067 ; SANDY-LABEL: test_cvttsd2siq: 3068 ; SANDY: # %bb.0: 3069 ; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00] 3070 ; SANDY-NEXT: vcvttsd2si (%rdi), %rax # sched: [10:1.00] 3071 ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] 3072 ; SANDY-NEXT: retq # sched: [1:1.00] 3073 ; 3074 ; HASWELL-SSE-LABEL: test_cvttsd2siq: 3075 ; HASWELL-SSE: # %bb.0: 3076 ; HASWELL-SSE-NEXT: 
cvttsd2si %xmm0, %rcx # sched: [4:1.00] 3077 ; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] 3078 ; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 3079 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3080 ; 3081 ; HASWELL-LABEL: test_cvttsd2siq: 3082 ; HASWELL: # %bb.0: 3083 ; HASWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00] 3084 ; HASWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [9:1.00] 3085 ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] 3086 ; HASWELL-NEXT: retq # sched: [7:1.00] 3087 ; 3088 ; BROADWELL-SSE-LABEL: test_cvttsd2siq: 3089 ; BROADWELL-SSE: # %bb.0: 3090 ; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] 3091 ; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:1.00] 3092 ; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 3093 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3094 ; 3095 ; BROADWELL-LABEL: test_cvttsd2siq: 3096 ; BROADWELL: # %bb.0: 3097 ; BROADWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [9:1.00] 3098 ; BROADWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00] 3099 ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] 3100 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3101 ; 3102 ; SKYLAKE-SSE-LABEL: test_cvttsd2siq: 3103 ; SKYLAKE-SSE: # %bb.0: 3104 ; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [6:1.00] 3105 ; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [11:1.00] 3106 ; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 3107 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3108 ; 3109 ; SKYLAKE-LABEL: test_cvttsd2siq: 3110 ; SKYLAKE: # %bb.0: 3111 ; SKYLAKE-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00] 3112 ; SKYLAKE-NEXT: vcvttsd2si (%rdi), %rax # sched: [11:1.00] 3113 ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] 3114 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3115 ; 3116 ; SKX-SSE-LABEL: test_cvttsd2siq: 3117 ; SKX-SSE: # %bb.0: 3118 ; SKX-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [6:1.00] 3119 ; SKX-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [11:1.00] 3120 ; SKX-SSE-NEXT: addq %rcx, %rax # sched: 
; @test_divpd (its define appears on the next line): fdiv <2 x double> %a0, %a1, then a second fdiv of that result by a <2 x double> loaded from %a2 (align 16). The per-prefix check lines record the register and (%rdi) memory forms of divpd/vdivpd with their `# sched:` annotations from -print-schedule. Per the NOTE at the top of the file the assertions are autogenerated by utils/update_llc_test_checks.py.
[1:0.25] 3121 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3122 ; 3123 ; SKX-LABEL: test_cvttsd2siq: 3124 ; SKX: # %bb.0: 3125 ; SKX-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00] 3126 ; SKX-NEXT: vcvttsd2si (%rdi), %rax # sched: [11:1.00] 3127 ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] 3128 ; SKX-NEXT: retq # sched: [7:1.00] 3129 ; 3130 ; BTVER2-SSE-LABEL: test_cvttsd2siq: 3131 ; BTVER2-SSE: # %bb.0: 3132 ; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [12:1.00] 3133 ; BTVER2-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [7:1.00] 3134 ; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] 3135 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3136 ; 3137 ; BTVER2-LABEL: test_cvttsd2siq: 3138 ; BTVER2: # %bb.0: 3139 ; BTVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [12:1.00] 3140 ; BTVER2-NEXT: vcvttsd2si %xmm0, %rcx # sched: [7:1.00] 3141 ; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] 3142 ; BTVER2-NEXT: retq # sched: [4:1.00] 3143 ; 3144 ; ZNVER1-SSE-LABEL: test_cvttsd2siq: 3145 ; ZNVER1-SSE: # %bb.0: 3146 ; ZNVER1-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [12:1.00] 3147 ; ZNVER1-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00] 3148 ; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] 3149 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3150 ; 3151 ; ZNVER1-LABEL: test_cvttsd2siq: 3152 ; ZNVER1: # %bb.0: 3153 ; ZNVER1-NEXT: vcvttsd2si (%rdi), %rax # sched: [12:1.00] 3154 ; ZNVER1-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00] 3155 ; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] 3156 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3157 %1 = fptosi double %a0 to i64 3158 %2 = load double, double *%a1, align 8 3159 %3 = fptosi double %2 to i64 3160 %4 = add i64 %1, %3 3161 ret i64 %4 3162 } 3163 3164 define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { 3165 ; GENERIC-LABEL: test_divpd: 3166 ; GENERIC: # %bb.0: 3167 ; GENERIC-NEXT: divpd %xmm1, %xmm0 # sched: [22:22.00] 3168 ; GENERIC-NEXT: divpd (%rdi), %xmm0 # sched: [28:22.00] 3169 ; GENERIC-NEXT: retq # 
sched: [1:1.00] 3170 ; 3171 ; ATOM-LABEL: test_divpd: 3172 ; ATOM: # %bb.0: 3173 ; ATOM-NEXT: divpd %xmm1, %xmm0 # sched: [125:62.50] 3174 ; ATOM-NEXT: divpd (%rdi), %xmm0 # sched: [125:62.50] 3175 ; ATOM-NEXT: retq # sched: [79:39.50] 3176 ; 3177 ; SLM-LABEL: test_divpd: 3178 ; SLM: # %bb.0: 3179 ; SLM-NEXT: divpd %xmm1, %xmm0 # sched: [69:69.00] 3180 ; SLM-NEXT: divpd (%rdi), %xmm0 # sched: [72:69.00] 3181 ; SLM-NEXT: retq # sched: [4:1.00] 3182 ; 3183 ; SANDY-SSE-LABEL: test_divpd: 3184 ; SANDY-SSE: # %bb.0: 3185 ; SANDY-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [22:22.00] 3186 ; SANDY-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [28:22.00] 3187 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3188 ; 3189 ; SANDY-LABEL: test_divpd: 3190 ; SANDY: # %bb.0: 3191 ; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:22.00] 3192 ; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:22.00] 3193 ; SANDY-NEXT: retq # sched: [1:1.00] 3194 ; 3195 ; HASWELL-SSE-LABEL: test_divpd: 3196 ; HASWELL-SSE: # %bb.0: 3197 ; HASWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [20:14.00] 3198 ; HASWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [26:14.00] 3199 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3200 ; 3201 ; HASWELL-LABEL: test_divpd: 3202 ; HASWELL: # %bb.0: 3203 ; HASWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [20:14.00] 3204 ; HASWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [26:14.00] 3205 ; HASWELL-NEXT: retq # sched: [7:1.00] 3206 ; 3207 ; BROADWELL-SSE-LABEL: test_divpd: 3208 ; BROADWELL-SSE: # %bb.0: 3209 ; BROADWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:8.00] 3210 ; BROADWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [19:8.00] 3211 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3212 ; 3213 ; BROADWELL-LABEL: test_divpd: 3214 ; BROADWELL: # %bb.0: 3215 ; BROADWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:8.00] 3216 ; BROADWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [19:8.00] 3217 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3218 ; 3219 ; SKYLAKE-SSE-LABEL: test_divpd: 3220 ; 
SKYLAKE-SSE: # %bb.0: 3221 ; SKYLAKE-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:3.00] 3222 ; SKYLAKE-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:4.00] 3223 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3224 ; 3225 ; SKYLAKE-LABEL: test_divpd: 3226 ; SKYLAKE: # %bb.0: 3227 ; SKYLAKE-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:3.00] 3228 ; SKYLAKE-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:4.00] 3229 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3230 ; 3231 ; SKX-SSE-LABEL: test_divpd: 3232 ; SKX-SSE: # %bb.0: 3233 ; SKX-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:3.00] 3234 ; SKX-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:4.00] 3235 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3236 ; 3237 ; SKX-LABEL: test_divpd: 3238 ; SKX: # %bb.0: 3239 ; SKX-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:3.00] 3240 ; SKX-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:4.00] 3241 ; SKX-NEXT: retq # sched: [7:1.00] 3242 ; 3243 ; BTVER2-SSE-LABEL: test_divpd: 3244 ; BTVER2-SSE: # %bb.0: 3245 ; BTVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [19:19.00] 3246 ; BTVER2-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [24:19.00] 3247 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3248 ; 3249 ; BTVER2-LABEL: test_divpd: 3250 ; BTVER2: # %bb.0: 3251 ; BTVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [19:19.00] 3252 ; BTVER2-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [24:19.00] 3253 ; BTVER2-NEXT: retq # sched: [4:1.00] 3254 ; 3255 ; ZNVER1-SSE-LABEL: test_divpd: 3256 ; ZNVER1-SSE: # %bb.0: 3257 ; ZNVER1-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [15:1.00] 3258 ; ZNVER1-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [22:1.00] 3259 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3260 ; 3261 ; ZNVER1-LABEL: test_divpd: 3262 ; ZNVER1: # %bb.0: 3263 ; ZNVER1-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [15:1.00] 3264 ; ZNVER1-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [22:1.00] 3265 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3266 %1 = fdiv <2 x double> %a0, %a1 3267 %2 = load <2 x double>, <2 x double> *%a2, align 16 3268 %3 = fdiv <2 x double> %1, %2 
; @test_divsd (its define appears on the next line): fdiv double %a0, %a1, then a second fdiv of that result by a double loaded from %a2 (align 8). The per-prefix check lines record the register and (%rdi) memory forms of divsd/vdivsd with their `# sched:` annotations from -print-schedule. Per the NOTE at the top of the file the assertions are autogenerated by utils/update_llc_test_checks.py.
3269 ret <2 x double> %3 3270 } 3271 3272 define double @test_divsd(double %a0, double %a1, double *%a2) { 3273 ; GENERIC-LABEL: test_divsd: 3274 ; GENERIC: # %bb.0: 3275 ; GENERIC-NEXT: divsd %xmm1, %xmm0 # sched: [22:22.00] 3276 ; GENERIC-NEXT: divsd (%rdi), %xmm0 # sched: [28:22.00] 3277 ; GENERIC-NEXT: retq # sched: [1:1.00] 3278 ; 3279 ; ATOM-LABEL: test_divsd: 3280 ; ATOM: # %bb.0: 3281 ; ATOM-NEXT: divsd %xmm1, %xmm0 # sched: [62:31.00] 3282 ; ATOM-NEXT: divsd (%rdi), %xmm0 # sched: [62:31.00] 3283 ; ATOM-NEXT: retq # sched: [79:39.50] 3284 ; 3285 ; SLM-LABEL: test_divsd: 3286 ; SLM: # %bb.0: 3287 ; SLM-NEXT: divsd %xmm1, %xmm0 # sched: [34:32.00] 3288 ; SLM-NEXT: divsd (%rdi), %xmm0 # sched: [37:32.00] 3289 ; SLM-NEXT: retq # sched: [4:1.00] 3290 ; 3291 ; SANDY-SSE-LABEL: test_divsd: 3292 ; SANDY-SSE: # %bb.0: 3293 ; SANDY-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [22:22.00] 3294 ; SANDY-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [28:22.00] 3295 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3296 ; 3297 ; SANDY-LABEL: test_divsd: 3298 ; SANDY: # %bb.0: 3299 ; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:22.00] 3300 ; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:22.00] 3301 ; SANDY-NEXT: retq # sched: [1:1.00] 3302 ; 3303 ; HASWELL-SSE-LABEL: test_divsd: 3304 ; HASWELL-SSE: # %bb.0: 3305 ; HASWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [20:14.00] 3306 ; HASWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [25:14.00] 3307 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3308 ; 3309 ; HASWELL-LABEL: test_divsd: 3310 ; HASWELL: # %bb.0: 3311 ; HASWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [20:14.00] 3312 ; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [25:14.00] 3313 ; HASWELL-NEXT: retq # sched: [7:1.00] 3314 ; 3315 ; BROADWELL-SSE-LABEL: test_divsd: 3316 ; BROADWELL-SSE: # %bb.0: 3317 ; BROADWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:4.00] 3318 ; BROADWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:8.00] 3319 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3320 
; 3321 ; BROADWELL-LABEL: test_divsd: 3322 ; BROADWELL: # %bb.0: 3323 ; BROADWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:4.00] 3324 ; BROADWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:8.00] 3325 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3326 ; 3327 ; SKYLAKE-SSE-LABEL: test_divsd: 3328 ; SKYLAKE-SSE: # %bb.0: 3329 ; SKYLAKE-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:3.00] 3330 ; SKYLAKE-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:4.00] 3331 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3332 ; 3333 ; SKYLAKE-LABEL: test_divsd: 3334 ; SKYLAKE: # %bb.0: 3335 ; SKYLAKE-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:3.00] 3336 ; SKYLAKE-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:4.00] 3337 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3338 ; 3339 ; SKX-SSE-LABEL: test_divsd: 3340 ; SKX-SSE: # %bb.0: 3341 ; SKX-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:3.00] 3342 ; SKX-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:4.00] 3343 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3344 ; 3345 ; SKX-LABEL: test_divsd: 3346 ; SKX: # %bb.0: 3347 ; SKX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:3.00] 3348 ; SKX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:4.00] 3349 ; SKX-NEXT: retq # sched: [7:1.00] 3350 ; 3351 ; BTVER2-SSE-LABEL: test_divsd: 3352 ; BTVER2-SSE: # %bb.0: 3353 ; BTVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [19:19.00] 3354 ; BTVER2-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [24:19.00] 3355 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3356 ; 3357 ; BTVER2-LABEL: test_divsd: 3358 ; BTVER2: # %bb.0: 3359 ; BTVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [19:19.00] 3360 ; BTVER2-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [24:19.00] 3361 ; BTVER2-NEXT: retq # sched: [4:1.00] 3362 ; 3363 ; ZNVER1-SSE-LABEL: test_divsd: 3364 ; ZNVER1-SSE: # %bb.0: 3365 ; ZNVER1-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [15:1.00] 3366 ; ZNVER1-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [22:1.00] 3367 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3368 ; 3369 ; ZNVER1-LABEL: test_divsd: 3370 ; ZNVER1: # %bb.0: 3371 
; @test_lfence (its define appears on the next line): calls @llvm.x86.sse2.lfence() and returns void. Every prefix expects a single `lfence` before `retq`; the ATOM checks additionally expect six `nop` instructions between them. NOTE(review): the intrinsic's declaration later in the file carries `nounwind readnone` even though it is a fence - confirm that attribute is intentional. Assertions are autogenerated by utils/update_llc_test_checks.py (see the NOTE at the top of the file).
; ZNVER1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [15:1.00] 3372 ; ZNVER1-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [22:1.00] 3373 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3374 %1 = fdiv double %a0, %a1 3375 %2 = load double, double *%a2, align 8 3376 %3 = fdiv double %1, %2 3377 ret double %3 3378 } 3379 3380 define void @test_lfence() { 3381 ; GENERIC-LABEL: test_lfence: 3382 ; GENERIC: # %bb.0: 3383 ; GENERIC-NEXT: lfence # sched: [1:1.00] 3384 ; GENERIC-NEXT: retq # sched: [1:1.00] 3385 ; 3386 ; ATOM-LABEL: test_lfence: 3387 ; ATOM: # %bb.0: 3388 ; ATOM-NEXT: lfence # sched: [1:0.50] 3389 ; ATOM-NEXT: nop # sched: [1:0.50] 3390 ; ATOM-NEXT: nop # sched: [1:0.50] 3391 ; ATOM-NEXT: nop # sched: [1:0.50] 3392 ; ATOM-NEXT: nop # sched: [1:0.50] 3393 ; ATOM-NEXT: nop # sched: [1:0.50] 3394 ; ATOM-NEXT: nop # sched: [1:0.50] 3395 ; ATOM-NEXT: retq # sched: [79:39.50] 3396 ; 3397 ; SLM-LABEL: test_lfence: 3398 ; SLM: # %bb.0: 3399 ; SLM-NEXT: lfence # sched: [1:1.00] 3400 ; SLM-NEXT: retq # sched: [4:1.00] 3401 ; 3402 ; SANDY-SSE-LABEL: test_lfence: 3403 ; SANDY-SSE: # %bb.0: 3404 ; SANDY-SSE-NEXT: lfence # sched: [1:1.00] 3405 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3406 ; 3407 ; SANDY-LABEL: test_lfence: 3408 ; SANDY: # %bb.0: 3409 ; SANDY-NEXT: lfence # sched: [1:1.00] 3410 ; SANDY-NEXT: retq # sched: [1:1.00] 3411 ; 3412 ; HASWELL-SSE-LABEL: test_lfence: 3413 ; HASWELL-SSE: # %bb.0: 3414 ; HASWELL-SSE-NEXT: lfence # sched: [2:0.50] 3415 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3416 ; 3417 ; HASWELL-LABEL: test_lfence: 3418 ; HASWELL: # %bb.0: 3419 ; HASWELL-NEXT: lfence # sched: [2:0.50] 3420 ; HASWELL-NEXT: retq # sched: [7:1.00] 3421 ; 3422 ; BROADWELL-SSE-LABEL: test_lfence: 3423 ; BROADWELL-SSE: # %bb.0: 3424 ; BROADWELL-SSE-NEXT: lfence # sched: [2:0.50] 3425 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3426 ; 3427 ; BROADWELL-LABEL: test_lfence: 3428 ; BROADWELL: # %bb.0: 3429 ; BROADWELL-NEXT: lfence # sched: [2:0.50] 3430 ; BROADWELL-NEXT: retq # sched: 
; @test_mfence (its define appears on the next line): calls @llvm.x86.sse2.mfence() and returns void. Every prefix expects a single `mfence` before `retq`; the ATOM checks additionally expect six `nop` instructions between them. NOTE(review): the intrinsic's declaration later in the file carries `nounwind readnone` even though it is a fence - confirm that attribute is intentional. Assertions are autogenerated by utils/update_llc_test_checks.py (see the NOTE at the top of the file).
[7:1.00] 3431 ; 3432 ; SKYLAKE-SSE-LABEL: test_lfence: 3433 ; SKYLAKE-SSE: # %bb.0: 3434 ; SKYLAKE-SSE-NEXT: lfence # sched: [2:0.50] 3435 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3436 ; 3437 ; SKYLAKE-LABEL: test_lfence: 3438 ; SKYLAKE: # %bb.0: 3439 ; SKYLAKE-NEXT: lfence # sched: [2:0.50] 3440 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3441 ; 3442 ; SKX-SSE-LABEL: test_lfence: 3443 ; SKX-SSE: # %bb.0: 3444 ; SKX-SSE-NEXT: lfence # sched: [2:0.50] 3445 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3446 ; 3447 ; SKX-LABEL: test_lfence: 3448 ; SKX: # %bb.0: 3449 ; SKX-NEXT: lfence # sched: [2:0.50] 3450 ; SKX-NEXT: retq # sched: [7:1.00] 3451 ; 3452 ; BTVER2-SSE-LABEL: test_lfence: 3453 ; BTVER2-SSE: # %bb.0: 3454 ; BTVER2-SSE-NEXT: lfence # sched: [1:1.00] 3455 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3456 ; 3457 ; BTVER2-LABEL: test_lfence: 3458 ; BTVER2: # %bb.0: 3459 ; BTVER2-NEXT: lfence # sched: [1:1.00] 3460 ; BTVER2-NEXT: retq # sched: [4:1.00] 3461 ; 3462 ; ZNVER1-SSE-LABEL: test_lfence: 3463 ; ZNVER1-SSE: # %bb.0: 3464 ; ZNVER1-SSE-NEXT: lfence # sched: [1:0.50] 3465 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3466 ; 3467 ; ZNVER1-LABEL: test_lfence: 3468 ; ZNVER1: # %bb.0: 3469 ; ZNVER1-NEXT: lfence # sched: [1:0.50] 3470 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3471 call void @llvm.x86.sse2.lfence() 3472 ret void 3473 } 3474 declare void @llvm.x86.sse2.lfence() nounwind readnone 3475 3476 define void @test_mfence() { 3477 ; GENERIC-LABEL: test_mfence: 3478 ; GENERIC: # %bb.0: 3479 ; GENERIC-NEXT: mfence # sched: [1:1.00] 3480 ; GENERIC-NEXT: retq # sched: [1:1.00] 3481 ; 3482 ; ATOM-LABEL: test_mfence: 3483 ; ATOM: # %bb.0: 3484 ; ATOM-NEXT: mfence # sched: [1:1.00] 3485 ; ATOM-NEXT: nop # sched: [1:0.50] 3486 ; ATOM-NEXT: nop # sched: [1:0.50] 3487 ; ATOM-NEXT: nop # sched: [1:0.50] 3488 ; ATOM-NEXT: nop # sched: [1:0.50] 3489 ; ATOM-NEXT: nop # sched: [1:0.50] 3490 ; ATOM-NEXT: nop # sched: [1:0.50] 3491 ; ATOM-NEXT: retq # sched: [79:39.50] 3492 ; 3493 ; 
SLM-LABEL: test_mfence: 3494 ; SLM: # %bb.0: 3495 ; SLM-NEXT: mfence # sched: [1:1.00] 3496 ; SLM-NEXT: retq # sched: [4:1.00] 3497 ; 3498 ; SANDY-SSE-LABEL: test_mfence: 3499 ; SANDY-SSE: # %bb.0: 3500 ; SANDY-SSE-NEXT: mfence # sched: [1:1.00] 3501 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3502 ; 3503 ; SANDY-LABEL: test_mfence: 3504 ; SANDY: # %bb.0: 3505 ; SANDY-NEXT: mfence # sched: [1:1.00] 3506 ; SANDY-NEXT: retq # sched: [1:1.00] 3507 ; 3508 ; HASWELL-SSE-LABEL: test_mfence: 3509 ; HASWELL-SSE: # %bb.0: 3510 ; HASWELL-SSE-NEXT: mfence # sched: [2:0.50] 3511 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3512 ; 3513 ; HASWELL-LABEL: test_mfence: 3514 ; HASWELL: # %bb.0: 3515 ; HASWELL-NEXT: mfence # sched: [2:0.50] 3516 ; HASWELL-NEXT: retq # sched: [7:1.00] 3517 ; 3518 ; BROADWELL-SSE-LABEL: test_mfence: 3519 ; BROADWELL-SSE: # %bb.0: 3520 ; BROADWELL-SSE-NEXT: mfence # sched: [2:0.50] 3521 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3522 ; 3523 ; BROADWELL-LABEL: test_mfence: 3524 ; BROADWELL: # %bb.0: 3525 ; BROADWELL-NEXT: mfence # sched: [2:0.50] 3526 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3527 ; 3528 ; SKYLAKE-SSE-LABEL: test_mfence: 3529 ; SKYLAKE-SSE: # %bb.0: 3530 ; SKYLAKE-SSE-NEXT: mfence # sched: [3:0.50] 3531 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3532 ; 3533 ; SKYLAKE-LABEL: test_mfence: 3534 ; SKYLAKE: # %bb.0: 3535 ; SKYLAKE-NEXT: mfence # sched: [3:0.50] 3536 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3537 ; 3538 ; SKX-SSE-LABEL: test_mfence: 3539 ; SKX-SSE: # %bb.0: 3540 ; SKX-SSE-NEXT: mfence # sched: [3:0.50] 3541 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3542 ; 3543 ; SKX-LABEL: test_mfence: 3544 ; SKX: # %bb.0: 3545 ; SKX-NEXT: mfence # sched: [3:0.50] 3546 ; SKX-NEXT: retq # sched: [7:1.00] 3547 ; 3548 ; BTVER2-SSE-LABEL: test_mfence: 3549 ; BTVER2-SSE: # %bb.0: 3550 ; BTVER2-SSE-NEXT: mfence # sched: [1:1.00] 3551 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3552 ; 3553 ; BTVER2-LABEL: test_mfence: 3554 ; BTVER2: # %bb.0: 3555 ; BTVER2-NEXT: 
; @test_maskmovdqu (its define appears on the next line): calls @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) and returns void. Each prefix expects one `maskmovdqu`/`vmaskmovdqu %xmm1, %xmm0` before `retq`; the ATOM checks additionally expect four `nop` instructions between them. Assertions are autogenerated by utils/update_llc_test_checks.py (see the NOTE at the top of the file).
mfence # sched: [1:1.00] 3556 ; BTVER2-NEXT: retq # sched: [4:1.00] 3557 ; 3558 ; ZNVER1-SSE-LABEL: test_mfence: 3559 ; ZNVER1-SSE: # %bb.0: 3560 ; ZNVER1-SSE-NEXT: mfence # sched: [1:0.50] 3561 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3562 ; 3563 ; ZNVER1-LABEL: test_mfence: 3564 ; ZNVER1: # %bb.0: 3565 ; ZNVER1-NEXT: mfence # sched: [1:0.50] 3566 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3567 call void @llvm.x86.sse2.mfence() 3568 ret void 3569 } 3570 declare void @llvm.x86.sse2.mfence() nounwind readnone 3571 3572 define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) { 3573 ; GENERIC-LABEL: test_maskmovdqu: 3574 ; GENERIC: # %bb.0: 3575 ; GENERIC-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] 3576 ; GENERIC-NEXT: retq # sched: [1:1.00] 3577 ; 3578 ; ATOM-LABEL: test_maskmovdqu: 3579 ; ATOM: # %bb.0: 3580 ; ATOM-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [2:1.00] 3581 ; ATOM-NEXT: nop # sched: [1:0.50] 3582 ; ATOM-NEXT: nop # sched: [1:0.50] 3583 ; ATOM-NEXT: nop # sched: [1:0.50] 3584 ; ATOM-NEXT: nop # sched: [1:0.50] 3585 ; ATOM-NEXT: retq # sched: [79:39.50] 3586 ; 3587 ; SLM-LABEL: test_maskmovdqu: 3588 ; SLM: # %bb.0: 3589 ; SLM-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] 3590 ; SLM-NEXT: retq # sched: [4:1.00] 3591 ; 3592 ; SANDY-SSE-LABEL: test_maskmovdqu: 3593 ; SANDY-SSE: # %bb.0: 3594 ; SANDY-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] 3595 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3596 ; 3597 ; SANDY-LABEL: test_maskmovdqu: 3598 ; SANDY: # %bb.0: 3599 ; SANDY-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] 3600 ; SANDY-NEXT: retq # sched: [1:1.00] 3601 ; 3602 ; HASWELL-SSE-LABEL: test_maskmovdqu: 3603 ; HASWELL-SSE: # %bb.0: 3604 ; HASWELL-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] 3605 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3606 ; 3607 ; HASWELL-LABEL: test_maskmovdqu: 3608 ; HASWELL: # %bb.0: 3609 ; HASWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] 3610 ; HASWELL-NEXT: retq # sched: [7:1.00] 3611 ; 3612 ; 
BROADWELL-SSE-LABEL: test_maskmovdqu: 3613 ; BROADWELL-SSE: # %bb.0: 3614 ; BROADWELL-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] 3615 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3616 ; 3617 ; BROADWELL-LABEL: test_maskmovdqu: 3618 ; BROADWELL: # %bb.0: 3619 ; BROADWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] 3620 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3621 ; 3622 ; SKYLAKE-SSE-LABEL: test_maskmovdqu: 3623 ; SKYLAKE-SSE: # %bb.0: 3624 ; SKYLAKE-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] 3625 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3626 ; 3627 ; SKYLAKE-LABEL: test_maskmovdqu: 3628 ; SKYLAKE: # %bb.0: 3629 ; SKYLAKE-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] 3630 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3631 ; 3632 ; SKX-SSE-LABEL: test_maskmovdqu: 3633 ; SKX-SSE: # %bb.0: 3634 ; SKX-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] 3635 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3636 ; 3637 ; SKX-LABEL: test_maskmovdqu: 3638 ; SKX: # %bb.0: 3639 ; SKX-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] 3640 ; SKX-NEXT: retq # sched: [7:1.00] 3641 ; 3642 ; BTVER2-SSE-LABEL: test_maskmovdqu: 3643 ; BTVER2-SSE: # %bb.0: 3644 ; BTVER2-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] 3645 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3646 ; 3647 ; BTVER2-LABEL: test_maskmovdqu: 3648 ; BTVER2: # %bb.0: 3649 ; BTVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] 3650 ; BTVER2-NEXT: retq # sched: [4:1.00] 3651 ; 3652 ; ZNVER1-SSE-LABEL: test_maskmovdqu: 3653 ; ZNVER1-SSE: # %bb.0: 3654 ; ZNVER1-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [100:0.25] 3655 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3656 ; 3657 ; ZNVER1-LABEL: test_maskmovdqu: 3658 ; ZNVER1: # %bb.0: 3659 ; ZNVER1-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [100:0.25] 3660 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3661 call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) 3662 ret void 3663 } 3664 declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) 
; @test_maxpd (its define appears on the next line): calls @llvm.x86.sse2.max.pd on (%a0, %a1), then again on that result and a <2 x double> loaded from %a2 (align 16). The per-prefix check lines record the register and (%rdi) memory forms of maxpd/vmaxpd with their `# sched:` annotations from -print-schedule. Assertions are autogenerated by utils/update_llc_test_checks.py (see the NOTE at the top of the file).
nounwind 3665 3666 define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { 3667 ; GENERIC-LABEL: test_maxpd: 3668 ; GENERIC: # %bb.0: 3669 ; GENERIC-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] 3670 ; GENERIC-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00] 3671 ; GENERIC-NEXT: retq # sched: [1:1.00] 3672 ; 3673 ; ATOM-LABEL: test_maxpd: 3674 ; ATOM: # %bb.0: 3675 ; ATOM-NEXT: maxpd %xmm1, %xmm0 # sched: [6:3.00] 3676 ; ATOM-NEXT: maxpd (%rdi), %xmm0 # sched: [7:3.50] 3677 ; ATOM-NEXT: retq # sched: [79:39.50] 3678 ; 3679 ; SLM-LABEL: test_maxpd: 3680 ; SLM: # %bb.0: 3681 ; SLM-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] 3682 ; SLM-NEXT: maxpd (%rdi), %xmm0 # sched: [6:1.00] 3683 ; SLM-NEXT: retq # sched: [4:1.00] 3684 ; 3685 ; SANDY-SSE-LABEL: test_maxpd: 3686 ; SANDY-SSE: # %bb.0: 3687 ; SANDY-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] 3688 ; SANDY-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00] 3689 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3690 ; 3691 ; SANDY-LABEL: test_maxpd: 3692 ; SANDY: # %bb.0: 3693 ; SANDY-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3694 ; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 3695 ; SANDY-NEXT: retq # sched: [1:1.00] 3696 ; 3697 ; HASWELL-SSE-LABEL: test_maxpd: 3698 ; HASWELL-SSE: # %bb.0: 3699 ; HASWELL-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] 3700 ; HASWELL-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00] 3701 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3702 ; 3703 ; HASWELL-LABEL: test_maxpd: 3704 ; HASWELL: # %bb.0: 3705 ; HASWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3706 ; HASWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 3707 ; HASWELL-NEXT: retq # sched: [7:1.00] 3708 ; 3709 ; BROADWELL-SSE-LABEL: test_maxpd: 3710 ; BROADWELL-SSE: # %bb.0: 3711 ; BROADWELL-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] 3712 ; BROADWELL-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [8:1.00] 3713 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3714 ; 3715 ; BROADWELL-LABEL: 
test_maxpd: 3716 ; BROADWELL: # %bb.0: 3717 ; BROADWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3718 ; BROADWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 3719 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3720 ; 3721 ; SKYLAKE-SSE-LABEL: test_maxpd: 3722 ; SKYLAKE-SSE: # %bb.0: 3723 ; SKYLAKE-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.50] 3724 ; SKYLAKE-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50] 3725 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3726 ; 3727 ; SKYLAKE-LABEL: test_maxpd: 3728 ; SKYLAKE: # %bb.0: 3729 ; SKYLAKE-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 3730 ; SKYLAKE-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 3731 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3732 ; 3733 ; SKX-SSE-LABEL: test_maxpd: 3734 ; SKX-SSE: # %bb.0: 3735 ; SKX-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.50] 3736 ; SKX-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50] 3737 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3738 ; 3739 ; SKX-LABEL: test_maxpd: 3740 ; SKX: # %bb.0: 3741 ; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 3742 ; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 3743 ; SKX-NEXT: retq # sched: [7:1.00] 3744 ; 3745 ; BTVER2-SSE-LABEL: test_maxpd: 3746 ; BTVER2-SSE: # %bb.0: 3747 ; BTVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [2:1.00] 3748 ; BTVER2-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [7:1.00] 3749 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3750 ; 3751 ; BTVER2-LABEL: test_maxpd: 3752 ; BTVER2: # %bb.0: 3753 ; BTVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 3754 ; BTVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3755 ; BTVER2-NEXT: retq # sched: [4:1.00] 3756 ; 3757 ; ZNVER1-SSE-LABEL: test_maxpd: 3758 ; ZNVER1-SSE: # %bb.0: 3759 ; ZNVER1-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] 3760 ; ZNVER1-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:1.00] 3761 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3762 ; 3763 ; ZNVER1-LABEL: test_maxpd: 3764 ; ZNVER1: # %bb.0: 3765 ; ZNVER1-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 
; @test_maxsd (its define appears on the next line): calls @llvm.x86.sse2.max.sd on (%a0, %a1), then again on that result and a <2 x double> loaded from %a2 (align 16). The per-prefix check lines record the register and (%rdi) memory forms of maxsd/vmaxsd with their `# sched:` annotations from -print-schedule. Assertions are autogenerated by utils/update_llc_test_checks.py (see the NOTE at the top of the file).
# sched: [3:1.00] 3766 ; ZNVER1-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 3767 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3768 %1 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) 3769 %2 = load <2 x double>, <2 x double> *%a2, align 16 3770 %3 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %1, <2 x double> %2) 3771 ret <2 x double> %3 3772 } 3773 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone 3774 3775 define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { 3776 ; GENERIC-LABEL: test_maxsd: 3777 ; GENERIC: # %bb.0: 3778 ; GENERIC-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] 3779 ; GENERIC-NEXT: maxsd (%rdi), %xmm0 # sched: [9:1.00] 3780 ; GENERIC-NEXT: retq # sched: [1:1.00] 3781 ; 3782 ; ATOM-LABEL: test_maxsd: 3783 ; ATOM: # %bb.0: 3784 ; ATOM-NEXT: maxsd %xmm1, %xmm0 # sched: [5:5.00] 3785 ; ATOM-NEXT: maxsd (%rdi), %xmm0 # sched: [5:5.00] 3786 ; ATOM-NEXT: retq # sched: [79:39.50] 3787 ; 3788 ; SLM-LABEL: test_maxsd: 3789 ; SLM: # %bb.0: 3790 ; SLM-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] 3791 ; SLM-NEXT: maxsd (%rdi), %xmm0 # sched: [6:1.00] 3792 ; SLM-NEXT: retq # sched: [4:1.00] 3793 ; 3794 ; SANDY-SSE-LABEL: test_maxsd: 3795 ; SANDY-SSE: # %bb.0: 3796 ; SANDY-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] 3797 ; SANDY-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:1.00] 3798 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3799 ; 3800 ; SANDY-LABEL: test_maxsd: 3801 ; SANDY: # %bb.0: 3802 ; SANDY-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3803 ; SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 3804 ; SANDY-NEXT: retq # sched: [1:1.00] 3805 ; 3806 ; HASWELL-SSE-LABEL: test_maxsd: 3807 ; HASWELL-SSE: # %bb.0: 3808 ; HASWELL-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] 3809 ; HASWELL-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [8:1.00] 3810 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3811 ; 3812 ; HASWELL-LABEL: test_maxsd: 3813 ; HASWELL: # 
%bb.0: 3814 ; HASWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3815 ; HASWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 3816 ; HASWELL-NEXT: retq # sched: [7:1.00] 3817 ; 3818 ; BROADWELL-SSE-LABEL: test_maxsd: 3819 ; BROADWELL-SSE: # %bb.0: 3820 ; BROADWELL-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] 3821 ; BROADWELL-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [8:1.00] 3822 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3823 ; 3824 ; BROADWELL-LABEL: test_maxsd: 3825 ; BROADWELL: # %bb.0: 3826 ; BROADWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3827 ; BROADWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 3828 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3829 ; 3830 ; SKYLAKE-SSE-LABEL: test_maxsd: 3831 ; SKYLAKE-SSE: # %bb.0: 3832 ; SKYLAKE-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.50] 3833 ; SKYLAKE-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50] 3834 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3835 ; 3836 ; SKYLAKE-LABEL: test_maxsd: 3837 ; SKYLAKE: # %bb.0: 3838 ; SKYLAKE-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 3839 ; SKYLAKE-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 3840 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3841 ; 3842 ; SKX-SSE-LABEL: test_maxsd: 3843 ; SKX-SSE: # %bb.0: 3844 ; SKX-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.50] 3845 ; SKX-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50] 3846 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3847 ; 3848 ; SKX-LABEL: test_maxsd: 3849 ; SKX: # %bb.0: 3850 ; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 3851 ; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 3852 ; SKX-NEXT: retq # sched: [7:1.00] 3853 ; 3854 ; BTVER2-SSE-LABEL: test_maxsd: 3855 ; BTVER2-SSE: # %bb.0: 3856 ; BTVER2-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [2:1.00] 3857 ; BTVER2-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [7:1.00] 3858 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3859 ; 3860 ; BTVER2-LABEL: test_maxsd: 3861 ; BTVER2: # %bb.0: 3862 ; BTVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: 
; @test_minpd (its define appears on the next line): calls @llvm.x86.sse2.min.pd on (%a0, %a1), then again on that result and a <2 x double> loaded from %a2 (align 16). The per-prefix check lines record the register and (%rdi) memory forms of minpd/vminpd with their `# sched:` annotations from -print-schedule. Assertions are autogenerated by utils/update_llc_test_checks.py (see the NOTE at the top of the file).
[2:1.00] 3863 ; BTVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3864 ; BTVER2-NEXT: retq # sched: [4:1.00] 3865 ; 3866 ; ZNVER1-SSE-LABEL: test_maxsd: 3867 ; ZNVER1-SSE: # %bb.0: 3868 ; ZNVER1-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] 3869 ; ZNVER1-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [10:1.00] 3870 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3871 ; 3872 ; ZNVER1-LABEL: test_maxsd: 3873 ; ZNVER1: # %bb.0: 3874 ; ZNVER1-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3875 ; ZNVER1-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 3876 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3877 %1 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) 3878 %2 = load <2 x double>, <2 x double> *%a2, align 16 3879 %3 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %1, <2 x double> %2) 3880 ret <2 x double> %3 3881 } 3882 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone 3883 3884 define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { 3885 ; GENERIC-LABEL: test_minpd: 3886 ; GENERIC: # %bb.0: 3887 ; GENERIC-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] 3888 ; GENERIC-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00] 3889 ; GENERIC-NEXT: retq # sched: [1:1.00] 3890 ; 3891 ; ATOM-LABEL: test_minpd: 3892 ; ATOM: # %bb.0: 3893 ; ATOM-NEXT: minpd %xmm1, %xmm0 # sched: [6:3.00] 3894 ; ATOM-NEXT: minpd (%rdi), %xmm0 # sched: [7:3.50] 3895 ; ATOM-NEXT: retq # sched: [79:39.50] 3896 ; 3897 ; SLM-LABEL: test_minpd: 3898 ; SLM: # %bb.0: 3899 ; SLM-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] 3900 ; SLM-NEXT: minpd (%rdi), %xmm0 # sched: [6:1.00] 3901 ; SLM-NEXT: retq # sched: [4:1.00] 3902 ; 3903 ; SANDY-SSE-LABEL: test_minpd: 3904 ; SANDY-SSE: # %bb.0: 3905 ; SANDY-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] 3906 ; SANDY-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00] 3907 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 3908 ; 3909 ; SANDY-LABEL: test_minpd: 3910 ; SANDY: # %bb.0: 3911 ; 
SANDY-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3912 ; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 3913 ; SANDY-NEXT: retq # sched: [1:1.00] 3914 ; 3915 ; HASWELL-SSE-LABEL: test_minpd: 3916 ; HASWELL-SSE: # %bb.0: 3917 ; HASWELL-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] 3918 ; HASWELL-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00] 3919 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 3920 ; 3921 ; HASWELL-LABEL: test_minpd: 3922 ; HASWELL: # %bb.0: 3923 ; HASWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3924 ; HASWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 3925 ; HASWELL-NEXT: retq # sched: [7:1.00] 3926 ; 3927 ; BROADWELL-SSE-LABEL: test_minpd: 3928 ; BROADWELL-SSE: # %bb.0: 3929 ; BROADWELL-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] 3930 ; BROADWELL-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [8:1.00] 3931 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 3932 ; 3933 ; BROADWELL-LABEL: test_minpd: 3934 ; BROADWELL: # %bb.0: 3935 ; BROADWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3936 ; BROADWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 3937 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3938 ; 3939 ; SKYLAKE-SSE-LABEL: test_minpd: 3940 ; SKYLAKE-SSE: # %bb.0: 3941 ; SKYLAKE-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.50] 3942 ; SKYLAKE-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50] 3943 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 3944 ; 3945 ; SKYLAKE-LABEL: test_minpd: 3946 ; SKYLAKE: # %bb.0: 3947 ; SKYLAKE-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 3948 ; SKYLAKE-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 3949 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3950 ; 3951 ; SKX-SSE-LABEL: test_minpd: 3952 ; SKX-SSE: # %bb.0: 3953 ; SKX-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.50] 3954 ; SKX-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50] 3955 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 3956 ; 3957 ; SKX-LABEL: test_minpd: 3958 ; SKX: # %bb.0: 3959 ; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: 
; @test_minsd (its define appears on the next line): calls @llvm.x86.sse2.min.sd on (%a0, %a1), then again on that result and a <2 x double> loaded from %a2 (align 16). The per-prefix check lines record the register and (%rdi) memory forms of minsd/vminsd with their `# sched:` annotations from -print-schedule. Assertions are autogenerated by utils/update_llc_test_checks.py (see the NOTE at the top of the file).
[4:0.50] 3960 ; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 3961 ; SKX-NEXT: retq # sched: [7:1.00] 3962 ; 3963 ; BTVER2-SSE-LABEL: test_minpd: 3964 ; BTVER2-SSE: # %bb.0: 3965 ; BTVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [2:1.00] 3966 ; BTVER2-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [7:1.00] 3967 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 3968 ; 3969 ; BTVER2-LABEL: test_minpd: 3970 ; BTVER2: # %bb.0: 3971 ; BTVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 3972 ; BTVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 3973 ; BTVER2-NEXT: retq # sched: [4:1.00] 3974 ; 3975 ; ZNVER1-SSE-LABEL: test_minpd: 3976 ; ZNVER1-SSE: # %bb.0: 3977 ; ZNVER1-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] 3978 ; ZNVER1-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:1.00] 3979 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 3980 ; 3981 ; ZNVER1-LABEL: test_minpd: 3982 ; ZNVER1: # %bb.0: 3983 ; ZNVER1-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 3984 ; ZNVER1-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 3985 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3986 %1 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) 3987 %2 = load <2 x double>, <2 x double> *%a2, align 16 3988 %3 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %1, <2 x double> %2) 3989 ret <2 x double> %3 3990 } 3991 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone 3992 3993 define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { 3994 ; GENERIC-LABEL: test_minsd: 3995 ; GENERIC: # %bb.0: 3996 ; GENERIC-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] 3997 ; GENERIC-NEXT: minsd (%rdi), %xmm0 # sched: [9:1.00] 3998 ; GENERIC-NEXT: retq # sched: [1:1.00] 3999 ; 4000 ; ATOM-LABEL: test_minsd: 4001 ; ATOM: # %bb.0: 4002 ; ATOM-NEXT: minsd %xmm1, %xmm0 # sched: [5:5.00] 4003 ; ATOM-NEXT: minsd (%rdi), %xmm0 # sched: [5:5.00] 4004 ; ATOM-NEXT: retq # sched: [79:39.50] 4005 ; 4006 ; SLM-LABEL: test_minsd: 
4007 ; SLM: # %bb.0: 4008 ; SLM-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] 4009 ; SLM-NEXT: minsd (%rdi), %xmm0 # sched: [6:1.00] 4010 ; SLM-NEXT: retq # sched: [4:1.00] 4011 ; 4012 ; SANDY-SSE-LABEL: test_minsd: 4013 ; SANDY-SSE: # %bb.0: 4014 ; SANDY-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] 4015 ; SANDY-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:1.00] 4016 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4017 ; 4018 ; SANDY-LABEL: test_minsd: 4019 ; SANDY: # %bb.0: 4020 ; SANDY-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4021 ; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 4022 ; SANDY-NEXT: retq # sched: [1:1.00] 4023 ; 4024 ; HASWELL-SSE-LABEL: test_minsd: 4025 ; HASWELL-SSE: # %bb.0: 4026 ; HASWELL-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] 4027 ; HASWELL-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [8:1.00] 4028 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4029 ; 4030 ; HASWELL-LABEL: test_minsd: 4031 ; HASWELL: # %bb.0: 4032 ; HASWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4033 ; HASWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 4034 ; HASWELL-NEXT: retq # sched: [7:1.00] 4035 ; 4036 ; BROADWELL-SSE-LABEL: test_minsd: 4037 ; BROADWELL-SSE: # %bb.0: 4038 ; BROADWELL-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] 4039 ; BROADWELL-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [8:1.00] 4040 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4041 ; 4042 ; BROADWELL-LABEL: test_minsd: 4043 ; BROADWELL: # %bb.0: 4044 ; BROADWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4045 ; BROADWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 4046 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4047 ; 4048 ; SKYLAKE-SSE-LABEL: test_minsd: 4049 ; SKYLAKE-SSE: # %bb.0: 4050 ; SKYLAKE-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.50] 4051 ; SKYLAKE-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50] 4052 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4053 ; 4054 ; SKYLAKE-LABEL: test_minsd: 4055 ; SKYLAKE: # %bb.0: 4056 ; SKYLAKE-NEXT: vminsd %xmm1, %xmm0, 
%xmm0 # sched: [4:0.50] 4057 ; SKYLAKE-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 4058 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4059 ; 4060 ; SKX-SSE-LABEL: test_minsd: 4061 ; SKX-SSE: # %bb.0: 4062 ; SKX-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.50] 4063 ; SKX-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50] 4064 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4065 ; 4066 ; SKX-LABEL: test_minsd: 4067 ; SKX: # %bb.0: 4068 ; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4069 ; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 4070 ; SKX-NEXT: retq # sched: [7:1.00] 4071 ; 4072 ; BTVER2-SSE-LABEL: test_minsd: 4073 ; BTVER2-SSE: # %bb.0: 4074 ; BTVER2-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [2:1.00] 4075 ; BTVER2-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [7:1.00] 4076 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4077 ; 4078 ; BTVER2-LABEL: test_minsd: 4079 ; BTVER2: # %bb.0: 4080 ; BTVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 4081 ; BTVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 4082 ; BTVER2-NEXT: retq # sched: [4:1.00] 4083 ; 4084 ; ZNVER1-SSE-LABEL: test_minsd: 4085 ; ZNVER1-SSE: # %bb.0: 4086 ; ZNVER1-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] 4087 ; ZNVER1-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [10:1.00] 4088 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4089 ; 4090 ; ZNVER1-LABEL: test_minsd: 4091 ; ZNVER1: # %bb.0: 4092 ; ZNVER1-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4093 ; ZNVER1-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 4094 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4095 %1 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) 4096 %2 = load <2 x double>, <2 x double> *%a2, align 16 4097 %3 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %1, <2 x double> %2) 4098 ret <2 x double> %3 4099 } 4100 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone 4101 4102 define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { 4103 ; GENERIC-LABEL: 
test_movapd: 4104 ; GENERIC: # %bb.0: 4105 ; GENERIC-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] 4106 ; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 4107 ; GENERIC-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] 4108 ; GENERIC-NEXT: retq # sched: [1:1.00] 4109 ; 4110 ; ATOM-LABEL: test_movapd: 4111 ; ATOM: # %bb.0: 4112 ; ATOM-NEXT: movapd (%rdi), %xmm0 # sched: [1:1.00] 4113 ; ATOM-NEXT: addpd %xmm0, %xmm0 # sched: [6:3.00] 4114 ; ATOM-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] 4115 ; ATOM-NEXT: retq # sched: [79:39.50] 4116 ; 4117 ; SLM-LABEL: test_movapd: 4118 ; SLM: # %bb.0: 4119 ; SLM-NEXT: movapd (%rdi), %xmm0 # sched: [3:1.00] 4120 ; SLM-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 4121 ; SLM-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] 4122 ; SLM-NEXT: retq # sched: [4:1.00] 4123 ; 4124 ; SANDY-SSE-LABEL: test_movapd: 4125 ; SANDY-SSE: # %bb.0: 4126 ; SANDY-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] 4127 ; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 4128 ; SANDY-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] 4129 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4130 ; 4131 ; SANDY-LABEL: test_movapd: 4132 ; SANDY: # %bb.0: 4133 ; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] 4134 ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 4135 ; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] 4136 ; SANDY-NEXT: retq # sched: [1:1.00] 4137 ; 4138 ; HASWELL-SSE-LABEL: test_movapd: 4139 ; HASWELL-SSE: # %bb.0: 4140 ; HASWELL-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] 4141 ; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 4142 ; HASWELL-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] 4143 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4144 ; 4145 ; HASWELL-LABEL: test_movapd: 4146 ; HASWELL: # %bb.0: 4147 ; HASWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] 4148 ; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 4149 ; HASWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] 4150 ; HASWELL-NEXT: retq # sched: [7:1.00] 
4151 ; 4152 ; BROADWELL-SSE-LABEL: test_movapd: 4153 ; BROADWELL-SSE: # %bb.0: 4154 ; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:0.50] 4155 ; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 4156 ; BROADWELL-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] 4157 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4158 ; 4159 ; BROADWELL-LABEL: test_movapd: 4160 ; BROADWELL: # %bb.0: 4161 ; BROADWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:0.50] 4162 ; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 4163 ; BROADWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] 4164 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4165 ; 4166 ; SKYLAKE-SSE-LABEL: test_movapd: 4167 ; SKYLAKE-SSE: # %bb.0: 4168 ; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] 4169 ; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] 4170 ; SKYLAKE-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] 4171 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4172 ; 4173 ; SKYLAKE-LABEL: test_movapd: 4174 ; SKYLAKE: # %bb.0: 4175 ; SKYLAKE-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] 4176 ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 4177 ; SKYLAKE-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] 4178 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4179 ; 4180 ; SKX-SSE-LABEL: test_movapd: 4181 ; SKX-SSE: # %bb.0: 4182 ; SKX-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] 4183 ; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] 4184 ; SKX-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] 4185 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4186 ; 4187 ; SKX-LABEL: test_movapd: 4188 ; SKX: # %bb.0: 4189 ; SKX-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] 4190 ; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 4191 ; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] 4192 ; SKX-NEXT: retq # sched: [7:1.00] 4193 ; 4194 ; BTVER2-SSE-LABEL: test_movapd: 4195 ; BTVER2-SSE: # %bb.0: 4196 ; BTVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:1.00] 4197 ; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: 
[3:1.00] 4198 ; BTVER2-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] 4199 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4200 ; 4201 ; BTVER2-LABEL: test_movapd: 4202 ; BTVER2: # %bb.0: 4203 ; BTVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:1.00] 4204 ; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 4205 ; BTVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] 4206 ; BTVER2-NEXT: retq # sched: [4:1.00] 4207 ; 4208 ; ZNVER1-SSE-LABEL: test_movapd: 4209 ; ZNVER1-SSE: # %bb.0: 4210 ; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [8:0.50] 4211 ; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 4212 ; ZNVER1-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:0.50] 4213 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4214 ; 4215 ; ZNVER1-LABEL: test_movapd: 4216 ; ZNVER1: # %bb.0: 4217 ; ZNVER1-NEXT: vmovapd (%rdi), %xmm0 # sched: [8:0.50] 4218 ; ZNVER1-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 4219 ; ZNVER1-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:0.50] 4220 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4221 %1 = load <2 x double>, <2 x double> *%a0, align 16 4222 %2 = fadd <2 x double> %1, %1 4223 store <2 x double> %2, <2 x double> *%a1, align 16 4224 ret void 4225 } 4226 4227 define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { 4228 ; GENERIC-LABEL: test_movdqa: 4229 ; GENERIC: # %bb.0: 4230 ; GENERIC-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] 4231 ; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 4232 ; GENERIC-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] 4233 ; GENERIC-NEXT: retq # sched: [1:1.00] 4234 ; 4235 ; ATOM-LABEL: test_movdqa: 4236 ; ATOM: # %bb.0: 4237 ; ATOM-NEXT: movdqa (%rdi), %xmm0 # sched: [1:1.00] 4238 ; ATOM-NEXT: paddq %xmm0, %xmm0 # sched: [2:1.00] 4239 ; ATOM-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] 4240 ; ATOM-NEXT: retq # sched: [79:39.50] 4241 ; 4242 ; SLM-LABEL: test_movdqa: 4243 ; SLM: # %bb.0: 4244 ; SLM-NEXT: movdqa (%rdi), %xmm0 # sched: [3:1.00] 4245 ; SLM-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 4246 ; SLM-NEXT: movdqa 
%xmm0, (%rsi) # sched: [1:1.00] 4247 ; SLM-NEXT: retq # sched: [4:1.00] 4248 ; 4249 ; SANDY-SSE-LABEL: test_movdqa: 4250 ; SANDY-SSE: # %bb.0: 4251 ; SANDY-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] 4252 ; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 4253 ; SANDY-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] 4254 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4255 ; 4256 ; SANDY-LABEL: test_movdqa: 4257 ; SANDY: # %bb.0: 4258 ; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] 4259 ; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] 4260 ; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] 4261 ; SANDY-NEXT: retq # sched: [1:1.00] 4262 ; 4263 ; HASWELL-SSE-LABEL: test_movdqa: 4264 ; HASWELL-SSE: # %bb.0: 4265 ; HASWELL-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] 4266 ; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 4267 ; HASWELL-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] 4268 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4269 ; 4270 ; HASWELL-LABEL: test_movdqa: 4271 ; HASWELL: # %bb.0: 4272 ; HASWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] 4273 ; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] 4274 ; HASWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] 4275 ; HASWELL-NEXT: retq # sched: [7:1.00] 4276 ; 4277 ; BROADWELL-SSE-LABEL: test_movdqa: 4278 ; BROADWELL-SSE: # %bb.0: 4279 ; BROADWELL-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:0.50] 4280 ; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 4281 ; BROADWELL-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] 4282 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4283 ; 4284 ; BROADWELL-LABEL: test_movdqa: 4285 ; BROADWELL: # %bb.0: 4286 ; BROADWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:0.50] 4287 ; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] 4288 ; BROADWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] 4289 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4290 ; 4291 ; SKYLAKE-SSE-LABEL: test_movdqa: 4292 ; SKYLAKE-SSE: # %bb.0: 4293 ; 
SKYLAKE-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] 4294 ; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] 4295 ; SKYLAKE-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] 4296 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4297 ; 4298 ; SKYLAKE-LABEL: test_movdqa: 4299 ; SKYLAKE: # %bb.0: 4300 ; SKYLAKE-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] 4301 ; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] 4302 ; SKYLAKE-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] 4303 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4304 ; 4305 ; SKX-SSE-LABEL: test_movdqa: 4306 ; SKX-SSE: # %bb.0: 4307 ; SKX-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] 4308 ; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] 4309 ; SKX-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] 4310 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4311 ; 4312 ; SKX-LABEL: test_movdqa: 4313 ; SKX: # %bb.0: 4314 ; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] 4315 ; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] 4316 ; SKX-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] 4317 ; SKX-NEXT: retq # sched: [7:1.00] 4318 ; 4319 ; BTVER2-SSE-LABEL: test_movdqa: 4320 ; BTVER2-SSE: # %bb.0: 4321 ; BTVER2-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:1.00] 4322 ; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 4323 ; BTVER2-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] 4324 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4325 ; 4326 ; BTVER2-LABEL: test_movdqa: 4327 ; BTVER2: # %bb.0: 4328 ; BTVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:1.00] 4329 ; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] 4330 ; BTVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] 4331 ; BTVER2-NEXT: retq # sched: [4:1.00] 4332 ; 4333 ; ZNVER1-SSE-LABEL: test_movdqa: 4334 ; ZNVER1-SSE: # %bb.0: 4335 ; ZNVER1-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [8:0.50] 4336 ; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25] 4337 ; ZNVER1-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:0.50] 4338 ; ZNVER1-SSE-NEXT: retq # sched: 
[1:0.50] 4339 ; 4340 ; ZNVER1-LABEL: test_movdqa: 4341 ; ZNVER1: # %bb.0: 4342 ; ZNVER1-NEXT: vmovdqa (%rdi), %xmm0 # sched: [8:0.50] 4343 ; ZNVER1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25] 4344 ; ZNVER1-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:0.50] 4345 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4346 %1 = load <2 x i64>, <2 x i64> *%a0, align 16 4347 %2 = add <2 x i64> %1, %1 4348 store <2 x i64> %2, <2 x i64> *%a1, align 16 4349 ret void 4350 } 4351 4352 define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { 4353 ; GENERIC-LABEL: test_movdqu: 4354 ; GENERIC: # %bb.0: 4355 ; GENERIC-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] 4356 ; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 4357 ; GENERIC-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] 4358 ; GENERIC-NEXT: retq # sched: [1:1.00] 4359 ; 4360 ; ATOM-LABEL: test_movdqu: 4361 ; ATOM: # %bb.0: 4362 ; ATOM-NEXT: movdqu (%rdi), %xmm0 # sched: [3:1.50] 4363 ; ATOM-NEXT: paddq %xmm0, %xmm0 # sched: [2:1.00] 4364 ; ATOM-NEXT: movdqu %xmm0, (%rsi) # sched: [2:1.00] 4365 ; ATOM-NEXT: retq # sched: [79:39.50] 4366 ; 4367 ; SLM-LABEL: test_movdqu: 4368 ; SLM: # %bb.0: 4369 ; SLM-NEXT: movdqu (%rdi), %xmm0 # sched: [3:1.00] 4370 ; SLM-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 4371 ; SLM-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] 4372 ; SLM-NEXT: retq # sched: [4:1.00] 4373 ; 4374 ; SANDY-SSE-LABEL: test_movdqu: 4375 ; SANDY-SSE: # %bb.0: 4376 ; SANDY-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] 4377 ; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 4378 ; SANDY-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] 4379 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4380 ; 4381 ; SANDY-LABEL: test_movdqu: 4382 ; SANDY: # %bb.0: 4383 ; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] 4384 ; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] 4385 ; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] 4386 ; SANDY-NEXT: retq # sched: [1:1.00] 4387 ; 4388 ; HASWELL-SSE-LABEL: test_movdqu: 4389 ; HASWELL-SSE: # 
%bb.0: 4390 ; HASWELL-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] 4391 ; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 4392 ; HASWELL-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] 4393 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4394 ; 4395 ; HASWELL-LABEL: test_movdqu: 4396 ; HASWELL: # %bb.0: 4397 ; HASWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] 4398 ; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] 4399 ; HASWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] 4400 ; HASWELL-NEXT: retq # sched: [7:1.00] 4401 ; 4402 ; BROADWELL-SSE-LABEL: test_movdqu: 4403 ; BROADWELL-SSE: # %bb.0: 4404 ; BROADWELL-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:0.50] 4405 ; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 4406 ; BROADWELL-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] 4407 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4408 ; 4409 ; BROADWELL-LABEL: test_movdqu: 4410 ; BROADWELL: # %bb.0: 4411 ; BROADWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:0.50] 4412 ; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] 4413 ; BROADWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] 4414 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4415 ; 4416 ; SKYLAKE-SSE-LABEL: test_movdqu: 4417 ; SKYLAKE-SSE: # %bb.0: 4418 ; SKYLAKE-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] 4419 ; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] 4420 ; SKYLAKE-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] 4421 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4422 ; 4423 ; SKYLAKE-LABEL: test_movdqu: 4424 ; SKYLAKE: # %bb.0: 4425 ; SKYLAKE-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] 4426 ; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] 4427 ; SKYLAKE-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] 4428 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4429 ; 4430 ; SKX-SSE-LABEL: test_movdqu: 4431 ; SKX-SSE: # %bb.0: 4432 ; SKX-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] 4433 ; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] 4434 ; SKX-SSE-NEXT: 
movdqu %xmm0, (%rsi) # sched: [1:1.00] 4435 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4436 ; 4437 ; SKX-LABEL: test_movdqu: 4438 ; SKX: # %bb.0: 4439 ; SKX-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] 4440 ; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] 4441 ; SKX-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] 4442 ; SKX-NEXT: retq # sched: [7:1.00] 4443 ; 4444 ; BTVER2-SSE-LABEL: test_movdqu: 4445 ; BTVER2-SSE: # %bb.0: 4446 ; BTVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:1.00] 4447 ; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 4448 ; BTVER2-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] 4449 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4450 ; 4451 ; BTVER2-LABEL: test_movdqu: 4452 ; BTVER2: # %bb.0: 4453 ; BTVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:1.00] 4454 ; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] 4455 ; BTVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] 4456 ; BTVER2-NEXT: retq # sched: [4:1.00] 4457 ; 4458 ; ZNVER1-SSE-LABEL: test_movdqu: 4459 ; ZNVER1-SSE: # %bb.0: 4460 ; ZNVER1-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [8:0.50] 4461 ; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25] 4462 ; ZNVER1-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:0.50] 4463 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4464 ; 4465 ; ZNVER1-LABEL: test_movdqu: 4466 ; ZNVER1: # %bb.0: 4467 ; ZNVER1-NEXT: vmovdqu (%rdi), %xmm0 # sched: [8:0.50] 4468 ; ZNVER1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25] 4469 ; ZNVER1-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:0.50] 4470 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4471 %1 = load <2 x i64>, <2 x i64> *%a0, align 1 4472 %2 = add <2 x i64> %1, %1 4473 store <2 x i64> %2, <2 x i64> *%a1, align 1 4474 ret void 4475 } 4476 4477 define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { 4478 ; GENERIC-LABEL: test_movd: 4479 ; GENERIC: # %bb.0: 4480 ; GENERIC-NEXT: movd %edi, %xmm1 # sched: [1:1.00] 4481 ; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] 4482 ; GENERIC-NEXT: paddd 
%xmm0, %xmm1 # sched: [1:0.50] 4483 ; GENERIC-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] 4484 ; GENERIC-NEXT: movd %xmm2, %eax # sched: [2:1.00] 4485 ; GENERIC-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] 4486 ; GENERIC-NEXT: retq # sched: [1:1.00] 4487 ; 4488 ; ATOM-LABEL: test_movd: 4489 ; ATOM: # %bb.0: 4490 ; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00] 4491 ; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] 4492 ; ATOM-NEXT: movd %xmm1, %eax # sched: [3:3.00] 4493 ; ATOM-NEXT: movd %edi, %xmm1 # sched: [1:1.00] 4494 ; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] 4495 ; ATOM-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] 4496 ; ATOM-NEXT: retq # sched: [79:39.50] 4497 ; 4498 ; SLM-LABEL: test_movd: 4499 ; SLM: # %bb.0: 4500 ; SLM-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [3:1.00] 4501 ; SLM-NEXT: movd %edi, %xmm1 # sched: [1:0.50] 4502 ; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] 4503 ; SLM-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] 4504 ; SLM-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] 4505 ; SLM-NEXT: movd %xmm2, %eax # sched: [1:0.50] 4506 ; SLM-NEXT: retq # sched: [4:1.00] 4507 ; 4508 ; SANDY-SSE-LABEL: test_movd: 4509 ; SANDY-SSE: # %bb.0: 4510 ; SANDY-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] 4511 ; SANDY-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] 4512 ; SANDY-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] 4513 ; SANDY-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] 4514 ; SANDY-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] 4515 ; SANDY-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] 4516 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4517 ; 4518 ; SANDY-LABEL: test_movd: 4519 ; SANDY: # %bb.0: 4520 ; SANDY-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] 4521 ; SANDY-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] 4522 ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 4523 ; SANDY-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] 4524 ; SANDY-NEXT: vmovd %xmm0, %eax # sched: 
[2:1.00] 4525 ; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] 4526 ; SANDY-NEXT: retq # sched: [1:1.00] 4527 ; 4528 ; HASWELL-SSE-LABEL: test_movd: 4529 ; HASWELL-SSE: # %bb.0: 4530 ; HASWELL-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] 4531 ; HASWELL-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] 4532 ; HASWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] 4533 ; HASWELL-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] 4534 ; HASWELL-SSE-NEXT: movd %xmm2, %eax # sched: [1:1.00] 4535 ; HASWELL-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] 4536 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4537 ; 4538 ; HASWELL-LABEL: test_movd: 4539 ; HASWELL: # %bb.0: 4540 ; HASWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] 4541 ; HASWELL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] 4542 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 4543 ; HASWELL-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] 4544 ; HASWELL-NEXT: vmovd %xmm0, %eax # sched: [1:1.00] 4545 ; HASWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] 4546 ; HASWELL-NEXT: retq # sched: [7:1.00] 4547 ; 4548 ; BROADWELL-SSE-LABEL: test_movd: 4549 ; BROADWELL-SSE: # %bb.0: 4550 ; BROADWELL-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] 4551 ; BROADWELL-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] 4552 ; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] 4553 ; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] 4554 ; BROADWELL-SSE-NEXT: movd %xmm2, %eax # sched: [1:1.00] 4555 ; BROADWELL-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] 4556 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4557 ; 4558 ; BROADWELL-LABEL: test_movd: 4559 ; BROADWELL: # %bb.0: 4560 ; BROADWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] 4561 ; BROADWELL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] 4562 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 4563 ; BROADWELL-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] 4564 ; 
BROADWELL-NEXT: vmovd %xmm0, %eax # sched: [1:1.00] 4565 ; BROADWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] 4566 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4567 ; 4568 ; SKYLAKE-SSE-LABEL: test_movd: 4569 ; SKYLAKE-SSE: # %bb.0: 4570 ; SKYLAKE-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] 4571 ; SKYLAKE-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] 4572 ; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.33] 4573 ; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.33] 4574 ; SKYLAKE-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] 4575 ; SKYLAKE-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] 4576 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4577 ; 4578 ; SKYLAKE-LABEL: test_movd: 4579 ; SKYLAKE: # %bb.0: 4580 ; SKYLAKE-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] 4581 ; SKYLAKE-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] 4582 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33] 4583 ; SKYLAKE-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33] 4584 ; SKYLAKE-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] 4585 ; SKYLAKE-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] 4586 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4587 ; 4588 ; SKX-SSE-LABEL: test_movd: 4589 ; SKX-SSE: # %bb.0: 4590 ; SKX-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] 4591 ; SKX-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] 4592 ; SKX-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.33] 4593 ; SKX-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.33] 4594 ; SKX-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] 4595 ; SKX-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] 4596 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4597 ; 4598 ; SKX-LABEL: test_movd: 4599 ; SKX: # %bb.0: 4600 ; SKX-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] 4601 ; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] 4602 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33] 4603 ; SKX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33] 4604 ; SKX-NEXT: vmovd %xmm0, %eax # sched: 
[2:1.00] 4605 ; SKX-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] 4606 ; SKX-NEXT: retq # sched: [7:1.00] 4607 ; 4608 ; BTVER2-SSE-LABEL: test_movd: 4609 ; BTVER2-SSE: # %bb.0: 4610 ; BTVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] 4611 ; BTVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [8:0.50] 4612 ; BTVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] 4613 ; BTVER2-SSE-NEXT: movd %xmm2, %eax # sched: [4:1.00] 4614 ; BTVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] 4615 ; BTVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [2:1.00] 4616 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4617 ; 4618 ; BTVER2-LABEL: test_movd: 4619 ; BTVER2: # %bb.0: 4620 ; BTVER2-NEXT: vmovd %edi, %xmm1 # sched: [8:0.50] 4621 ; BTVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] 4622 ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 4623 ; BTVER2-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] 4624 ; BTVER2-NEXT: vmovd %xmm0, %eax # sched: [4:1.00] 4625 ; BTVER2-NEXT: vmovd %xmm1, (%rsi) # sched: [2:1.00] 4626 ; BTVER2-NEXT: retq # sched: [4:1.00] 4627 ; 4628 ; ZNVER1-SSE-LABEL: test_movd: 4629 ; ZNVER1-SSE: # %bb.0: 4630 ; ZNVER1-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50] 4631 ; ZNVER1-SSE-NEXT: movd %edi, %xmm1 # sched: [3:1.00] 4632 ; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.25] 4633 ; ZNVER1-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:0.50] 4634 ; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.25] 4635 ; ZNVER1-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] 4636 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4637 ; 4638 ; ZNVER1-LABEL: test_movd: 4639 ; ZNVER1: # %bb.0: 4640 ; ZNVER1-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50] 4641 ; ZNVER1-NEXT: vmovd %edi, %xmm1 # sched: [3:1.00] 4642 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.25] 4643 ; ZNVER1-NEXT: vmovd %xmm1, (%rsi) # sched: [1:0.50] 4644 ; ZNVER1-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.25] 4645 ; 
ZNVER1-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] 4646 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4647 %1 = insertelement <4 x i32> undef, i32 %a1, i32 0 4648 %2 = load i32, i32 *%a2 4649 %3 = insertelement <4 x i32> undef, i32 %2, i32 0 4650 %4 = add <4 x i32> %a0, %1 4651 %5 = add <4 x i32> %a0, %3 4652 %6 = extractelement <4 x i32> %4, i32 0 4653 %7 = extractelement <4 x i32> %5, i32 0 4654 store i32 %6, i32* %a2 4655 ret i32 %7 4656 } 4657 4658 define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { 4659 ; GENERIC-LABEL: test_movd_64: 4660 ; GENERIC: # %bb.0: 4661 ; GENERIC-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] 4662 ; GENERIC-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] 4663 ; GENERIC-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] 4664 ; GENERIC-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] 4665 ; GENERIC-NEXT: movq %xmm2, %rax # sched: [2:1.00] 4666 ; GENERIC-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] 4667 ; GENERIC-NEXT: retq # sched: [1:1.00] 4668 ; 4669 ; ATOM-LABEL: test_movd_64: 4670 ; ATOM: # %bb.0: 4671 ; ATOM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00] 4672 ; ATOM-NEXT: movq %rdi, %xmm2 # sched: [1:1.00] 4673 ; ATOM-NEXT: paddq %xmm0, %xmm1 # sched: [2:1.00] 4674 ; ATOM-NEXT: paddq %xmm0, %xmm2 # sched: [2:1.00] 4675 ; ATOM-NEXT: movq %xmm1, %rax # sched: [3:3.00] 4676 ; ATOM-NEXT: movq %xmm2, (%rsi) # sched: [1:1.00] 4677 ; ATOM-NEXT: retq # sched: [79:39.50] 4678 ; 4679 ; SLM-LABEL: test_movd_64: 4680 ; SLM: # %bb.0: 4681 ; SLM-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [3:1.00] 4682 ; SLM-NEXT: movq %rdi, %xmm1 # sched: [1:0.50] 4683 ; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] 4684 ; SLM-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] 4685 ; SLM-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] 4686 ; SLM-NEXT: movq %xmm2, %rax # sched: [1:0.50] 4687 ; SLM-NEXT: retq # sched: [4:1.00] 4688 ; 4689 ; SANDY-SSE-LABEL: test_movd_64: 4690 ; SANDY-SSE: # %bb.0: 4691 ; SANDY-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] 4692 ; SANDY-SSE-NEXT: 
movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] 4693 ; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] 4694 ; SANDY-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] 4695 ; SANDY-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] 4696 ; SANDY-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] 4697 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4698 ; 4699 ; SANDY-LABEL: test_movd_64: 4700 ; SANDY: # %bb.0: 4701 ; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] 4702 ; SANDY-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] 4703 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 4704 ; SANDY-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] 4705 ; SANDY-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] 4706 ; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] 4707 ; SANDY-NEXT: retq # sched: [1:1.00] 4708 ; 4709 ; HASWELL-SSE-LABEL: test_movd_64: 4710 ; HASWELL-SSE: # %bb.0: 4711 ; HASWELL-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] 4712 ; HASWELL-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] 4713 ; HASWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] 4714 ; HASWELL-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] 4715 ; HASWELL-SSE-NEXT: movq %xmm2, %rax # sched: [1:1.00] 4716 ; HASWELL-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] 4717 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4718 ; 4719 ; HASWELL-LABEL: test_movd_64: 4720 ; HASWELL: # %bb.0: 4721 ; HASWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] 4722 ; HASWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] 4723 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 4724 ; HASWELL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] 4725 ; HASWELL-NEXT: vmovq %xmm0, %rax # sched: [1:1.00] 4726 ; HASWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] 4727 ; HASWELL-NEXT: retq # sched: [7:1.00] 4728 ; 4729 ; BROADWELL-SSE-LABEL: test_movd_64: 4730 ; BROADWELL-SSE: # %bb.0: 4731 ; BROADWELL-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] 4732 ; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero 
sched: [5:0.50] 4733 ; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] 4734 ; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] 4735 ; BROADWELL-SSE-NEXT: movq %xmm2, %rax # sched: [1:1.00] 4736 ; BROADWELL-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] 4737 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4738 ; 4739 ; BROADWELL-LABEL: test_movd_64: 4740 ; BROADWELL: # %bb.0: 4741 ; BROADWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] 4742 ; BROADWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] 4743 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 4744 ; BROADWELL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] 4745 ; BROADWELL-NEXT: vmovq %xmm0, %rax # sched: [1:1.00] 4746 ; BROADWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] 4747 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4748 ; 4749 ; SKYLAKE-SSE-LABEL: test_movd_64: 4750 ; SKYLAKE-SSE: # %bb.0: 4751 ; SKYLAKE-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] 4752 ; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] 4753 ; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] 4754 ; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.33] 4755 ; SKYLAKE-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] 4756 ; SKYLAKE-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] 4757 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4758 ; 4759 ; SKYLAKE-LABEL: test_movd_64: 4760 ; SKYLAKE: # %bb.0: 4761 ; SKYLAKE-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] 4762 ; SKYLAKE-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] 4763 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33] 4764 ; SKYLAKE-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33] 4765 ; SKYLAKE-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] 4766 ; SKYLAKE-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] 4767 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4768 ; 4769 ; SKX-SSE-LABEL: test_movd_64: 4770 ; SKX-SSE: # %bb.0: 4771 ; SKX-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] 4772 ; SKX-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero 
sched: [5:0.50] 4773 ; SKX-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] 4774 ; SKX-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.33] 4775 ; SKX-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] 4776 ; SKX-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] 4777 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4778 ; 4779 ; SKX-LABEL: test_movd_64: 4780 ; SKX: # %bb.0: 4781 ; SKX-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] 4782 ; SKX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] 4783 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33] 4784 ; SKX-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33] 4785 ; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] 4786 ; SKX-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] 4787 ; SKX-NEXT: retq # sched: [7:1.00] 4788 ; 4789 ; BTVER2-SSE-LABEL: test_movd_64: 4790 ; BTVER2-SSE: # %bb.0: 4791 ; BTVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00] 4792 ; BTVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [8:0.50] 4793 ; BTVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] 4794 ; BTVER2-SSE-NEXT: movq %xmm2, %rax # sched: [4:1.00] 4795 ; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] 4796 ; BTVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [2:1.00] 4797 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4798 ; 4799 ; BTVER2-LABEL: test_movd_64: 4800 ; BTVER2: # %bb.0: 4801 ; BTVER2-NEXT: vmovq %rdi, %xmm1 # sched: [8:0.50] 4802 ; BTVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00] 4803 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 4804 ; BTVER2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] 4805 ; BTVER2-NEXT: vmovq %xmm0, %rax # sched: [4:1.00] 4806 ; BTVER2-NEXT: vmovq %xmm1, (%rsi) # sched: [2:1.00] 4807 ; BTVER2-NEXT: retq # sched: [4:1.00] 4808 ; 4809 ; ZNVER1-SSE-LABEL: test_movd_64: 4810 ; ZNVER1-SSE: # %bb.0: 4811 ; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50] 4812 ; ZNVER1-SSE-NEXT: movq %rdi, %xmm1 # sched: [3:1.00] 4813 ; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25] 4814 ; 
ZNVER1-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:0.50] 4815 ; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.25] 4816 ; ZNVER1-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] 4817 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4818 ; 4819 ; ZNVER1-LABEL: test_movd_64: 4820 ; ZNVER1: # %bb.0: 4821 ; ZNVER1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50] 4822 ; ZNVER1-NEXT: vmovq %rdi, %xmm1 # sched: [3:1.00] 4823 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.25] 4824 ; ZNVER1-NEXT: vmovq %xmm1, (%rsi) # sched: [1:0.50] 4825 ; ZNVER1-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.25] 4826 ; ZNVER1-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] 4827 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4828 %1 = insertelement <2 x i64> undef, i64 %a1, i64 0 4829 %2 = load i64, i64 *%a2 4830 %3 = insertelement <2 x i64> undef, i64 %2, i64 0 4831 %4 = add <2 x i64> %a0, %1 4832 %5 = add <2 x i64> %a0, %3 4833 %6 = extractelement <2 x i64> %4, i64 0 4834 %7 = extractelement <2 x i64> %5, i64 0 4835 store i64 %6, i64* %a2 4836 ret i64 %7 4837 } 4838 4839 define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { 4840 ; GENERIC-LABEL: test_movhpd: 4841 ; GENERIC: # %bb.0: 4842 ; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 4843 ; GENERIC-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 4844 ; GENERIC-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] 4845 ; GENERIC-NEXT: retq # sched: [1:1.00] 4846 ; 4847 ; ATOM-LABEL: test_movhpd: 4848 ; ATOM: # %bb.0: 4849 ; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] 4850 ; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00] 4851 ; ATOM-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] 4852 ; ATOM-NEXT: retq # sched: [79:39.50] 4853 ; 4854 ; SLM-LABEL: test_movhpd: 4855 ; SLM: # %bb.0: 4856 ; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00] 4857 ; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 4858 ; SLM-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] 4859 ; SLM-NEXT: retq # sched: [4:1.00] 
4860 ; 4861 ; SANDY-SSE-LABEL: test_movhpd: 4862 ; SANDY-SSE: # %bb.0: 4863 ; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 4864 ; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 4865 ; SANDY-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] 4866 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4867 ; 4868 ; SANDY-LABEL: test_movhpd: 4869 ; SANDY: # %bb.0: 4870 ; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 4871 ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4872 ; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] 4873 ; SANDY-NEXT: retq # sched: [1:1.00] 4874 ; 4875 ; HASWELL-SSE-LABEL: test_movhpd: 4876 ; HASWELL-SSE: # %bb.0: 4877 ; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 4878 ; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 4879 ; HASWELL-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] 4880 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 4881 ; 4882 ; HASWELL-LABEL: test_movhpd: 4883 ; HASWELL: # %bb.0: 4884 ; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 4885 ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4886 ; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] 4887 ; HASWELL-NEXT: retq # sched: [7:1.00] 4888 ; 4889 ; BROADWELL-SSE-LABEL: test_movhpd: 4890 ; BROADWELL-SSE: # %bb.0: 4891 ; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 4892 ; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 4893 ; BROADWELL-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] 4894 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 4895 ; 4896 ; BROADWELL-LABEL: test_movhpd: 4897 ; BROADWELL: # %bb.0: 4898 ; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 4899 ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4900 ; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] 4901 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4902 ; 4903 ; SKYLAKE-SSE-LABEL: test_movhpd: 4904 ; SKYLAKE-SSE: # 
%bb.0: 4905 ; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 4906 ; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] 4907 ; SKYLAKE-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] 4908 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 4909 ; 4910 ; SKYLAKE-LABEL: test_movhpd: 4911 ; SKYLAKE: # %bb.0: 4912 ; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 4913 ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4914 ; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] 4915 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4916 ; 4917 ; SKX-SSE-LABEL: test_movhpd: 4918 ; SKX-SSE: # %bb.0: 4919 ; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 4920 ; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] 4921 ; SKX-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] 4922 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 4923 ; 4924 ; SKX-LABEL: test_movhpd: 4925 ; SKX: # %bb.0: 4926 ; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 4927 ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 4928 ; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] 4929 ; SKX-NEXT: retq # sched: [7:1.00] 4930 ; 4931 ; BTVER2-SSE-LABEL: test_movhpd: 4932 ; BTVER2-SSE: # %bb.0: 4933 ; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 4934 ; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 4935 ; BTVER2-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [2:1.00] 4936 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 4937 ; 4938 ; BTVER2-LABEL: test_movhpd: 4939 ; BTVER2: # %bb.0: 4940 ; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 4941 ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4942 ; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00] 4943 ; BTVER2-NEXT: retq # sched: [4:1.00] 4944 ; 4945 ; ZNVER1-SSE-LABEL: test_movhpd: 4946 ; ZNVER1-SSE: # %bb.0: 4947 ; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] 4948 ; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # 
sched: [3:1.00] 4949 ; ZNVER1-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:0.50] 4950 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 4951 ; 4952 ; ZNVER1-LABEL: test_movhpd: 4953 ; ZNVER1: # %bb.0: 4954 ; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] 4955 ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 4956 ; ZNVER1-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:0.50] 4957 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4958 %1 = bitcast x86_mmx* %a2 to double* 4959 %2 = load double, double *%1, align 8 4960 %3 = insertelement <2 x double> %a1, double %2, i32 1 4961 %4 = fadd <2 x double> %a0, %3 4962 %5 = extractelement <2 x double> %4, i32 1 4963 store double %5, double* %1 4964 ret void 4965 } 4966 4967 define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { 4968 ; GENERIC-LABEL: test_movlpd: 4969 ; GENERIC: # %bb.0: 4970 ; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] 4971 ; GENERIC-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 4972 ; GENERIC-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] 4973 ; GENERIC-NEXT: retq # sched: [1:1.00] 4974 ; 4975 ; ATOM-LABEL: test_movlpd: 4976 ; ATOM: # %bb.0: 4977 ; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] 4978 ; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00] 4979 ; ATOM-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] 4980 ; ATOM-NEXT: retq # sched: [79:39.50] 4981 ; 4982 ; SLM-LABEL: test_movlpd: 4983 ; SLM: # %bb.0: 4984 ; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00] 4985 ; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 4986 ; SLM-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] 4987 ; SLM-NEXT: retq # sched: [4:1.00] 4988 ; 4989 ; SANDY-SSE-LABEL: test_movlpd: 4990 ; SANDY-SSE: # %bb.0: 4991 ; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] 4992 ; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 4993 ; SANDY-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] 4994 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 4995 ; 4996 ; 
SANDY-LABEL: test_movlpd: 4997 ; SANDY: # %bb.0: 4998 ; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] 4999 ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5000 ; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] 5001 ; SANDY-NEXT: retq # sched: [1:1.00] 5002 ; 5003 ; HASWELL-SSE-LABEL: test_movlpd: 5004 ; HASWELL-SSE: # %bb.0: 5005 ; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 5006 ; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 5007 ; HASWELL-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] 5008 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 5009 ; 5010 ; HASWELL-LABEL: test_movlpd: 5011 ; HASWELL: # %bb.0: 5012 ; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 5013 ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5014 ; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] 5015 ; HASWELL-NEXT: retq # sched: [7:1.00] 5016 ; 5017 ; BROADWELL-SSE-LABEL: test_movlpd: 5018 ; BROADWELL-SSE: # %bb.0: 5019 ; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 5020 ; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 5021 ; BROADWELL-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] 5022 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 5023 ; 5024 ; BROADWELL-LABEL: test_movlpd: 5025 ; BROADWELL: # %bb.0: 5026 ; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 5027 ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5028 ; BROADWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] 5029 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5030 ; 5031 ; SKYLAKE-SSE-LABEL: test_movlpd: 5032 ; SKYLAKE-SSE: # %bb.0: 5033 ; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 5034 ; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] 5035 ; SKYLAKE-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] 5036 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 5037 ; 5038 ; SKYLAKE-LABEL: test_movlpd: 5039 ; SKYLAKE: # %bb.0: 5040 ; 
SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 5041 ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 5042 ; SKYLAKE-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] 5043 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5044 ; 5045 ; SKX-SSE-LABEL: test_movlpd: 5046 ; SKX-SSE: # %bb.0: 5047 ; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 5048 ; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] 5049 ; SKX-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] 5050 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 5051 ; 5052 ; SKX-LABEL: test_movlpd: 5053 ; SKX: # %bb.0: 5054 ; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 5055 ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 5056 ; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] 5057 ; SKX-NEXT: retq # sched: [7:1.00] 5058 ; 5059 ; BTVER2-SSE-LABEL: test_movlpd: 5060 ; BTVER2-SSE: # %bb.0: 5061 ; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 5062 ; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 5063 ; BTVER2-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [2:1.00] 5064 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 5065 ; 5066 ; BTVER2-LABEL: test_movlpd: 5067 ; BTVER2: # %bb.0: 5068 ; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] 5069 ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5070 ; BTVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [2:1.00] 5071 ; BTVER2-NEXT: retq # sched: [4:1.00] 5072 ; 5073 ; ZNVER1-SSE-LABEL: test_movlpd: 5074 ; ZNVER1-SSE: # %bb.0: 5075 ; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] 5076 ; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 5077 ; ZNVER1-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:0.50] 5078 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 5079 ; 5080 ; ZNVER1-LABEL: test_movlpd: 5081 ; ZNVER1: # %bb.0: 5082 ; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] 5083 ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 5084 ; 
ZNVER1-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:0.50] 5085 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5086 %1 = bitcast x86_mmx* %a2 to double* 5087 %2 = load double, double *%1, align 8 5088 %3 = insertelement <2 x double> %a1, double %2, i32 0 5089 %4 = fadd <2 x double> %a0, %3 5090 %5 = extractelement <2 x double> %4, i32 0 5091 store double %5, double* %1 5092 ret void 5093 } 5094 5095 define i32 @test_movmskpd(<2 x double> %a0) { 5096 ; GENERIC-LABEL: test_movmskpd: 5097 ; GENERIC: # %bb.0: 5098 ; GENERIC-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] 5099 ; GENERIC-NEXT: retq # sched: [1:1.00] 5100 ; 5101 ; ATOM-LABEL: test_movmskpd: 5102 ; ATOM: # %bb.0: 5103 ; ATOM-NEXT: movmskpd %xmm0, %eax # sched: [3:3.00] 5104 ; ATOM-NEXT: nop # sched: [1:0.50] 5105 ; ATOM-NEXT: nop # sched: [1:0.50] 5106 ; ATOM-NEXT: retq # sched: [79:39.50] 5107 ; 5108 ; SLM-LABEL: test_movmskpd: 5109 ; SLM: # %bb.0: 5110 ; SLM-NEXT: movmskpd %xmm0, %eax # sched: [4:1.00] 5111 ; SLM-NEXT: retq # sched: [4:1.00] 5112 ; 5113 ; SANDY-SSE-LABEL: test_movmskpd: 5114 ; SANDY-SSE: # %bb.0: 5115 ; SANDY-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] 5116 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 5117 ; 5118 ; SANDY-LABEL: test_movmskpd: 5119 ; SANDY: # %bb.0: 5120 ; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] 5121 ; SANDY-NEXT: retq # sched: [1:1.00] 5122 ; 5123 ; HASWELL-SSE-LABEL: test_movmskpd: 5124 ; HASWELL-SSE: # %bb.0: 5125 ; HASWELL-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00] 5126 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 5127 ; 5128 ; HASWELL-LABEL: test_movmskpd: 5129 ; HASWELL: # %bb.0: 5130 ; HASWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] 5131 ; HASWELL-NEXT: retq # sched: [7:1.00] 5132 ; 5133 ; BROADWELL-SSE-LABEL: test_movmskpd: 5134 ; BROADWELL-SSE: # %bb.0: 5135 ; BROADWELL-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00] 5136 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 5137 ; 5138 ; BROADWELL-LABEL: test_movmskpd: 5139 ; BROADWELL: # %bb.0: 5140 ; 
BROADWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] 5141 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5142 ; 5143 ; SKYLAKE-SSE-LABEL: test_movmskpd: 5144 ; SKYLAKE-SSE: # %bb.0: 5145 ; SKYLAKE-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] 5146 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 5147 ; 5148 ; SKYLAKE-LABEL: test_movmskpd: 5149 ; SKYLAKE: # %bb.0: 5150 ; SKYLAKE-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] 5151 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5152 ; 5153 ; SKX-SSE-LABEL: test_movmskpd: 5154 ; SKX-SSE: # %bb.0: 5155 ; SKX-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] 5156 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 5157 ; 5158 ; SKX-LABEL: test_movmskpd: 5159 ; SKX: # %bb.0: 5160 ; SKX-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] 5161 ; SKX-NEXT: retq # sched: [7:1.00] 5162 ; 5163 ; BTVER2-SSE-LABEL: test_movmskpd: 5164 ; BTVER2-SSE: # %bb.0: 5165 ; BTVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00] 5166 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 5167 ; 5168 ; BTVER2-LABEL: test_movmskpd: 5169 ; BTVER2: # %bb.0: 5170 ; BTVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] 5171 ; BTVER2-NEXT: retq # sched: [4:1.00] 5172 ; 5173 ; ZNVER1-SSE-LABEL: test_movmskpd: 5174 ; ZNVER1-SSE: # %bb.0: 5175 ; ZNVER1-SSE-NEXT: movmskpd %xmm0, %eax # sched: [1:1.00] 5176 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 5177 ; 5178 ; ZNVER1-LABEL: test_movmskpd: 5179 ; ZNVER1: # %bb.0: 5180 ; ZNVER1-NEXT: vmovmskpd %xmm0, %eax # sched: [1:1.00] 5181 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5182 %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) 5183 ret i32 %1 5184 } 5185 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone 5186 5187 define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) { 5188 ; GENERIC-LABEL: test_movntdqa: 5189 ; GENERIC: # %bb.0: 5190 ; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 5191 ; GENERIC-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] 5192 ; GENERIC-NEXT: retq # sched: [1:1.00] 5193 ; 5194 ; ATOM-LABEL: 
test_movntdqa: 5195 ; ATOM: # %bb.0: 5196 ; ATOM-NEXT: paddq %xmm0, %xmm0 # sched: [2:1.00] 5197 ; ATOM-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] 5198 ; ATOM-NEXT: nop # sched: [1:0.50] 5199 ; ATOM-NEXT: nop # sched: [1:0.50] 5200 ; ATOM-NEXT: retq # sched: [79:39.50] 5201 ; 5202 ; SLM-LABEL: test_movntdqa: 5203 ; SLM: # %bb.0: 5204 ; SLM-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 5205 ; SLM-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] 5206 ; SLM-NEXT: retq # sched: [4:1.00] 5207 ; 5208 ; SANDY-SSE-LABEL: test_movntdqa: 5209 ; SANDY-SSE: # %bb.0: 5210 ; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 5211 ; SANDY-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] 5212 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 5213 ; 5214 ; SANDY-LABEL: test_movntdqa: 5215 ; SANDY: # %bb.0: 5216 ; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] 5217 ; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] 5218 ; SANDY-NEXT: retq # sched: [1:1.00] 5219 ; 5220 ; HASWELL-SSE-LABEL: test_movntdqa: 5221 ; HASWELL-SSE: # %bb.0: 5222 ; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 5223 ; HASWELL-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] 5224 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 5225 ; 5226 ; HASWELL-LABEL: test_movntdqa: 5227 ; HASWELL: # %bb.0: 5228 ; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] 5229 ; HASWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] 5230 ; HASWELL-NEXT: retq # sched: [7:1.00] 5231 ; 5232 ; BROADWELL-SSE-LABEL: test_movntdqa: 5233 ; BROADWELL-SSE: # %bb.0: 5234 ; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 5235 ; BROADWELL-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] 5236 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 5237 ; 5238 ; BROADWELL-LABEL: test_movntdqa: 5239 ; BROADWELL: # %bb.0: 5240 ; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] 5241 ; BROADWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] 5242 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5243 ; 5244 ; SKYLAKE-SSE-LABEL: 
test_movntdqa: 5245 ; SKYLAKE-SSE: # %bb.0: 5246 ; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] 5247 ; SKYLAKE-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] 5248 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 5249 ; 5250 ; SKYLAKE-LABEL: test_movntdqa: 5251 ; SKYLAKE: # %bb.0: 5252 ; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] 5253 ; SKYLAKE-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] 5254 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5255 ; 5256 ; SKX-SSE-LABEL: test_movntdqa: 5257 ; SKX-SSE: # %bb.0: 5258 ; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] 5259 ; SKX-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] 5260 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 5261 ; 5262 ; SKX-LABEL: test_movntdqa: 5263 ; SKX: # %bb.0: 5264 ; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] 5265 ; SKX-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] 5266 ; SKX-NEXT: retq # sched: [7:1.00] 5267 ; 5268 ; BTVER2-SSE-LABEL: test_movntdqa: 5269 ; BTVER2-SSE: # %bb.0: 5270 ; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] 5271 ; BTVER2-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [2:1.00] 5272 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 5273 ; 5274 ; BTVER2-LABEL: test_movntdqa: 5275 ; BTVER2: # %bb.0: 5276 ; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] 5277 ; BTVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [2:1.00] 5278 ; BTVER2-NEXT: retq # sched: [4:1.00] 5279 ; 5280 ; ZNVER1-SSE-LABEL: test_movntdqa: 5281 ; ZNVER1-SSE: # %bb.0: 5282 ; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25] 5283 ; ZNVER1-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:0.50] 5284 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 5285 ; 5286 ; ZNVER1-LABEL: test_movntdqa: 5287 ; ZNVER1: # %bb.0: 5288 ; ZNVER1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25] 5289 ; ZNVER1-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:0.50] 5290 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5291 %1 = add <2 x i64> %a0, %a0 5292 store <2 x i64> %1, <2 x i64> *%a1, align 16, !nontemporal !0 5293 ret void 5294 } 
5295 5296 define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) { 5297 ; GENERIC-LABEL: test_movntpd: 5298 ; GENERIC: # %bb.0: 5299 ; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 5300 ; GENERIC-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] 5301 ; GENERIC-NEXT: retq # sched: [1:1.00] 5302 ; 5303 ; ATOM-LABEL: test_movntpd: 5304 ; ATOM: # %bb.0: 5305 ; ATOM-NEXT: addpd %xmm0, %xmm0 # sched: [6:3.00] 5306 ; ATOM-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] 5307 ; ATOM-NEXT: retq # sched: [79:39.50] 5308 ; 5309 ; SLM-LABEL: test_movntpd: 5310 ; SLM: # %bb.0: 5311 ; SLM-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 5312 ; SLM-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] 5313 ; SLM-NEXT: retq # sched: [4:1.00] 5314 ; 5315 ; SANDY-SSE-LABEL: test_movntpd: 5316 ; SANDY-SSE: # %bb.0: 5317 ; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 5318 ; SANDY-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] 5319 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 5320 ; 5321 ; SANDY-LABEL: test_movntpd: 5322 ; SANDY: # %bb.0: 5323 ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 5324 ; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] 5325 ; SANDY-NEXT: retq # sched: [1:1.00] 5326 ; 5327 ; HASWELL-SSE-LABEL: test_movntpd: 5328 ; HASWELL-SSE: # %bb.0: 5329 ; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 5330 ; HASWELL-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] 5331 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 5332 ; 5333 ; HASWELL-LABEL: test_movntpd: 5334 ; HASWELL: # %bb.0: 5335 ; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 5336 ; HASWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] 5337 ; HASWELL-NEXT: retq # sched: [7:1.00] 5338 ; 5339 ; BROADWELL-SSE-LABEL: test_movntpd: 5340 ; BROADWELL-SSE: # %bb.0: 5341 ; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 5342 ; BROADWELL-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] 5343 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 5344 ; 5345 ; BROADWELL-LABEL: test_movntpd: 
5346 ; BROADWELL: # %bb.0: 5347 ; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 5348 ; BROADWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] 5349 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5350 ; 5351 ; SKYLAKE-SSE-LABEL: test_movntpd: 5352 ; SKYLAKE-SSE: # %bb.0: 5353 ; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] 5354 ; SKYLAKE-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] 5355 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 5356 ; 5357 ; SKYLAKE-LABEL: test_movntpd: 5358 ; SKYLAKE: # %bb.0: 5359 ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 5360 ; SKYLAKE-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] 5361 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5362 ; 5363 ; SKX-SSE-LABEL: test_movntpd: 5364 ; SKX-SSE: # %bb.0: 5365 ; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] 5366 ; SKX-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] 5367 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 5368 ; 5369 ; SKX-LABEL: test_movntpd: 5370 ; SKX: # %bb.0: 5371 ; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 5372 ; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] 5373 ; SKX-NEXT: retq # sched: [7:1.00] 5374 ; 5375 ; BTVER2-SSE-LABEL: test_movntpd: 5376 ; BTVER2-SSE: # %bb.0: 5377 ; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 5378 ; BTVER2-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [3:1.00] 5379 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 5380 ; 5381 ; BTVER2-LABEL: test_movntpd: 5382 ; BTVER2: # %bb.0: 5383 ; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 5384 ; BTVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [3:1.00] 5385 ; BTVER2-NEXT: retq # sched: [4:1.00] 5386 ; 5387 ; ZNVER1-SSE-LABEL: test_movntpd: 5388 ; ZNVER1-SSE: # %bb.0: 5389 ; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 5390 ; ZNVER1-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:0.50] 5391 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 5392 ; 5393 ; ZNVER1-LABEL: test_movntpd: 5394 ; ZNVER1: # %bb.0: 5395 ; ZNVER1-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: 
[3:1.00] 5396 ; ZNVER1-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:0.50] 5397 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5398 %1 = fadd <2 x double> %a0, %a0 5399 store <2 x double> %1, <2 x double> *%a1, align 16, !nontemporal !0 5400 ret void 5401 } 5402 5403 define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { 5404 ; GENERIC-LABEL: test_movq_mem: 5405 ; GENERIC: # %bb.0: 5406 ; GENERIC-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] 5407 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 5408 ; GENERIC-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] 5409 ; GENERIC-NEXT: retq # sched: [1:1.00] 5410 ; 5411 ; ATOM-LABEL: test_movq_mem: 5412 ; ATOM: # %bb.0: 5413 ; ATOM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00] 5414 ; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] 5415 ; ATOM-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] 5416 ; ATOM-NEXT: retq # sched: [79:39.50] 5417 ; 5418 ; SLM-LABEL: test_movq_mem: 5419 ; SLM: # %bb.0: 5420 ; SLM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [3:1.00] 5421 ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 5422 ; SLM-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] 5423 ; SLM-NEXT: retq # sched: [4:1.00] 5424 ; 5425 ; SANDY-SSE-LABEL: test_movq_mem: 5426 ; SANDY-SSE: # %bb.0: 5427 ; SANDY-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] 5428 ; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 5429 ; SANDY-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] 5430 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 5431 ; 5432 ; SANDY-LABEL: test_movq_mem: 5433 ; SANDY: # %bb.0: 5434 ; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] 5435 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5436 ; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] 5437 ; SANDY-NEXT: retq # sched: [1:1.00] 5438 ; 5439 ; HASWELL-SSE-LABEL: test_movq_mem: 5440 ; HASWELL-SSE: # %bb.0: 5441 ; HASWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] 5442 ; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 5443 ; 
HASWELL-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] 5444 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 5445 ; 5446 ; HASWELL-LABEL: test_movq_mem: 5447 ; HASWELL: # %bb.0: 5448 ; HASWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] 5449 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5450 ; HASWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] 5451 ; HASWELL-NEXT: retq # sched: [7:1.00] 5452 ; 5453 ; BROADWELL-SSE-LABEL: test_movq_mem: 5454 ; BROADWELL-SSE: # %bb.0: 5455 ; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] 5456 ; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 5457 ; BROADWELL-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] 5458 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 5459 ; 5460 ; BROADWELL-LABEL: test_movq_mem: 5461 ; BROADWELL: # %bb.0: 5462 ; BROADWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] 5463 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5464 ; BROADWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] 5465 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5466 ; 5467 ; SKYLAKE-SSE-LABEL: test_movq_mem: 5468 ; SKYLAKE-SSE: # %bb.0: 5469 ; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] 5470 ; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 5471 ; SKYLAKE-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] 5472 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 5473 ; 5474 ; SKYLAKE-LABEL: test_movq_mem: 5475 ; SKYLAKE: # %bb.0: 5476 ; SKYLAKE-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] 5477 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 5478 ; SKYLAKE-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] 5479 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5480 ; 5481 ; SKX-SSE-LABEL: test_movq_mem: 5482 ; SKX-SSE: # %bb.0: 5483 ; SKX-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] 5484 ; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 5485 ; SKX-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] 5486 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 
5487 ; 5488 ; SKX-LABEL: test_movq_mem: 5489 ; SKX: # %bb.0: 5490 ; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] 5491 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 5492 ; SKX-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] 5493 ; SKX-NEXT: retq # sched: [7:1.00] 5494 ; 5495 ; BTVER2-SSE-LABEL: test_movq_mem: 5496 ; BTVER2-SSE: # %bb.0: 5497 ; BTVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] 5498 ; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 5499 ; BTVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [2:1.00] 5500 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 5501 ; 5502 ; BTVER2-LABEL: test_movq_mem: 5503 ; BTVER2: # %bb.0: 5504 ; BTVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] 5505 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5506 ; BTVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [2:1.00] 5507 ; BTVER2-NEXT: retq # sched: [4:1.00] 5508 ; 5509 ; ZNVER1-SSE-LABEL: test_movq_mem: 5510 ; ZNVER1-SSE: # %bb.0: 5511 ; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] 5512 ; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] 5513 ; ZNVER1-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:0.50] 5514 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 5515 ; 5516 ; ZNVER1-LABEL: test_movq_mem: 5517 ; ZNVER1: # %bb.0: 5518 ; ZNVER1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] 5519 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 5520 ; ZNVER1-NEXT: vmovq %xmm0, (%rdi) # sched: [1:0.50] 5521 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5522 %1 = load i64, i64* %a1, align 1 5523 %2 = insertelement <2 x i64> zeroinitializer, i64 %1, i32 0 5524 %3 = add <2 x i64> %a0, %2 5525 %4 = extractelement <2 x i64> %3, i32 0 5526 store i64 %4, i64 *%a1, align 1 5527 ret <2 x i64> %3 5528 } 5529 5530 define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) { 5531 ; GENERIC-LABEL: test_movq_reg: 5532 ; GENERIC: # %bb.0: 5533 ; GENERIC-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] 5534 ; GENERIC-NEXT: 
paddq %xmm1, %xmm0 # sched: [1:0.50] 5535 ; GENERIC-NEXT: retq # sched: [1:1.00] 5536 ; 5537 ; ATOM-LABEL: test_movq_reg: 5538 ; ATOM: # %bb.0: 5539 ; ATOM-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] 5540 ; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] 5541 ; ATOM-NEXT: nop # sched: [1:0.50] 5542 ; ATOM-NEXT: nop # sched: [1:0.50] 5543 ; ATOM-NEXT: retq # sched: [79:39.50] 5544 ; 5545 ; SLM-LABEL: test_movq_reg: 5546 ; SLM: # %bb.0: 5547 ; SLM-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] 5548 ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 5549 ; SLM-NEXT: retq # sched: [4:1.00] 5550 ; 5551 ; SANDY-SSE-LABEL: test_movq_reg: 5552 ; SANDY-SSE: # %bb.0: 5553 ; SANDY-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] 5554 ; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 5555 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 5556 ; 5557 ; SANDY-LABEL: test_movq_reg: 5558 ; SANDY: # %bb.0: 5559 ; SANDY-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] 5560 ; SANDY-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] 5561 ; SANDY-NEXT: retq # sched: [1:1.00] 5562 ; 5563 ; HASWELL-SSE-LABEL: test_movq_reg: 5564 ; HASWELL-SSE: # %bb.0: 5565 ; HASWELL-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] 5566 ; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 5567 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 5568 ; 5569 ; HASWELL-LABEL: test_movq_reg: 5570 ; HASWELL: # %bb.0: 5571 ; HASWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] 5572 ; HASWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] 5573 ; HASWELL-NEXT: retq # sched: [7:1.00] 5574 ; 5575 ; BROADWELL-SSE-LABEL: test_movq_reg: 5576 ; BROADWELL-SSE: # %bb.0: 5577 ; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] 5578 ; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 5579 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 5580 ; 5581 ; BROADWELL-LABEL: test_movq_reg: 5582 ; BROADWELL: # %bb.0: 5583 ; BROADWELL-NEXT: vmovq 
{{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] 5584 ; BROADWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] 5585 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5586 ; 5587 ; SKYLAKE-SSE-LABEL: test_movq_reg: 5588 ; SKYLAKE-SSE: # %bb.0: 5589 ; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] 5590 ; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 5591 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 5592 ; 5593 ; SKYLAKE-LABEL: test_movq_reg: 5594 ; SKYLAKE: # %bb.0: 5595 ; SKYLAKE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] 5596 ; SKYLAKE-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 5597 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5598 ; 5599 ; SKX-SSE-LABEL: test_movq_reg: 5600 ; SKX-SSE: # %bb.0: 5601 ; SKX-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] 5602 ; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 5603 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 5604 ; 5605 ; SKX-LABEL: test_movq_reg: 5606 ; SKX: # %bb.0: 5607 ; SKX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] 5608 ; SKX-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 5609 ; SKX-NEXT: retq # sched: [7:1.00] 5610 ; 5611 ; BTVER2-SSE-LABEL: test_movq_reg: 5612 ; BTVER2-SSE: # %bb.0: 5613 ; BTVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] 5614 ; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 5615 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 5616 ; 5617 ; BTVER2-LABEL: test_movq_reg: 5618 ; BTVER2: # %bb.0: 5619 ; BTVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] 5620 ; BTVER2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] 5621 ; BTVER2-NEXT: retq # sched: [4:1.00] 5622 ; 5623 ; ZNVER1-SSE-LABEL: test_movq_reg: 5624 ; ZNVER1-SSE: # %bb.0: 5625 ; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25] 5626 ; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] 5627 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 5628 ; 5629 ; ZNVER1-LABEL: test_movq_reg: 5630 ; ZNVER1: # %bb.0: 5631 ; ZNVER1-NEXT: vmovq 
{{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25] 5632 ; ZNVER1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.25] 5633 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5634 %1 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2> 5635 %2 = add <2 x i64> %a1, %1 5636 ret <2 x i64> %2 5637 } 5638 5639 define void @test_movsd_mem(double* %a0, double* %a1) { 5640 ; GENERIC-LABEL: test_movsd_mem: 5641 ; GENERIC: # %bb.0: 5642 ; GENERIC-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] 5643 ; GENERIC-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] 5644 ; GENERIC-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] 5645 ; GENERIC-NEXT: retq # sched: [1:1.00] 5646 ; 5647 ; ATOM-LABEL: test_movsd_mem: 5648 ; ATOM: # %bb.0: 5649 ; ATOM-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [1:1.00] 5650 ; ATOM-NEXT: addsd %xmm0, %xmm0 # sched: [5:5.00] 5651 ; ATOM-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] 5652 ; ATOM-NEXT: retq # sched: [79:39.50] 5653 ; 5654 ; SLM-LABEL: test_movsd_mem: 5655 ; SLM: # %bb.0: 5656 ; SLM-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [3:1.00] 5657 ; SLM-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] 5658 ; SLM-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] 5659 ; SLM-NEXT: retq # sched: [4:1.00] 5660 ; 5661 ; SANDY-SSE-LABEL: test_movsd_mem: 5662 ; SANDY-SSE: # %bb.0: 5663 ; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] 5664 ; SANDY-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] 5665 ; SANDY-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] 5666 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 5667 ; 5668 ; SANDY-LABEL: test_movsd_mem: 5669 ; SANDY: # %bb.0: 5670 ; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] 5671 ; SANDY-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 5672 ; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] 5673 ; SANDY-NEXT: retq # sched: [1:1.00] 5674 ; 5675 ; HASWELL-SSE-LABEL: test_movsd_mem: 5676 ; HASWELL-SSE: # %bb.0: 5677 ; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] 
5678 ; HASWELL-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] 5679 ; HASWELL-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] 5680 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 5681 ; 5682 ; HASWELL-LABEL: test_movsd_mem: 5683 ; HASWELL: # %bb.0: 5684 ; HASWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] 5685 ; HASWELL-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 5686 ; HASWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] 5687 ; HASWELL-NEXT: retq # sched: [7:1.00] 5688 ; 5689 ; BROADWELL-SSE-LABEL: test_movsd_mem: 5690 ; BROADWELL-SSE: # %bb.0: 5691 ; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] 5692 ; BROADWELL-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] 5693 ; BROADWELL-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] 5694 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 5695 ; 5696 ; BROADWELL-LABEL: test_movsd_mem: 5697 ; BROADWELL: # %bb.0: 5698 ; BROADWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] 5699 ; BROADWELL-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 5700 ; BROADWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] 5701 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5702 ; 5703 ; SKYLAKE-SSE-LABEL: test_movsd_mem: 5704 ; SKYLAKE-SSE: # %bb.0: 5705 ; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] 5706 ; SKYLAKE-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.50] 5707 ; SKYLAKE-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] 5708 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 5709 ; 5710 ; SKYLAKE-LABEL: test_movsd_mem: 5711 ; SKYLAKE: # %bb.0: 5712 ; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] 5713 ; SKYLAKE-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 5714 ; SKYLAKE-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] 5715 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5716 ; 5717 ; SKX-SSE-LABEL: test_movsd_mem: 5718 ; SKX-SSE: # %bb.0: 5719 ; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] 5720 ; SKX-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.50] 5721 ; 
SKX-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] 5722 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 5723 ; 5724 ; SKX-LABEL: test_movsd_mem: 5725 ; SKX: # %bb.0: 5726 ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] 5727 ; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 5728 ; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] 5729 ; SKX-NEXT: retq # sched: [7:1.00] 5730 ; 5731 ; BTVER2-SSE-LABEL: test_movsd_mem: 5732 ; BTVER2-SSE: # %bb.0: 5733 ; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] 5734 ; BTVER2-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] 5735 ; BTVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [2:1.00] 5736 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 5737 ; 5738 ; BTVER2-LABEL: test_movsd_mem: 5739 ; BTVER2: # %bb.0: 5740 ; BTVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] 5741 ; BTVER2-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 5742 ; BTVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [2:1.00] 5743 ; BTVER2-NEXT: retq # sched: [4:1.00] 5744 ; 5745 ; ZNVER1-SSE-LABEL: test_movsd_mem: 5746 ; ZNVER1-SSE: # %bb.0: 5747 ; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50] 5748 ; ZNVER1-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] 5749 ; ZNVER1-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:0.50] 5750 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 5751 ; 5752 ; ZNVER1-LABEL: test_movsd_mem: 5753 ; ZNVER1: # %bb.0: 5754 ; ZNVER1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50] 5755 ; ZNVER1-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 5756 ; ZNVER1-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:0.50] 5757 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5758 %1 = load double, double* %a0, align 1 5759 %2 = fadd double %1, %1 5760 store double %2, double *%a1, align 1 5761 ret void 5762 } 5763 5764 define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) { 5765 ; GENERIC-LABEL: test_movsd_reg: 5766 ; GENERIC: # %bb.0: 5767 ; GENERIC-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] 
5768 ; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] 5769 ; GENERIC-NEXT: retq # sched: [1:1.00] 5770 ; 5771 ; ATOM-LABEL: test_movsd_reg: 5772 ; ATOM: # %bb.0: 5773 ; ATOM-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] 5774 ; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] 5775 ; ATOM-NEXT: nop # sched: [1:0.50] 5776 ; ATOM-NEXT: nop # sched: [1:0.50] 5777 ; ATOM-NEXT: nop # sched: [1:0.50] 5778 ; ATOM-NEXT: nop # sched: [1:0.50] 5779 ; ATOM-NEXT: retq # sched: [79:39.50] 5780 ; 5781 ; SLM-LABEL: test_movsd_reg: 5782 ; SLM: # %bb.0: 5783 ; SLM-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] 5784 ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] 5785 ; SLM-NEXT: retq # sched: [4:1.00] 5786 ; 5787 ; SANDY-SSE-LABEL: test_movsd_reg: 5788 ; SANDY-SSE: # %bb.0: 5789 ; SANDY-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] 5790 ; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] 5791 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 5792 ; 5793 ; SANDY-LABEL: test_movsd_reg: 5794 ; SANDY: # %bb.0: 5795 ; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] 5796 ; SANDY-NEXT: retq # sched: [1:1.00] 5797 ; 5798 ; HASWELL-SSE-LABEL: test_movsd_reg: 5799 ; HASWELL-SSE: # %bb.0: 5800 ; HASWELL-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] 5801 ; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] 5802 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 5803 ; 5804 ; HASWELL-LABEL: test_movsd_reg: 5805 ; HASWELL: # %bb.0: 5806 ; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] 5807 ; HASWELL-NEXT: retq # sched: [7:1.00] 5808 ; 5809 ; BROADWELL-SSE-LABEL: test_movsd_reg: 5810 ; BROADWELL-SSE: # %bb.0: 5811 ; BROADWELL-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] 5812 ; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] 5813 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 5814 ; 5815 ; BROADWELL-LABEL: test_movsd_reg: 5816 ; BROADWELL: # %bb.0: 5817 ; 
BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] 5818 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5819 ; 5820 ; SKYLAKE-SSE-LABEL: test_movsd_reg: 5821 ; SKYLAKE-SSE: # %bb.0: 5822 ; SKYLAKE-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] 5823 ; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33] 5824 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 5825 ; 5826 ; SKYLAKE-LABEL: test_movsd_reg: 5827 ; SKYLAKE: # %bb.0: 5828 ; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] 5829 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5830 ; 5831 ; SKX-SSE-LABEL: test_movsd_reg: 5832 ; SKX-SSE: # %bb.0: 5833 ; SKX-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] 5834 ; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33] 5835 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 5836 ; 5837 ; SKX-LABEL: test_movsd_reg: 5838 ; SKX: # %bb.0: 5839 ; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] 5840 ; SKX-NEXT: retq # sched: [7:1.00] 5841 ; 5842 ; BTVER2-SSE-LABEL: test_movsd_reg: 5843 ; BTVER2-SSE: # %bb.0: 5844 ; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50] 5845 ; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] 5846 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 5847 ; 5848 ; BTVER2-LABEL: test_movsd_reg: 5849 ; BTVER2: # %bb.0: 5850 ; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50] 5851 ; BTVER2-NEXT: retq # sched: [4:1.00] 5852 ; 5853 ; ZNVER1-SSE-LABEL: test_movsd_reg: 5854 ; ZNVER1-SSE: # %bb.0: 5855 ; ZNVER1-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50] 5856 ; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25] 5857 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 5858 ; 5859 ; ZNVER1-LABEL: test_movsd_reg: 5860 ; ZNVER1: # %bb.0: 5861 ; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50] 5862 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5863 %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 2, i32 0> 
5864 ret <2 x double> %1 5865 } 5866 5867 define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { 5868 ; GENERIC-LABEL: test_movupd: 5869 ; GENERIC: # %bb.0: 5870 ; GENERIC-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] 5871 ; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 5872 ; GENERIC-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] 5873 ; GENERIC-NEXT: retq # sched: [1:1.00] 5874 ; 5875 ; ATOM-LABEL: test_movupd: 5876 ; ATOM: # %bb.0: 5877 ; ATOM-NEXT: movupd (%rdi), %xmm0 # sched: [3:1.50] 5878 ; ATOM-NEXT: addpd %xmm0, %xmm0 # sched: [6:3.00] 5879 ; ATOM-NEXT: movupd %xmm0, (%rsi) # sched: [2:1.00] 5880 ; ATOM-NEXT: retq # sched: [79:39.50] 5881 ; 5882 ; SLM-LABEL: test_movupd: 5883 ; SLM: # %bb.0: 5884 ; SLM-NEXT: movupd (%rdi), %xmm0 # sched: [3:1.00] 5885 ; SLM-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 5886 ; SLM-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] 5887 ; SLM-NEXT: retq # sched: [4:1.00] 5888 ; 5889 ; SANDY-SSE-LABEL: test_movupd: 5890 ; SANDY-SSE: # %bb.0: 5891 ; SANDY-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] 5892 ; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 5893 ; SANDY-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] 5894 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 5895 ; 5896 ; SANDY-LABEL: test_movupd: 5897 ; SANDY: # %bb.0: 5898 ; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] 5899 ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 5900 ; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] 5901 ; SANDY-NEXT: retq # sched: [1:1.00] 5902 ; 5903 ; HASWELL-SSE-LABEL: test_movupd: 5904 ; HASWELL-SSE: # %bb.0: 5905 ; HASWELL-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] 5906 ; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 5907 ; HASWELL-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] 5908 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 5909 ; 5910 ; HASWELL-LABEL: test_movupd: 5911 ; HASWELL: # %bb.0: 5912 ; HASWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] 5913 ; HASWELL-NEXT: vaddpd %xmm0, %xmm0, 
%xmm0 # sched: [3:1.00] 5914 ; HASWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] 5915 ; HASWELL-NEXT: retq # sched: [7:1.00] 5916 ; 5917 ; BROADWELL-SSE-LABEL: test_movupd: 5918 ; BROADWELL-SSE: # %bb.0: 5919 ; BROADWELL-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:0.50] 5920 ; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 5921 ; BROADWELL-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] 5922 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 5923 ; 5924 ; BROADWELL-LABEL: test_movupd: 5925 ; BROADWELL: # %bb.0: 5926 ; BROADWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:0.50] 5927 ; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 5928 ; BROADWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] 5929 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5930 ; 5931 ; SKYLAKE-SSE-LABEL: test_movupd: 5932 ; SKYLAKE-SSE: # %bb.0: 5933 ; SKYLAKE-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] 5934 ; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] 5935 ; SKYLAKE-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] 5936 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 5937 ; 5938 ; SKYLAKE-LABEL: test_movupd: 5939 ; SKYLAKE: # %bb.0: 5940 ; SKYLAKE-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] 5941 ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 5942 ; SKYLAKE-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] 5943 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5944 ; 5945 ; SKX-SSE-LABEL: test_movupd: 5946 ; SKX-SSE: # %bb.0: 5947 ; SKX-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] 5948 ; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] 5949 ; SKX-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] 5950 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 5951 ; 5952 ; SKX-LABEL: test_movupd: 5953 ; SKX: # %bb.0: 5954 ; SKX-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] 5955 ; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 5956 ; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] 5957 ; SKX-NEXT: retq # sched: [7:1.00] 5958 ; 5959 ; BTVER2-SSE-LABEL: test_movupd: 5960 ; 
BTVER2-SSE: # %bb.0: 5961 ; BTVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:1.00] 5962 ; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 5963 ; BTVER2-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] 5964 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 5965 ; 5966 ; BTVER2-LABEL: test_movupd: 5967 ; BTVER2: # %bb.0: 5968 ; BTVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:1.00] 5969 ; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 5970 ; BTVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] 5971 ; BTVER2-NEXT: retq # sched: [4:1.00] 5972 ; 5973 ; ZNVER1-SSE-LABEL: test_movupd: 5974 ; ZNVER1-SSE: # %bb.0: 5975 ; ZNVER1-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [8:0.50] 5976 ; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] 5977 ; ZNVER1-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:0.50] 5978 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 5979 ; 5980 ; ZNVER1-LABEL: test_movupd: 5981 ; ZNVER1: # %bb.0: 5982 ; ZNVER1-NEXT: vmovupd (%rdi), %xmm0 # sched: [8:0.50] 5983 ; ZNVER1-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 5984 ; ZNVER1-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:0.50] 5985 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5986 %1 = load <2 x double>, <2 x double> *%a0, align 1 5987 %2 = fadd <2 x double> %1, %1 5988 store <2 x double> %2, <2 x double> *%a1, align 1 5989 ret void 5990 } 5991 5992 define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { 5993 ; GENERIC-LABEL: test_mulpd: 5994 ; GENERIC: # %bb.0: 5995 ; GENERIC-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00] 5996 ; GENERIC-NEXT: mulpd (%rdi), %xmm0 # sched: [11:1.00] 5997 ; GENERIC-NEXT: retq # sched: [1:1.00] 5998 ; 5999 ; ATOM-LABEL: test_mulpd: 6000 ; ATOM: # %bb.0: 6001 ; ATOM-NEXT: mulpd %xmm1, %xmm0 # sched: [9:4.50] 6002 ; ATOM-NEXT: mulpd (%rdi), %xmm0 # sched: [10:5.00] 6003 ; ATOM-NEXT: retq # sched: [79:39.50] 6004 ; 6005 ; SLM-LABEL: test_mulpd: 6006 ; SLM: # %bb.0: 6007 ; SLM-NEXT: mulpd %xmm1, %xmm0 # sched: [5:2.00] 6008 ; SLM-NEXT: mulpd (%rdi), %xmm0 
# sched: [8:2.00] 6009 ; SLM-NEXT: retq # sched: [4:1.00] 6010 ; 6011 ; SANDY-SSE-LABEL: test_mulpd: 6012 ; SANDY-SSE: # %bb.0: 6013 ; SANDY-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00] 6014 ; SANDY-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:1.00] 6015 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 6016 ; 6017 ; SANDY-LABEL: test_mulpd: 6018 ; SANDY: # %bb.0: 6019 ; SANDY-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 6020 ; SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 6021 ; SANDY-NEXT: retq # sched: [1:1.00] 6022 ; 6023 ; HASWELL-SSE-LABEL: test_mulpd: 6024 ; HASWELL-SSE: # %bb.0: 6025 ; HASWELL-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:0.50] 6026 ; HASWELL-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:0.50] 6027 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 6028 ; 6029 ; HASWELL-LABEL: test_mulpd: 6030 ; HASWELL: # %bb.0: 6031 ; HASWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] 6032 ; HASWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:0.50] 6033 ; HASWELL-NEXT: retq # sched: [7:1.00] 6034 ; 6035 ; BROADWELL-SSE-LABEL: test_mulpd: 6036 ; BROADWELL-SSE: # %bb.0: 6037 ; BROADWELL-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [3:0.50] 6038 ; BROADWELL-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [8:0.50] 6039 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 6040 ; 6041 ; BROADWELL-LABEL: test_mulpd: 6042 ; BROADWELL: # %bb.0: 6043 ; BROADWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] 6044 ; BROADWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 6045 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6046 ; 6047 ; SKYLAKE-SSE-LABEL: test_mulpd: 6048 ; SKYLAKE-SSE: # %bb.0: 6049 ; SKYLAKE-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.50] 6050 ; SKYLAKE-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50] 6051 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 6052 ; 6053 ; SKYLAKE-LABEL: test_mulpd: 6054 ; SKYLAKE: # %bb.0: 6055 ; SKYLAKE-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 6056 ; SKYLAKE-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 6057 ; 
SKYLAKE-NEXT: retq # sched: [7:1.00] 6058 ; 6059 ; SKX-SSE-LABEL: test_mulpd: 6060 ; SKX-SSE: # %bb.0: 6061 ; SKX-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.50] 6062 ; SKX-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50] 6063 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 6064 ; 6065 ; SKX-LABEL: test_mulpd: 6066 ; SKX: # %bb.0: 6067 ; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 6068 ; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 6069 ; SKX-NEXT: retq # sched: [7:1.00] 6070 ; 6071 ; BTVER2-SSE-LABEL: test_mulpd: 6072 ; BTVER2-SSE: # %bb.0: 6073 ; BTVER2-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:2.00] 6074 ; BTVER2-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [9:2.00] 6075 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 6076 ; 6077 ; BTVER2-LABEL: test_mulpd: 6078 ; BTVER2: # %bb.0: 6079 ; BTVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:2.00] 6080 ; BTVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] 6081 ; BTVER2-NEXT: retq # sched: [4:1.00] 6082 ; 6083 ; ZNVER1-SSE-LABEL: test_mulpd: 6084 ; ZNVER1-SSE: # %bb.0: 6085 ; ZNVER1-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [3:0.50] 6086 ; ZNVER1-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50] 6087 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 6088 ; 6089 ; ZNVER1-LABEL: test_mulpd: 6090 ; ZNVER1: # %bb.0: 6091 ; ZNVER1-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] 6092 ; ZNVER1-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 6093 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6094 %1 = fmul <2 x double> %a0, %a1 6095 %2 = load <2 x double>, <2 x double> *%a2, align 16 6096 %3 = fmul <2 x double> %1, %2 6097 ret <2 x double> %3 6098 } 6099 6100 define double @test_mulsd(double %a0, double %a1, double *%a2) { 6101 ; GENERIC-LABEL: test_mulsd: 6102 ; GENERIC: # %bb.0: 6103 ; GENERIC-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00] 6104 ; GENERIC-NEXT: mulsd (%rdi), %xmm0 # sched: [11:1.00] 6105 ; GENERIC-NEXT: retq # sched: [1:1.00] 6106 ; 6107 ; ATOM-LABEL: test_mulsd: 6108 ; ATOM: # %bb.0: 6109 ; ATOM-NEXT: mulsd %xmm1, 
%xmm0 # sched: [5:5.00] 6110 ; ATOM-NEXT: mulsd (%rdi), %xmm0 # sched: [5:5.00] 6111 ; ATOM-NEXT: retq # sched: [79:39.50] 6112 ; 6113 ; SLM-LABEL: test_mulsd: 6114 ; SLM: # %bb.0: 6115 ; SLM-NEXT: mulsd %xmm1, %xmm0 # sched: [5:2.00] 6116 ; SLM-NEXT: mulsd (%rdi), %xmm0 # sched: [8:2.00] 6117 ; SLM-NEXT: retq # sched: [4:1.00] 6118 ; 6119 ; SANDY-SSE-LABEL: test_mulsd: 6120 ; SANDY-SSE: # %bb.0: 6121 ; SANDY-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00] 6122 ; SANDY-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [11:1.00] 6123 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 6124 ; 6125 ; SANDY-LABEL: test_mulsd: 6126 ; SANDY: # %bb.0: 6127 ; SANDY-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 6128 ; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 6129 ; SANDY-NEXT: retq # sched: [1:1.00] 6130 ; 6131 ; HASWELL-SSE-LABEL: test_mulsd: 6132 ; HASWELL-SSE: # %bb.0: 6133 ; HASWELL-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:0.50] 6134 ; HASWELL-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:0.50] 6135 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 6136 ; 6137 ; HASWELL-LABEL: test_mulsd: 6138 ; HASWELL: # %bb.0: 6139 ; HASWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] 6140 ; HASWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 6141 ; HASWELL-NEXT: retq # sched: [7:1.00] 6142 ; 6143 ; BROADWELL-SSE-LABEL: test_mulsd: 6144 ; BROADWELL-SSE: # %bb.0: 6145 ; BROADWELL-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [3:0.50] 6146 ; BROADWELL-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [8:0.50] 6147 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 6148 ; 6149 ; BROADWELL-LABEL: test_mulsd: 6150 ; BROADWELL: # %bb.0: 6151 ; BROADWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] 6152 ; BROADWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 6153 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6154 ; 6155 ; SKYLAKE-SSE-LABEL: test_mulsd: 6156 ; SKYLAKE-SSE: # %bb.0: 6157 ; SKYLAKE-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.50] 6158 ; SKYLAKE-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: 
[9:0.50] 6159 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 6160 ; 6161 ; SKYLAKE-LABEL: test_mulsd: 6162 ; SKYLAKE: # %bb.0: 6163 ; SKYLAKE-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 6164 ; SKYLAKE-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 6165 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6166 ; 6167 ; SKX-SSE-LABEL: test_mulsd: 6168 ; SKX-SSE: # %bb.0: 6169 ; SKX-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.50] 6170 ; SKX-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50] 6171 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 6172 ; 6173 ; SKX-LABEL: test_mulsd: 6174 ; SKX: # %bb.0: 6175 ; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 6176 ; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 6177 ; SKX-NEXT: retq # sched: [7:1.00] 6178 ; 6179 ; BTVER2-SSE-LABEL: test_mulsd: 6180 ; BTVER2-SSE: # %bb.0: 6181 ; BTVER2-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:2.00] 6182 ; BTVER2-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:2.00] 6183 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 6184 ; 6185 ; BTVER2-LABEL: test_mulsd: 6186 ; BTVER2: # %bb.0: 6187 ; BTVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:2.00] 6188 ; BTVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] 6189 ; BTVER2-NEXT: retq # sched: [4:1.00] 6190 ; 6191 ; ZNVER1-SSE-LABEL: test_mulsd: 6192 ; ZNVER1-SSE: # %bb.0: 6193 ; ZNVER1-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [3:0.50] 6194 ; ZNVER1-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:0.50] 6195 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 6196 ; 6197 ; ZNVER1-LABEL: test_mulsd: 6198 ; ZNVER1: # %bb.0: 6199 ; ZNVER1-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] 6200 ; ZNVER1-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 6201 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6202 %1 = fmul double %a0, %a1 6203 %2 = load double, double *%a2, align 8 6204 %3 = fmul double %1, %2 6205 ret double %3 6206 } 6207 6208 define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { 6209 ; GENERIC-LABEL: test_orpd: 6210 ; GENERIC: # %bb.0: 
6211 ; GENERIC-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] 6212 ; GENERIC-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00] 6213 ; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 6214 ; GENERIC-NEXT: retq # sched: [1:1.00] 6215 ; 6216 ; ATOM-LABEL: test_orpd: 6217 ; ATOM: # %bb.0: 6218 ; ATOM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] 6219 ; ATOM-NEXT: orpd (%rdi), %xmm0 # sched: [1:1.00] 6220 ; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] 6221 ; ATOM-NEXT: retq # sched: [79:39.50] 6222 ; 6223 ; SLM-LABEL: test_orpd: 6224 ; SLM: # %bb.0: 6225 ; SLM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] 6226 ; SLM-NEXT: orpd (%rdi), %xmm0 # sched: [4:1.00] 6227 ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 6228 ; SLM-NEXT: retq # sched: [4:1.00] 6229 ; 6230 ; SANDY-SSE-LABEL: test_orpd: 6231 ; SANDY-SSE: # %bb.0: 6232 ; SANDY-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] 6233 ; SANDY-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00] 6234 ; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 6235 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 6236 ; 6237 ; SANDY-LABEL: test_orpd: 6238 ; SANDY: # %bb.0: 6239 ; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6240 ; SANDY-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 6241 ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 6242 ; SANDY-NEXT: retq # sched: [1:1.00] 6243 ; 6244 ; HASWELL-SSE-LABEL: test_orpd: 6245 ; HASWELL-SSE: # %bb.0: 6246 ; HASWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] 6247 ; HASWELL-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00] 6248 ; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 6249 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 6250 ; 6251 ; HASWELL-LABEL: test_orpd: 6252 ; HASWELL: # %bb.0: 6253 ; HASWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6254 ; HASWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 6255 ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 6256 ; HASWELL-NEXT: retq # sched: [7:1.00] 6257 ; 6258 ; BROADWELL-SSE-LABEL: test_orpd: 6259 ; 
BROADWELL-SSE: # %bb.0: 6260 ; BROADWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] 6261 ; BROADWELL-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [6:1.00] 6262 ; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 6263 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 6264 ; 6265 ; BROADWELL-LABEL: test_orpd: 6266 ; BROADWELL: # %bb.0: 6267 ; BROADWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6268 ; BROADWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 6269 ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 6270 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6271 ; 6272 ; SKYLAKE-SSE-LABEL: test_orpd: 6273 ; SKYLAKE-SSE: # %bb.0: 6274 ; SKYLAKE-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] 6275 ; SKYLAKE-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50] 6276 ; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] 6277 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 6278 ; 6279 ; SKYLAKE-LABEL: test_orpd: 6280 ; SKYLAKE: # %bb.0: 6281 ; SKYLAKE-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 6282 ; SKYLAKE-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6283 ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] 6284 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6285 ; 6286 ; SKX-SSE-LABEL: test_orpd: 6287 ; SKX-SSE: # %bb.0: 6288 ; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] 6289 ; SKX-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50] 6290 ; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] 6291 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 6292 ; 6293 ; SKX-LABEL: test_orpd: 6294 ; SKX: # %bb.0: 6295 ; SKX-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 6296 ; SKX-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6297 ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] 6298 ; SKX-NEXT: retq # sched: [7:1.00] 6299 ; 6300 ; BTVER2-SSE-LABEL: test_orpd: 6301 ; BTVER2-SSE: # %bb.0: 6302 ; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] 6303 ; BTVER2-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [6:1.00] 6304 ; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # 
sched: [3:1.00] 6305 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 6306 ; 6307 ; BTVER2-LABEL: test_orpd: 6308 ; BTVER2: # %bb.0: 6309 ; BTVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6310 ; BTVER2-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 6311 ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 6312 ; BTVER2-NEXT: retq # sched: [4:1.00] 6313 ; 6314 ; ZNVER1-SSE-LABEL: test_orpd: 6315 ; ZNVER1-SSE: # %bb.0: 6316 ; ZNVER1-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.25] 6317 ; ZNVER1-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [8:0.50] 6318 ; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 6319 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 6320 ; 6321 ; ZNVER1-LABEL: test_orpd: 6322 ; ZNVER1: # %bb.0: 6323 ; ZNVER1-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 6324 ; ZNVER1-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 6325 ; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 6326 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6327 %1 = bitcast <2 x double> %a0 to <4 x i32> 6328 %2 = bitcast <2 x double> %a1 to <4 x i32> 6329 %3 = or <4 x i32> %1, %2 6330 %4 = load <2 x double>, <2 x double> *%a2, align 16 6331 %5 = bitcast <2 x double> %4 to <4 x i32> 6332 %6 = or <4 x i32> %3, %5 6333 %7 = bitcast <4 x i32> %6 to <2 x double> 6334 %8 = fadd <2 x double> %a1, %7 6335 ret <2 x double> %8 6336 } 6337 6338 define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 6339 ; GENERIC-LABEL: test_packssdw: 6340 ; GENERIC: # %bb.0: 6341 ; GENERIC-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50] 6342 ; GENERIC-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50] 6343 ; GENERIC-NEXT: retq # sched: [1:1.00] 6344 ; 6345 ; ATOM-LABEL: test_packssdw: 6346 ; ATOM: # %bb.0: 6347 ; ATOM-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] 6348 ; ATOM-NEXT: packssdw (%rdi), %xmm0 # sched: [1:1.00] 6349 ; ATOM-NEXT: nop # sched: [1:0.50] 6350 ; ATOM-NEXT: nop # sched: [1:0.50] 6351 ; ATOM-NEXT: nop # sched: [1:0.50] 6352 ; ATOM-NEXT: nop # sched: [1:0.50] 
6353 ; ATOM-NEXT: retq # sched: [79:39.50] 6354 ; 6355 ; SLM-LABEL: test_packssdw: 6356 ; SLM: # %bb.0: 6357 ; SLM-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] 6358 ; SLM-NEXT: packssdw (%rdi), %xmm0 # sched: [4:1.00] 6359 ; SLM-NEXT: retq # sched: [4:1.00] 6360 ; 6361 ; SANDY-SSE-LABEL: test_packssdw: 6362 ; SANDY-SSE: # %bb.0: 6363 ; SANDY-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50] 6364 ; SANDY-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50] 6365 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 6366 ; 6367 ; SANDY-LABEL: test_packssdw: 6368 ; SANDY: # %bb.0: 6369 ; SANDY-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6370 ; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6371 ; SANDY-NEXT: retq # sched: [1:1.00] 6372 ; 6373 ; HASWELL-SSE-LABEL: test_packssdw: 6374 ; HASWELL-SSE: # %bb.0: 6375 ; HASWELL-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] 6376 ; HASWELL-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00] 6377 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 6378 ; 6379 ; HASWELL-LABEL: test_packssdw: 6380 ; HASWELL: # %bb.0: 6381 ; HASWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6382 ; HASWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 6383 ; HASWELL-NEXT: retq # sched: [7:1.00] 6384 ; 6385 ; BROADWELL-SSE-LABEL: test_packssdw: 6386 ; BROADWELL-SSE: # %bb.0: 6387 ; BROADWELL-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] 6388 ; BROADWELL-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [6:1.00] 6389 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 6390 ; 6391 ; BROADWELL-LABEL: test_packssdw: 6392 ; BROADWELL: # %bb.0: 6393 ; BROADWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6394 ; BROADWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 6395 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6396 ; 6397 ; SKYLAKE-SSE-LABEL: test_packssdw: 6398 ; SKYLAKE-SSE: # %bb.0: 6399 ; SKYLAKE-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] 6400 ; SKYLAKE-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00] 6401 ; 
SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 6402 ; 6403 ; SKYLAKE-LABEL: test_packssdw: 6404 ; SKYLAKE: # %bb.0: 6405 ; SKYLAKE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6406 ; SKYLAKE-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 6407 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6408 ; 6409 ; SKX-SSE-LABEL: test_packssdw: 6410 ; SKX-SSE: # %bb.0: 6411 ; SKX-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] 6412 ; SKX-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00] 6413 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 6414 ; 6415 ; SKX-LABEL: test_packssdw: 6416 ; SKX: # %bb.0: 6417 ; SKX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6418 ; SKX-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 6419 ; SKX-NEXT: retq # sched: [7:1.00] 6420 ; 6421 ; BTVER2-SSE-LABEL: test_packssdw: 6422 ; BTVER2-SSE: # %bb.0: 6423 ; BTVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50] 6424 ; BTVER2-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [6:1.00] 6425 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 6426 ; 6427 ; BTVER2-LABEL: test_packssdw: 6428 ; BTVER2: # %bb.0: 6429 ; BTVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6430 ; BTVER2-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 6431 ; BTVER2-NEXT: retq # sched: [4:1.00] 6432 ; 6433 ; ZNVER1-SSE-LABEL: test_packssdw: 6434 ; ZNVER1-SSE: # %bb.0: 6435 ; ZNVER1-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.25] 6436 ; ZNVER1-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [8:0.50] 6437 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 6438 ; 6439 ; ZNVER1-LABEL: test_packssdw: 6440 ; ZNVER1: # %bb.0: 6441 ; ZNVER1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 6442 ; ZNVER1-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 6443 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6444 %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) 6445 %2 = bitcast <8 x i16> %1 to <4 x i32> 6446 %3 = load <4 x i32>, <4 x i32> *%a2, align 16 6447 %4 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> 
%2, <4 x i32> %3) 6448 ret <8 x i16> %4 6449 } 6450 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone 6451 6452 define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 6453 ; GENERIC-LABEL: test_packsswb: 6454 ; GENERIC: # %bb.0: 6455 ; GENERIC-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50] 6456 ; GENERIC-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50] 6457 ; GENERIC-NEXT: retq # sched: [1:1.00] 6458 ; 6459 ; ATOM-LABEL: test_packsswb: 6460 ; ATOM: # %bb.0: 6461 ; ATOM-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] 6462 ; ATOM-NEXT: packsswb (%rdi), %xmm0 # sched: [1:1.00] 6463 ; ATOM-NEXT: nop # sched: [1:0.50] 6464 ; ATOM-NEXT: nop # sched: [1:0.50] 6465 ; ATOM-NEXT: nop # sched: [1:0.50] 6466 ; ATOM-NEXT: nop # sched: [1:0.50] 6467 ; ATOM-NEXT: retq # sched: [79:39.50] 6468 ; 6469 ; SLM-LABEL: test_packsswb: 6470 ; SLM: # %bb.0: 6471 ; SLM-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] 6472 ; SLM-NEXT: packsswb (%rdi), %xmm0 # sched: [4:1.00] 6473 ; SLM-NEXT: retq # sched: [4:1.00] 6474 ; 6475 ; SANDY-SSE-LABEL: test_packsswb: 6476 ; SANDY-SSE: # %bb.0: 6477 ; SANDY-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50] 6478 ; SANDY-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50] 6479 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 6480 ; 6481 ; SANDY-LABEL: test_packsswb: 6482 ; SANDY: # %bb.0: 6483 ; SANDY-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6484 ; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6485 ; SANDY-NEXT: retq # sched: [1:1.00] 6486 ; 6487 ; HASWELL-SSE-LABEL: test_packsswb: 6488 ; HASWELL-SSE: # %bb.0: 6489 ; HASWELL-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] 6490 ; HASWELL-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00] 6491 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 6492 ; 6493 ; HASWELL-LABEL: test_packsswb: 6494 ; HASWELL: # %bb.0: 6495 ; HASWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6496 ; HASWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: 
[7:1.00] 6497 ; HASWELL-NEXT: retq # sched: [7:1.00] 6498 ; 6499 ; BROADWELL-SSE-LABEL: test_packsswb: 6500 ; BROADWELL-SSE: # %bb.0: 6501 ; BROADWELL-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] 6502 ; BROADWELL-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [6:1.00] 6503 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 6504 ; 6505 ; BROADWELL-LABEL: test_packsswb: 6506 ; BROADWELL: # %bb.0: 6507 ; BROADWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6508 ; BROADWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 6509 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6510 ; 6511 ; SKYLAKE-SSE-LABEL: test_packsswb: 6512 ; SKYLAKE-SSE: # %bb.0: 6513 ; SKYLAKE-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] 6514 ; SKYLAKE-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00] 6515 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 6516 ; 6517 ; SKYLAKE-LABEL: test_packsswb: 6518 ; SKYLAKE: # %bb.0: 6519 ; SKYLAKE-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6520 ; SKYLAKE-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 6521 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6522 ; 6523 ; SKX-SSE-LABEL: test_packsswb: 6524 ; SKX-SSE: # %bb.0: 6525 ; SKX-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] 6526 ; SKX-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00] 6527 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 6528 ; 6529 ; SKX-LABEL: test_packsswb: 6530 ; SKX: # %bb.0: 6531 ; SKX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6532 ; SKX-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 6533 ; SKX-NEXT: retq # sched: [7:1.00] 6534 ; 6535 ; BTVER2-SSE-LABEL: test_packsswb: 6536 ; BTVER2-SSE: # %bb.0: 6537 ; BTVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50] 6538 ; BTVER2-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [6:1.00] 6539 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 6540 ; 6541 ; BTVER2-LABEL: test_packsswb: 6542 ; BTVER2: # %bb.0: 6543 ; BTVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6544 ; BTVER2-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # 
sched: [6:1.00] 6545 ; BTVER2-NEXT: retq # sched: [4:1.00] 6546 ; 6547 ; ZNVER1-SSE-LABEL: test_packsswb: 6548 ; ZNVER1-SSE: # %bb.0: 6549 ; ZNVER1-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.25] 6550 ; ZNVER1-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [8:0.50] 6551 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 6552 ; 6553 ; ZNVER1-LABEL: test_packsswb: 6554 ; ZNVER1: # %bb.0: 6555 ; ZNVER1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 6556 ; ZNVER1-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 6557 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6558 %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) 6559 %2 = bitcast <16 x i8> %1 to <8 x i16> 6560 %3 = load <8 x i16>, <8 x i16> *%a2, align 16 6561 %4 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %2, <8 x i16> %3) 6562 ret <16 x i8> %4 6563 } 6564 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone 6565 6566 define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 6567 ; GENERIC-LABEL: test_packuswb: 6568 ; GENERIC: # %bb.0: 6569 ; GENERIC-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50] 6570 ; GENERIC-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50] 6571 ; GENERIC-NEXT: retq # sched: [1:1.00] 6572 ; 6573 ; ATOM-LABEL: test_packuswb: 6574 ; ATOM: # %bb.0: 6575 ; ATOM-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] 6576 ; ATOM-NEXT: packuswb (%rdi), %xmm0 # sched: [1:1.00] 6577 ; ATOM-NEXT: nop # sched: [1:0.50] 6578 ; ATOM-NEXT: nop # sched: [1:0.50] 6579 ; ATOM-NEXT: nop # sched: [1:0.50] 6580 ; ATOM-NEXT: nop # sched: [1:0.50] 6581 ; ATOM-NEXT: retq # sched: [79:39.50] 6582 ; 6583 ; SLM-LABEL: test_packuswb: 6584 ; SLM: # %bb.0: 6585 ; SLM-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] 6586 ; SLM-NEXT: packuswb (%rdi), %xmm0 # sched: [4:1.00] 6587 ; SLM-NEXT: retq # sched: [4:1.00] 6588 ; 6589 ; SANDY-SSE-LABEL: test_packuswb: 6590 ; SANDY-SSE: # %bb.0: 6591 ; SANDY-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50] 6592 ; 
SANDY-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50] 6593 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 6594 ; 6595 ; SANDY-LABEL: test_packuswb: 6596 ; SANDY: # %bb.0: 6597 ; SANDY-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6598 ; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6599 ; SANDY-NEXT: retq # sched: [1:1.00] 6600 ; 6601 ; HASWELL-SSE-LABEL: test_packuswb: 6602 ; HASWELL-SSE: # %bb.0: 6603 ; HASWELL-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] 6604 ; HASWELL-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00] 6605 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 6606 ; 6607 ; HASWELL-LABEL: test_packuswb: 6608 ; HASWELL: # %bb.0: 6609 ; HASWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6610 ; HASWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 6611 ; HASWELL-NEXT: retq # sched: [7:1.00] 6612 ; 6613 ; BROADWELL-SSE-LABEL: test_packuswb: 6614 ; BROADWELL-SSE: # %bb.0: 6615 ; BROADWELL-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] 6616 ; BROADWELL-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [6:1.00] 6617 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 6618 ; 6619 ; BROADWELL-LABEL: test_packuswb: 6620 ; BROADWELL: # %bb.0: 6621 ; BROADWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6622 ; BROADWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 6623 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6624 ; 6625 ; SKYLAKE-SSE-LABEL: test_packuswb: 6626 ; SKYLAKE-SSE: # %bb.0: 6627 ; SKYLAKE-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] 6628 ; SKYLAKE-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00] 6629 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 6630 ; 6631 ; SKYLAKE-LABEL: test_packuswb: 6632 ; SKYLAKE: # %bb.0: 6633 ; SKYLAKE-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6634 ; SKYLAKE-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 6635 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6636 ; 6637 ; SKX-SSE-LABEL: test_packuswb: 6638 ; SKX-SSE: # %bb.0: 6639 ; SKX-SSE-NEXT: packuswb %xmm1, 
%xmm0 # sched: [1:1.00] 6640 ; SKX-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00] 6641 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 6642 ; 6643 ; SKX-LABEL: test_packuswb: 6644 ; SKX: # %bb.0: 6645 ; SKX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6646 ; SKX-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 6647 ; SKX-NEXT: retq # sched: [7:1.00] 6648 ; 6649 ; BTVER2-SSE-LABEL: test_packuswb: 6650 ; BTVER2-SSE: # %bb.0: 6651 ; BTVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50] 6652 ; BTVER2-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [6:1.00] 6653 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 6654 ; 6655 ; BTVER2-LABEL: test_packuswb: 6656 ; BTVER2: # %bb.0: 6657 ; BTVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6658 ; BTVER2-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 6659 ; BTVER2-NEXT: retq # sched: [4:1.00] 6660 ; 6661 ; ZNVER1-SSE-LABEL: test_packuswb: 6662 ; ZNVER1-SSE: # %bb.0: 6663 ; ZNVER1-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.25] 6664 ; ZNVER1-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [8:0.50] 6665 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 6666 ; 6667 ; ZNVER1-LABEL: test_packuswb: 6668 ; ZNVER1: # %bb.0: 6669 ; ZNVER1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 6670 ; ZNVER1-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 6671 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6672 %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) 6673 %2 = bitcast <16 x i8> %1 to <8 x i16> 6674 %3 = load <8 x i16>, <8 x i16> *%a2, align 16 6675 %4 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %2, <8 x i16> %3) 6676 ret <16 x i8> %4 6677 } 6678 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone 6679 6680 define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 6681 ; GENERIC-LABEL: test_paddb: 6682 ; GENERIC: # %bb.0: 6683 ; GENERIC-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] 6684 ; GENERIC-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] 
6685 ; GENERIC-NEXT: retq # sched: [1:1.00] 6686 ; 6687 ; ATOM-LABEL: test_paddb: 6688 ; ATOM: # %bb.0: 6689 ; ATOM-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] 6690 ; ATOM-NEXT: paddb (%rdi), %xmm0 # sched: [1:1.00] 6691 ; ATOM-NEXT: nop # sched: [1:0.50] 6692 ; ATOM-NEXT: nop # sched: [1:0.50] 6693 ; ATOM-NEXT: nop # sched: [1:0.50] 6694 ; ATOM-NEXT: nop # sched: [1:0.50] 6695 ; ATOM-NEXT: retq # sched: [79:39.50] 6696 ; 6697 ; SLM-LABEL: test_paddb: 6698 ; SLM: # %bb.0: 6699 ; SLM-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] 6700 ; SLM-NEXT: paddb (%rdi), %xmm0 # sched: [4:1.00] 6701 ; SLM-NEXT: retq # sched: [4:1.00] 6702 ; 6703 ; SANDY-SSE-LABEL: test_paddb: 6704 ; SANDY-SSE: # %bb.0: 6705 ; SANDY-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] 6706 ; SANDY-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] 6707 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 6708 ; 6709 ; SANDY-LABEL: test_paddb: 6710 ; SANDY: # %bb.0: 6711 ; SANDY-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6712 ; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6713 ; SANDY-NEXT: retq # sched: [1:1.00] 6714 ; 6715 ; HASWELL-SSE-LABEL: test_paddb: 6716 ; HASWELL-SSE: # %bb.0: 6717 ; HASWELL-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] 6718 ; HASWELL-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] 6719 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 6720 ; 6721 ; HASWELL-LABEL: test_paddb: 6722 ; HASWELL: # %bb.0: 6723 ; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6724 ; HASWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6725 ; HASWELL-NEXT: retq # sched: [7:1.00] 6726 ; 6727 ; BROADWELL-SSE-LABEL: test_paddb: 6728 ; BROADWELL-SSE: # %bb.0: 6729 ; BROADWELL-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] 6730 ; BROADWELL-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [6:0.50] 6731 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 6732 ; 6733 ; BROADWELL-LABEL: test_paddb: 6734 ; BROADWELL: # %bb.0: 6735 ; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6736 ; 
BROADWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 6737 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6738 ; 6739 ; SKYLAKE-SSE-LABEL: test_paddb: 6740 ; SKYLAKE-SSE: # %bb.0: 6741 ; SKYLAKE-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.33] 6742 ; SKYLAKE-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] 6743 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 6744 ; 6745 ; SKYLAKE-LABEL: test_paddb: 6746 ; SKYLAKE: # %bb.0: 6747 ; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 6748 ; SKYLAKE-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6749 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6750 ; 6751 ; SKX-SSE-LABEL: test_paddb: 6752 ; SKX-SSE: # %bb.0: 6753 ; SKX-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.33] 6754 ; SKX-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] 6755 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 6756 ; 6757 ; SKX-LABEL: test_paddb: 6758 ; SKX: # %bb.0: 6759 ; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 6760 ; SKX-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6761 ; SKX-NEXT: retq # sched: [7:1.00] 6762 ; 6763 ; BTVER2-SSE-LABEL: test_paddb: 6764 ; BTVER2-SSE: # %bb.0: 6765 ; BTVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] 6766 ; BTVER2-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [6:1.00] 6767 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 6768 ; 6769 ; BTVER2-LABEL: test_paddb: 6770 ; BTVER2: # %bb.0: 6771 ; BTVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6772 ; BTVER2-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 6773 ; BTVER2-NEXT: retq # sched: [4:1.00] 6774 ; 6775 ; ZNVER1-SSE-LABEL: test_paddb: 6776 ; ZNVER1-SSE: # %bb.0: 6777 ; ZNVER1-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.25] 6778 ; ZNVER1-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [8:0.50] 6779 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 6780 ; 6781 ; ZNVER1-LABEL: test_paddb: 6782 ; ZNVER1: # %bb.0: 6783 ; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 6784 ; ZNVER1-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 6785 ; ZNVER1-NEXT: retq # sched: 
[1:0.50] 6786 %1 = add <16 x i8> %a0, %a1 6787 %2 = load <16 x i8>, <16 x i8> *%a2, align 16 6788 %3 = add <16 x i8> %1, %2 6789 ret <16 x i8> %3 6790 } 6791 6792 define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 6793 ; GENERIC-LABEL: test_paddd: 6794 ; GENERIC: # %bb.0: 6795 ; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 6796 ; GENERIC-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] 6797 ; GENERIC-NEXT: retq # sched: [1:1.00] 6798 ; 6799 ; ATOM-LABEL: test_paddd: 6800 ; ATOM: # %bb.0: 6801 ; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 6802 ; ATOM-NEXT: paddd (%rdi), %xmm0 # sched: [1:1.00] 6803 ; ATOM-NEXT: nop # sched: [1:0.50] 6804 ; ATOM-NEXT: nop # sched: [1:0.50] 6805 ; ATOM-NEXT: nop # sched: [1:0.50] 6806 ; ATOM-NEXT: nop # sched: [1:0.50] 6807 ; ATOM-NEXT: retq # sched: [79:39.50] 6808 ; 6809 ; SLM-LABEL: test_paddd: 6810 ; SLM: # %bb.0: 6811 ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 6812 ; SLM-NEXT: paddd (%rdi), %xmm0 # sched: [4:1.00] 6813 ; SLM-NEXT: retq # sched: [4:1.00] 6814 ; 6815 ; SANDY-SSE-LABEL: test_paddd: 6816 ; SANDY-SSE: # %bb.0: 6817 ; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 6818 ; SANDY-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] 6819 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 6820 ; 6821 ; SANDY-LABEL: test_paddd: 6822 ; SANDY: # %bb.0: 6823 ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6824 ; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6825 ; SANDY-NEXT: retq # sched: [1:1.00] 6826 ; 6827 ; HASWELL-SSE-LABEL: test_paddd: 6828 ; HASWELL-SSE: # %bb.0: 6829 ; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 6830 ; HASWELL-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] 6831 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 6832 ; 6833 ; HASWELL-LABEL: test_paddd: 6834 ; HASWELL: # %bb.0: 6835 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6836 ; HASWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6837 ; HASWELL-NEXT: retq # sched: 
[7:1.00] 6838 ; 6839 ; BROADWELL-SSE-LABEL: test_paddd: 6840 ; BROADWELL-SSE: # %bb.0: 6841 ; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 6842 ; BROADWELL-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [6:0.50] 6843 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 6844 ; 6845 ; BROADWELL-LABEL: test_paddd: 6846 ; BROADWELL: # %bb.0: 6847 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6848 ; BROADWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 6849 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6850 ; 6851 ; SKYLAKE-SSE-LABEL: test_paddd: 6852 ; SKYLAKE-SSE: # %bb.0: 6853 ; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 6854 ; SKYLAKE-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] 6855 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 6856 ; 6857 ; SKYLAKE-LABEL: test_paddd: 6858 ; SKYLAKE: # %bb.0: 6859 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 6860 ; SKYLAKE-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6861 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6862 ; 6863 ; SKX-SSE-LABEL: test_paddd: 6864 ; SKX-SSE: # %bb.0: 6865 ; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 6866 ; SKX-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] 6867 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 6868 ; 6869 ; SKX-LABEL: test_paddd: 6870 ; SKX: # %bb.0: 6871 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 6872 ; SKX-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6873 ; SKX-NEXT: retq # sched: [7:1.00] 6874 ; 6875 ; BTVER2-SSE-LABEL: test_paddd: 6876 ; BTVER2-SSE: # %bb.0: 6877 ; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 6878 ; BTVER2-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [6:1.00] 6879 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 6880 ; 6881 ; BTVER2-LABEL: test_paddd: 6882 ; BTVER2: # %bb.0: 6883 ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6884 ; BTVER2-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 6885 ; BTVER2-NEXT: retq # sched: [4:1.00] 6886 ; 6887 ; ZNVER1-SSE-LABEL: test_paddd: 6888 ; 
ZNVER1-SSE: # %bb.0: 6889 ; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] 6890 ; ZNVER1-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [8:0.50] 6891 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 6892 ; 6893 ; ZNVER1-LABEL: test_paddd: 6894 ; ZNVER1: # %bb.0: 6895 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 6896 ; ZNVER1-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 6897 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6898 %1 = add <4 x i32> %a0, %a1 6899 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 6900 %3 = add <4 x i32> %1, %2 6901 ret <4 x i32> %3 6902 } 6903 6904 define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 6905 ; GENERIC-LABEL: test_paddq: 6906 ; GENERIC: # %bb.0: 6907 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 6908 ; GENERIC-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] 6909 ; GENERIC-NEXT: retq # sched: [1:1.00] 6910 ; 6911 ; ATOM-LABEL: test_paddq: 6912 ; ATOM: # %bb.0: 6913 ; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] 6914 ; ATOM-NEXT: paddq (%rdi), %xmm0 # sched: [3:1.50] 6915 ; ATOM-NEXT: retq # sched: [79:39.50] 6916 ; 6917 ; SLM-LABEL: test_paddq: 6918 ; SLM: # %bb.0: 6919 ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 6920 ; SLM-NEXT: paddq (%rdi), %xmm0 # sched: [4:1.00] 6921 ; SLM-NEXT: retq # sched: [4:1.00] 6922 ; 6923 ; SANDY-SSE-LABEL: test_paddq: 6924 ; SANDY-SSE: # %bb.0: 6925 ; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 6926 ; SANDY-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] 6927 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 6928 ; 6929 ; SANDY-LABEL: test_paddq: 6930 ; SANDY: # %bb.0: 6931 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6932 ; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6933 ; SANDY-NEXT: retq # sched: [1:1.00] 6934 ; 6935 ; HASWELL-SSE-LABEL: test_paddq: 6936 ; HASWELL-SSE: # %bb.0: 6937 ; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 6938 ; HASWELL-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] 6939 ; HASWELL-SSE-NEXT: retq 
# sched: [7:1.00] 6940 ; 6941 ; HASWELL-LABEL: test_paddq: 6942 ; HASWELL: # %bb.0: 6943 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6944 ; HASWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6945 ; HASWELL-NEXT: retq # sched: [7:1.00] 6946 ; 6947 ; BROADWELL-SSE-LABEL: test_paddq: 6948 ; BROADWELL-SSE: # %bb.0: 6949 ; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 6950 ; BROADWELL-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [6:0.50] 6951 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 6952 ; 6953 ; BROADWELL-LABEL: test_paddq: 6954 ; BROADWELL: # %bb.0: 6955 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6956 ; BROADWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 6957 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6958 ; 6959 ; SKYLAKE-SSE-LABEL: test_paddq: 6960 ; SKYLAKE-SSE: # %bb.0: 6961 ; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 6962 ; SKYLAKE-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] 6963 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 6964 ; 6965 ; SKYLAKE-LABEL: test_paddq: 6966 ; SKYLAKE: # %bb.0: 6967 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 6968 ; SKYLAKE-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6969 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6970 ; 6971 ; SKX-SSE-LABEL: test_paddq: 6972 ; SKX-SSE: # %bb.0: 6973 ; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 6974 ; SKX-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] 6975 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 6976 ; 6977 ; SKX-LABEL: test_paddq: 6978 ; SKX: # %bb.0: 6979 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 6980 ; SKX-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6981 ; SKX-NEXT: retq # sched: [7:1.00] 6982 ; 6983 ; BTVER2-SSE-LABEL: test_paddq: 6984 ; BTVER2-SSE: # %bb.0: 6985 ; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 6986 ; BTVER2-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [6:1.00] 6987 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 6988 ; 6989 ; BTVER2-LABEL: test_paddq: 6990 ; 
BTVER2: # %bb.0: 6991 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6992 ; BTVER2-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 6993 ; BTVER2-NEXT: retq # sched: [4:1.00] 6994 ; 6995 ; ZNVER1-SSE-LABEL: test_paddq: 6996 ; ZNVER1-SSE: # %bb.0: 6997 ; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] 6998 ; ZNVER1-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [8:0.50] 6999 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 7000 ; 7001 ; ZNVER1-LABEL: test_paddq: 7002 ; ZNVER1: # %bb.0: 7003 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 7004 ; ZNVER1-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 7005 ; ZNVER1-NEXT: retq # sched: [1:0.50] 7006 %1 = add <2 x i64> %a0, %a1 7007 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 7008 %3 = add <2 x i64> %1, %2 7009 ret <2 x i64> %3 7010 } 7011 7012 define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 7013 ; GENERIC-LABEL: test_paddsb: 7014 ; GENERIC: # %bb.0: 7015 ; GENERIC-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] 7016 ; GENERIC-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] 7017 ; GENERIC-NEXT: retq # sched: [1:1.00] 7018 ; 7019 ; ATOM-LABEL: test_paddsb: 7020 ; ATOM: # %bb.0: 7021 ; ATOM-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] 7022 ; ATOM-NEXT: paddsb (%rdi), %xmm0 # sched: [1:1.00] 7023 ; ATOM-NEXT: nop # sched: [1:0.50] 7024 ; ATOM-NEXT: nop # sched: [1:0.50] 7025 ; ATOM-NEXT: nop # sched: [1:0.50] 7026 ; ATOM-NEXT: nop # sched: [1:0.50] 7027 ; ATOM-NEXT: retq # sched: [79:39.50] 7028 ; 7029 ; SLM-LABEL: test_paddsb: 7030 ; SLM: # %bb.0: 7031 ; SLM-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] 7032 ; SLM-NEXT: paddsb (%rdi), %xmm0 # sched: [4:1.00] 7033 ; SLM-NEXT: retq # sched: [4:1.00] 7034 ; 7035 ; SANDY-SSE-LABEL: test_paddsb: 7036 ; SANDY-SSE: # %bb.0: 7037 ; SANDY-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] 7038 ; SANDY-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] 7039 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 7040 ; 7041 ; SANDY-LABEL: test_paddsb: 7042 ; 
SANDY: # %bb.0: 7043 ; SANDY-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7044 ; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7045 ; SANDY-NEXT: retq # sched: [1:1.00] 7046 ; 7047 ; HASWELL-SSE-LABEL: test_paddsb: 7048 ; HASWELL-SSE: # %bb.0: 7049 ; HASWELL-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] 7050 ; HASWELL-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] 7051 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 7052 ; 7053 ; HASWELL-LABEL: test_paddsb: 7054 ; HASWELL: # %bb.0: 7055 ; HASWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7056 ; HASWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7057 ; HASWELL-NEXT: retq # sched: [7:1.00] 7058 ; 7059 ; BROADWELL-SSE-LABEL: test_paddsb: 7060 ; BROADWELL-SSE: # %bb.0: 7061 ; BROADWELL-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] 7062 ; BROADWELL-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [6:0.50] 7063 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 7064 ; 7065 ; BROADWELL-LABEL: test_paddsb: 7066 ; BROADWELL: # %bb.0: 7067 ; BROADWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7068 ; BROADWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 7069 ; BROADWELL-NEXT: retq # sched: [7:1.00] 7070 ; 7071 ; SKYLAKE-SSE-LABEL: test_paddsb: 7072 ; SKYLAKE-SSE: # %bb.0: 7073 ; SKYLAKE-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] 7074 ; SKYLAKE-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] 7075 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 7076 ; 7077 ; SKYLAKE-LABEL: test_paddsb: 7078 ; SKYLAKE: # %bb.0: 7079 ; SKYLAKE-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7080 ; SKYLAKE-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7081 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 7082 ; 7083 ; SKX-SSE-LABEL: test_paddsb: 7084 ; SKX-SSE: # %bb.0: 7085 ; SKX-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] 7086 ; SKX-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] 7087 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 7088 ; 7089 ; SKX-LABEL: test_paddsb: 7090 ; SKX: # %bb.0: 7091 ; 
SKX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7092 ; SKX-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7093 ; SKX-NEXT: retq # sched: [7:1.00] 7094 ; 7095 ; BTVER2-SSE-LABEL: test_paddsb: 7096 ; BTVER2-SSE: # %bb.0: 7097 ; BTVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] 7098 ; BTVER2-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [6:1.00] 7099 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 7100 ; 7101 ; BTVER2-LABEL: test_paddsb: 7102 ; BTVER2: # %bb.0: 7103 ; BTVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7104 ; BTVER2-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 7105 ; BTVER2-NEXT: retq # sched: [4:1.00] 7106 ; 7107 ; ZNVER1-SSE-LABEL: test_paddsb: 7108 ; ZNVER1-SSE: # %bb.0: 7109 ; ZNVER1-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.25] 7110 ; ZNVER1-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [8:0.50] 7111 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 7112 ; 7113 ; ZNVER1-LABEL: test_paddsb: 7114 ; ZNVER1: # %bb.0: 7115 ; ZNVER1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 7116 ; ZNVER1-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 7117 ; ZNVER1-NEXT: retq # sched: [1:0.50] 7118 %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) 7119 %2 = load <16 x i8>, <16 x i8> *%a2, align 16 7120 %3 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %1, <16 x i8> %2) 7121 ret <16 x i8> %3 7122 } 7123 declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone 7124 7125 define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 7126 ; GENERIC-LABEL: test_paddsw: 7127 ; GENERIC: # %bb.0: 7128 ; GENERIC-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] 7129 ; GENERIC-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] 7130 ; GENERIC-NEXT: retq # sched: [1:1.00] 7131 ; 7132 ; ATOM-LABEL: test_paddsw: 7133 ; ATOM: # %bb.0: 7134 ; ATOM-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] 7135 ; ATOM-NEXT: paddsw (%rdi), %xmm0 # sched: [1:1.00] 7136 ; ATOM-NEXT: nop # sched: [1:0.50] 7137 ; ATOM-NEXT: nop 
# sched: [1:0.50] 7138 ; ATOM-NEXT: nop # sched: [1:0.50] 7139 ; ATOM-NEXT: nop # sched: [1:0.50] 7140 ; ATOM-NEXT: retq # sched: [79:39.50] 7141 ; 7142 ; SLM-LABEL: test_paddsw: 7143 ; SLM: # %bb.0: 7144 ; SLM-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] 7145 ; SLM-NEXT: paddsw (%rdi), %xmm0 # sched: [4:1.00] 7146 ; SLM-NEXT: retq # sched: [4:1.00] 7147 ; 7148 ; SANDY-SSE-LABEL: test_paddsw: 7149 ; SANDY-SSE: # %bb.0: 7150 ; SANDY-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] 7151 ; SANDY-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] 7152 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 7153 ; 7154 ; SANDY-LABEL: test_paddsw: 7155 ; SANDY: # %bb.0: 7156 ; SANDY-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7157 ; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7158 ; SANDY-NEXT: retq # sched: [1:1.00] 7159 ; 7160 ; HASWELL-SSE-LABEL: test_paddsw: 7161 ; HASWELL-SSE: # %bb.0: 7162 ; HASWELL-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] 7163 ; HASWELL-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] 7164 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 7165 ; 7166 ; HASWELL-LABEL: test_paddsw: 7167 ; HASWELL: # %bb.0: 7168 ; HASWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7169 ; HASWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7170 ; HASWELL-NEXT: retq # sched: [7:1.00] 7171 ; 7172 ; BROADWELL-SSE-LABEL: test_paddsw: 7173 ; BROADWELL-SSE: # %bb.0: 7174 ; BROADWELL-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] 7175 ; BROADWELL-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [6:0.50] 7176 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 7177 ; 7178 ; BROADWELL-LABEL: test_paddsw: 7179 ; BROADWELL: # %bb.0: 7180 ; BROADWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7181 ; BROADWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 7182 ; BROADWELL-NEXT: retq # sched: [7:1.00] 7183 ; 7184 ; SKYLAKE-SSE-LABEL: test_paddsw: 7185 ; SKYLAKE-SSE: # %bb.0: 7186 ; SKYLAKE-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] 7187 ; 
SKYLAKE-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] 7188 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 7189 ; 7190 ; SKYLAKE-LABEL: test_paddsw: 7191 ; SKYLAKE: # %bb.0: 7192 ; SKYLAKE-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7193 ; SKYLAKE-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7194 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 7195 ; 7196 ; SKX-SSE-LABEL: test_paddsw: 7197 ; SKX-SSE: # %bb.0: 7198 ; SKX-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] 7199 ; SKX-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] 7200 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 7201 ; 7202 ; SKX-LABEL: test_paddsw: 7203 ; SKX: # %bb.0: 7204 ; SKX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7205 ; SKX-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7206 ; SKX-NEXT: retq # sched: [7:1.00] 7207 ; 7208 ; BTVER2-SSE-LABEL: test_paddsw: 7209 ; BTVER2-SSE: # %bb.0: 7210 ; BTVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] 7211 ; BTVER2-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [6:1.00] 7212 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 7213 ; 7214 ; BTVER2-LABEL: test_paddsw: 7215 ; BTVER2: # %bb.0: 7216 ; BTVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7217 ; BTVER2-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 7218 ; BTVER2-NEXT: retq # sched: [4:1.00] 7219 ; 7220 ; ZNVER1-SSE-LABEL: test_paddsw: 7221 ; ZNVER1-SSE: # %bb.0: 7222 ; ZNVER1-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.25] 7223 ; ZNVER1-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [8:0.50] 7224 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 7225 ; 7226 ; ZNVER1-LABEL: test_paddsw: 7227 ; ZNVER1: # %bb.0: 7228 ; ZNVER1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 7229 ; ZNVER1-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 7230 ; ZNVER1-NEXT: retq # sched: [1:0.50] 7231 %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) 7232 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 7233 %3 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %1, <8 x i16> %2) 7234 ret <8 x 
i16> %3 7235 } 7236 declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone 7237 7238 define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 7239 ; GENERIC-LABEL: test_paddusb: 7240 ; GENERIC: # %bb.0: 7241 ; GENERIC-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] 7242 ; GENERIC-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] 7243 ; GENERIC-NEXT: retq # sched: [1:1.00] 7244 ; 7245 ; ATOM-LABEL: test_paddusb: 7246 ; ATOM: # %bb.0: 7247 ; ATOM-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] 7248 ; ATOM-NEXT: paddusb (%rdi), %xmm0 # sched: [1:1.00] 7249 ; ATOM-NEXT: nop # sched: [1:0.50] 7250 ; ATOM-NEXT: nop # sched: [1:0.50] 7251 ; ATOM-NEXT: nop # sched: [1:0.50] 7252 ; ATOM-NEXT: nop # sched: [1:0.50] 7253 ; ATOM-NEXT: retq # sched: [79:39.50] 7254 ; 7255 ; SLM-LABEL: test_paddusb: 7256 ; SLM: # %bb.0: 7257 ; SLM-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] 7258 ; SLM-NEXT: paddusb (%rdi), %xmm0 # sched: [4:1.00] 7259 ; SLM-NEXT: retq # sched: [4:1.00] 7260 ; 7261 ; SANDY-SSE-LABEL: test_paddusb: 7262 ; SANDY-SSE: # %bb.0: 7263 ; SANDY-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] 7264 ; SANDY-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] 7265 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 7266 ; 7267 ; SANDY-LABEL: test_paddusb: 7268 ; SANDY: # %bb.0: 7269 ; SANDY-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7270 ; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7271 ; SANDY-NEXT: retq # sched: [1:1.00] 7272 ; 7273 ; HASWELL-SSE-LABEL: test_paddusb: 7274 ; HASWELL-SSE: # %bb.0: 7275 ; HASWELL-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] 7276 ; HASWELL-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] 7277 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 7278 ; 7279 ; HASWELL-LABEL: test_paddusb: 7280 ; HASWELL: # %bb.0: 7281 ; HASWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7282 ; HASWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7283 ; HASWELL-NEXT: retq # sched: [7:1.00] 7284 ; 
7285 ; BROADWELL-SSE-LABEL: test_paddusb: 7286 ; BROADWELL-SSE: # %bb.0: 7287 ; BROADWELL-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] 7288 ; BROADWELL-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [6:0.50] 7289 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 7290 ; 7291 ; BROADWELL-LABEL: test_paddusb: 7292 ; BROADWELL: # %bb.0: 7293 ; BROADWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7294 ; BROADWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 7295 ; BROADWELL-NEXT: retq # sched: [7:1.00] 7296 ; 7297 ; SKYLAKE-SSE-LABEL: test_paddusb: 7298 ; SKYLAKE-SSE: # %bb.0: 7299 ; SKYLAKE-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] 7300 ; SKYLAKE-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] 7301 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 7302 ; 7303 ; SKYLAKE-LABEL: test_paddusb: 7304 ; SKYLAKE: # %bb.0: 7305 ; SKYLAKE-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7306 ; SKYLAKE-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7307 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 7308 ; 7309 ; SKX-SSE-LABEL: test_paddusb: 7310 ; SKX-SSE: # %bb.0: 7311 ; SKX-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] 7312 ; SKX-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] 7313 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 7314 ; 7315 ; SKX-LABEL: test_paddusb: 7316 ; SKX: # %bb.0: 7317 ; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7318 ; SKX-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7319 ; SKX-NEXT: retq # sched: [7:1.00] 7320 ; 7321 ; BTVER2-SSE-LABEL: test_paddusb: 7322 ; BTVER2-SSE: # %bb.0: 7323 ; BTVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] 7324 ; BTVER2-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [6:1.00] 7325 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 7326 ; 7327 ; BTVER2-LABEL: test_paddusb: 7328 ; BTVER2: # %bb.0: 7329 ; BTVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7330 ; BTVER2-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 7331 ; BTVER2-NEXT: retq # sched: [4:1.00] 7332 ; 7333 ; 
ZNVER1-SSE-LABEL: test_paddusb: 7334 ; ZNVER1-SSE: # %bb.0: 7335 ; ZNVER1-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.25] 7336 ; ZNVER1-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [8:0.50] 7337 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 7338 ; 7339 ; ZNVER1-LABEL: test_paddusb: 7340 ; ZNVER1: # %bb.0: 7341 ; ZNVER1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 7342 ; ZNVER1-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 7343 ; ZNVER1-NEXT: retq # sched: [1:0.50] 7344 %1 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) 7345 %2 = load <16 x i8>, <16 x i8> *%a2, align 16 7346 %3 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %1, <16 x i8> %2) 7347 ret <16 x i8> %3 7348 } 7349 declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone 7350 7351 define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 7352 ; GENERIC-LABEL: test_paddusw: 7353 ; GENERIC: # %bb.0: 7354 ; GENERIC-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] 7355 ; GENERIC-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] 7356 ; GENERIC-NEXT: retq # sched: [1:1.00] 7357 ; 7358 ; ATOM-LABEL: test_paddusw: 7359 ; ATOM: # %bb.0: 7360 ; ATOM-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] 7361 ; ATOM-NEXT: paddusw (%rdi), %xmm0 # sched: [1:1.00] 7362 ; ATOM-NEXT: nop # sched: [1:0.50] 7363 ; ATOM-NEXT: nop # sched: [1:0.50] 7364 ; ATOM-NEXT: nop # sched: [1:0.50] 7365 ; ATOM-NEXT: nop # sched: [1:0.50] 7366 ; ATOM-NEXT: retq # sched: [79:39.50] 7367 ; 7368 ; SLM-LABEL: test_paddusw: 7369 ; SLM: # %bb.0: 7370 ; SLM-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] 7371 ; SLM-NEXT: paddusw (%rdi), %xmm0 # sched: [4:1.00] 7372 ; SLM-NEXT: retq # sched: [4:1.00] 7373 ; 7374 ; SANDY-SSE-LABEL: test_paddusw: 7375 ; SANDY-SSE: # %bb.0: 7376 ; SANDY-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] 7377 ; SANDY-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] 7378 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 7379 ; 7380 ; SANDY-LABEL: test_paddusw: 7381 ; 
SANDY: # %bb.0: 7382 ; SANDY-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7383 ; SANDY-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7384 ; SANDY-NEXT: retq # sched: [1:1.00] 7385 ; 7386 ; HASWELL-SSE-LABEL: test_paddusw: 7387 ; HASWELL-SSE: # %bb.0: 7388 ; HASWELL-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] 7389 ; HASWELL-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] 7390 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 7391 ; 7392 ; HASWELL-LABEL: test_paddusw: 7393 ; HASWELL: # %bb.0: 7394 ; HASWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7395 ; HASWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7396 ; HASWELL-NEXT: retq # sched: [7:1.00] 7397 ; 7398 ; BROADWELL-SSE-LABEL: test_paddusw: 7399 ; BROADWELL-SSE: # %bb.0: 7400 ; BROADWELL-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] 7401 ; BROADWELL-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [6:0.50] 7402 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 7403 ; 7404 ; BROADWELL-LABEL: test_paddusw: 7405 ; BROADWELL: # %bb.0: 7406 ; BROADWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7407 ; BROADWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 7408 ; BROADWELL-NEXT: retq # sched: [7:1.00] 7409 ; 7410 ; SKYLAKE-SSE-LABEL: test_paddusw: 7411 ; SKYLAKE-SSE: # %bb.0: 7412 ; SKYLAKE-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] 7413 ; SKYLAKE-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] 7414 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 7415 ; 7416 ; SKYLAKE-LABEL: test_paddusw: 7417 ; SKYLAKE: # %bb.0: 7418 ; SKYLAKE-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7419 ; SKYLAKE-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7420 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 7421 ; 7422 ; SKX-SSE-LABEL: test_paddusw: 7423 ; SKX-SSE: # %bb.0: 7424 ; SKX-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] 7425 ; SKX-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] 7426 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 7427 ; 7428 ; SKX-LABEL: test_paddusw: 7429 ; 
SKX: # %bb.0: 7430 ; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7431 ; SKX-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7432 ; SKX-NEXT: retq # sched: [7:1.00] 7433 ; 7434 ; BTVER2-SSE-LABEL: test_paddusw: 7435 ; BTVER2-SSE: # %bb.0: 7436 ; BTVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] 7437 ; BTVER2-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [6:1.00] 7438 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 7439 ; 7440 ; BTVER2-LABEL: test_paddusw: 7441 ; BTVER2: # %bb.0: 7442 ; BTVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7443 ; BTVER2-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 7444 ; BTVER2-NEXT: retq # sched: [4:1.00] 7445 ; 7446 ; ZNVER1-SSE-LABEL: test_paddusw: 7447 ; ZNVER1-SSE: # %bb.0: 7448 ; ZNVER1-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.25] 7449 ; ZNVER1-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [8:0.50] 7450 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 7451 ; 7452 ; ZNVER1-LABEL: test_paddusw: 7453 ; ZNVER1: # %bb.0: 7454 ; ZNVER1-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 7455 ; ZNVER1-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 7456 ; ZNVER1-NEXT: retq # sched: [1:0.50] 7457 %1 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) 7458 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 7459 %3 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %1, <8 x i16> %2) 7460 ret <8 x i16> %3 7461 } 7462 declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone 7463 7464 define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 7465 ; GENERIC-LABEL: test_paddw: 7466 ; GENERIC: # %bb.0: 7467 ; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 7468 ; GENERIC-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] 7469 ; GENERIC-NEXT: retq # sched: [1:1.00] 7470 ; 7471 ; ATOM-LABEL: test_paddw: 7472 ; ATOM: # %bb.0: 7473 ; ATOM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 7474 ; ATOM-NEXT: paddw (%rdi), %xmm0 # sched: [1:1.00] 7475 ; ATOM-NEXT: nop # sched: 
[1:0.50] 7476 ; ATOM-NEXT: nop # sched: [1:0.50] 7477 ; ATOM-NEXT: nop # sched: [1:0.50] 7478 ; ATOM-NEXT: nop # sched: [1:0.50] 7479 ; ATOM-NEXT: retq # sched: [79:39.50] 7480 ; 7481 ; SLM-LABEL: test_paddw: 7482 ; SLM: # %bb.0: 7483 ; SLM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 7484 ; SLM-NEXT: paddw (%rdi), %xmm0 # sched: [4:1.00] 7485 ; SLM-NEXT: retq # sched: [4:1.00] 7486 ; 7487 ; SANDY-SSE-LABEL: test_paddw: 7488 ; SANDY-SSE: # %bb.0: 7489 ; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 7490 ; SANDY-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] 7491 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 7492 ; 7493 ; SANDY-LABEL: test_paddw: 7494 ; SANDY: # %bb.0: 7495 ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7496 ; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7497 ; SANDY-NEXT: retq # sched: [1:1.00] 7498 ; 7499 ; HASWELL-SSE-LABEL: test_paddw: 7500 ; HASWELL-SSE: # %bb.0: 7501 ; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 7502 ; HASWELL-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] 7503 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 7504 ; 7505 ; HASWELL-LABEL: test_paddw: 7506 ; HASWELL: # %bb.0: 7507 ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7508 ; HASWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7509 ; HASWELL-NEXT: retq # sched: [7:1.00] 7510 ; 7511 ; BROADWELL-SSE-LABEL: test_paddw: 7512 ; BROADWELL-SSE: # %bb.0: 7513 ; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 7514 ; BROADWELL-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [6:0.50] 7515 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 7516 ; 7517 ; BROADWELL-LABEL: test_paddw: 7518 ; BROADWELL: # %bb.0: 7519 ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7520 ; BROADWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 7521 ; BROADWELL-NEXT: retq # sched: [7:1.00] 7522 ; 7523 ; SKYLAKE-SSE-LABEL: test_paddw: 7524 ; SKYLAKE-SSE: # %bb.0: 7525 ; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] 7526 ; 
SKYLAKE-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] 7527 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 7528 ; 7529 ; SKYLAKE-LABEL: test_paddw: 7530 ; SKYLAKE: # %bb.0: 7531 ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 7532 ; SKYLAKE-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7533 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 7534 ; 7535 ; SKX-SSE-LABEL: test_paddw: 7536 ; SKX-SSE: # %bb.0: 7537 ; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] 7538 ; SKX-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] 7539 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 7540 ; 7541 ; SKX-LABEL: test_paddw: 7542 ; SKX: # %bb.0: 7543 ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 7544 ; SKX-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7545 ; SKX-NEXT: retq # sched: [7:1.00] 7546 ; 7547 ; BTVER2-SSE-LABEL: test_paddw: 7548 ; BTVER2-SSE: # %bb.0: 7549 ; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 7550 ; BTVER2-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [6:1.00] 7551 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 7552 ; 7553 ; BTVER2-LABEL: test_paddw: 7554 ; BTVER2: # %bb.0: 7555 ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7556 ; BTVER2-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 7557 ; BTVER2-NEXT: retq # sched: [4:1.00] 7558 ; 7559 ; ZNVER1-SSE-LABEL: test_paddw: 7560 ; ZNVER1-SSE: # %bb.0: 7561 ; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] 7562 ; ZNVER1-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [8:0.50] 7563 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 7564 ; 7565 ; ZNVER1-LABEL: test_paddw: 7566 ; ZNVER1: # %bb.0: 7567 ; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 7568 ; ZNVER1-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 7569 ; ZNVER1-NEXT: retq # sched: [1:0.50] 7570 %1 = add <8 x i16> %a0, %a1 7571 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 7572 %3 = add <8 x i16> %1, %2 7573 ret <8 x i16> %3 7574 } 7575 7576 define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 7577 ; 
GENERIC-LABEL: test_pand: 7578 ; GENERIC: # %bb.0: 7579 ; GENERIC-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] 7580 ; GENERIC-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] 7581 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 7582 ; GENERIC-NEXT: retq # sched: [1:1.00] 7583 ; 7584 ; ATOM-LABEL: test_pand: 7585 ; ATOM: # %bb.0: 7586 ; ATOM-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50] 7587 ; ATOM-NEXT: pand (%rdi), %xmm0 # sched: [1:1.00] 7588 ; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] 7589 ; ATOM-NEXT: retq # sched: [79:39.50] 7590 ; 7591 ; SLM-LABEL: test_pand: 7592 ; SLM: # %bb.0: 7593 ; SLM-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50] 7594 ; SLM-NEXT: pand (%rdi), %xmm0 # sched: [4:1.00] 7595 ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 7596 ; SLM-NEXT: retq # sched: [4:1.00] 7597 ; 7598 ; SANDY-SSE-LABEL: test_pand: 7599 ; SANDY-SSE: # %bb.0: 7600 ; SANDY-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] 7601 ; SANDY-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] 7602 ; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 7603 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 7604 ; 7605 ; SANDY-LABEL: test_pand: 7606 ; SANDY: # %bb.0: 7607 ; SANDY-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 7608 ; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7609 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7610 ; SANDY-NEXT: retq # sched: [1:1.00] 7611 ; 7612 ; HASWELL-SSE-LABEL: test_pand: 7613 ; HASWELL-SSE: # %bb.0: 7614 ; HASWELL-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] 7615 ; HASWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] 7616 ; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 7617 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 7618 ; 7619 ; HASWELL-LABEL: test_pand: 7620 ; HASWELL: # %bb.0: 7621 ; HASWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 7622 ; HASWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7623 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7624 ; HASWELL-NEXT: retq # sched: [7:1.00] 7625 ; 
7626 ; BROADWELL-SSE-LABEL: test_pand: 7627 ; BROADWELL-SSE: # %bb.0: 7628 ; BROADWELL-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] 7629 ; BROADWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:0.50] 7630 ; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 7631 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 7632 ; 7633 ; BROADWELL-LABEL: test_pand: 7634 ; BROADWELL: # %bb.0: 7635 ; BROADWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 7636 ; BROADWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 7637 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7638 ; BROADWELL-NEXT: retq # sched: [7:1.00] 7639 ; 7640 ; SKYLAKE-SSE-LABEL: test_pand: 7641 ; SKYLAKE-SSE: # %bb.0: 7642 ; SKYLAKE-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] 7643 ; SKYLAKE-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] 7644 ; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 7645 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 7646 ; 7647 ; SKYLAKE-LABEL: test_pand: 7648 ; SKYLAKE: # %bb.0: 7649 ; SKYLAKE-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 7650 ; SKYLAKE-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7651 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 7652 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 7653 ; 7654 ; SKX-SSE-LABEL: test_pand: 7655 ; SKX-SSE: # %bb.0: 7656 ; SKX-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] 7657 ; SKX-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] 7658 ; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 7659 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 7660 ; 7661 ; SKX-LABEL: test_pand: 7662 ; SKX: # %bb.0: 7663 ; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 7664 ; SKX-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7665 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 7666 ; SKX-NEXT: retq # sched: [7:1.00] 7667 ; 7668 ; BTVER2-SSE-LABEL: test_pand: 7669 ; BTVER2-SSE: # %bb.0: 7670 ; BTVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50] 7671 ; BTVER2-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:1.00] 
7672 ; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 7673 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 7674 ; 7675 ; BTVER2-LABEL: test_pand: 7676 ; BTVER2: # %bb.0: 7677 ; BTVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7678 ; BTVER2-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 7679 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7680 ; BTVER2-NEXT: retq # sched: [4:1.00] 7681 ; 7682 ; ZNVER1-SSE-LABEL: test_pand: 7683 ; ZNVER1-SSE: # %bb.0: 7684 ; ZNVER1-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.25] 7685 ; ZNVER1-SSE-NEXT: pand (%rdi), %xmm0 # sched: [8:0.50] 7686 ; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] 7687 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 7688 ; 7689 ; ZNVER1-LABEL: test_pand: 7690 ; ZNVER1: # %bb.0: 7691 ; ZNVER1-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 7692 ; ZNVER1-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 7693 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 7694 ; ZNVER1-NEXT: retq # sched: [1:0.50] 7695 %1 = and <2 x i64> %a0, %a1 7696 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 7697 %3 = and <2 x i64> %1, %2 7698 %4 = add <2 x i64> %3, %a1 7699 ret <2 x i64> %4 7700 } 7701 7702 define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 7703 ; GENERIC-LABEL: test_pandn: 7704 ; GENERIC: # %bb.0: 7705 ; GENERIC-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] 7706 ; GENERIC-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] 7707 ; GENERIC-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] 7708 ; GENERIC-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] 7709 ; GENERIC-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] 7710 ; GENERIC-NEXT: retq # sched: [1:1.00] 7711 ; 7712 ; ATOM-LABEL: test_pandn: 7713 ; ATOM: # %bb.0: 7714 ; ATOM-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50] 7715 ; ATOM-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50] 7716 ; ATOM-NEXT: pandn (%rdi), %xmm1 # sched: [1:1.00] 7717 ; ATOM-NEXT: paddq %xmm0, %xmm1 # sched: [2:1.00] 7718 ; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: 
[1:0.50] 7719 ; ATOM-NEXT: retq # sched: [79:39.50] 7720 ; 7721 ; SLM-LABEL: test_pandn: 7722 ; SLM: # %bb.0: 7723 ; SLM-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50] 7724 ; SLM-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50] 7725 ; SLM-NEXT: pandn (%rdi), %xmm1 # sched: [4:1.00] 7726 ; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] 7727 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 7728 ; SLM-NEXT: retq # sched: [4:1.00] 7729 ; 7730 ; SANDY-SSE-LABEL: test_pandn: 7731 ; SANDY-SSE: # %bb.0: 7732 ; SANDY-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] 7733 ; SANDY-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] 7734 ; SANDY-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] 7735 ; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] 7736 ; SANDY-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] 7737 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 7738 ; 7739 ; SANDY-LABEL: test_pandn: 7740 ; SANDY: # %bb.0: 7741 ; SANDY-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 7742 ; SANDY-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] 7743 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7744 ; SANDY-NEXT: retq # sched: [1:1.00] 7745 ; 7746 ; HASWELL-SSE-LABEL: test_pandn: 7747 ; HASWELL-SSE: # %bb.0: 7748 ; HASWELL-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] 7749 ; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] 7750 ; HASWELL-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] 7751 ; HASWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] 7752 ; HASWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] 7753 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 7754 ; 7755 ; HASWELL-LABEL: test_pandn: 7756 ; HASWELL: # %bb.0: 7757 ; HASWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 7758 ; HASWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] 7759 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7760 ; HASWELL-NEXT: retq # sched: [7:1.00] 7761 ; 7762 ; BROADWELL-SSE-LABEL: test_pandn: 7763 ; BROADWELL-SSE: # %bb.0: 7764 ; BROADWELL-SSE-NEXT: pandn 
%xmm1, %xmm0 # sched: [1:0.33] 7765 ; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] 7766 ; BROADWELL-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [6:0.50] 7767 ; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] 7768 ; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] 7769 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 7770 ; 7771 ; BROADWELL-LABEL: test_pandn: 7772 ; BROADWELL: # %bb.0: 7773 ; BROADWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 7774 ; BROADWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [6:0.50] 7775 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7776 ; BROADWELL-NEXT: retq # sched: [7:1.00] 7777 ; 7778 ; SKYLAKE-SSE-LABEL: test_pandn: 7779 ; SKYLAKE-SSE: # %bb.0: 7780 ; SKYLAKE-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] 7781 ; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] 7782 ; SKYLAKE-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] 7783 ; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] 7784 ; SKYLAKE-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] 7785 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 7786 ; 7787 ; SKYLAKE-LABEL: test_pandn: 7788 ; SKYLAKE: # %bb.0: 7789 ; SKYLAKE-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 7790 ; SKYLAKE-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] 7791 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 7792 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 7793 ; 7794 ; SKX-SSE-LABEL: test_pandn: 7795 ; SKX-SSE: # %bb.0: 7796 ; SKX-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] 7797 ; SKX-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] 7798 ; SKX-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] 7799 ; SKX-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] 7800 ; SKX-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] 7801 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 7802 ; 7803 ; SKX-LABEL: test_pandn: 7804 ; SKX: # %bb.0: 7805 ; SKX-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 7806 ; SKX-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] 7807 ; 
SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 7808 ; SKX-NEXT: retq # sched: [7:1.00] 7809 ; 7810 ; BTVER2-SSE-LABEL: test_pandn: 7811 ; BTVER2-SSE: # %bb.0: 7812 ; BTVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50] 7813 ; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50] 7814 ; BTVER2-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [6:1.00] 7815 ; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] 7816 ; BTVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 7817 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 7818 ; 7819 ; BTVER2-LABEL: test_pandn: 7820 ; BTVER2: # %bb.0: 7821 ; BTVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7822 ; BTVER2-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [6:1.00] 7823 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7824 ; BTVER2-NEXT: retq # sched: [4:1.00] 7825 ; 7826 ; ZNVER1-SSE-LABEL: test_pandn: 7827 ; ZNVER1-SSE: # %bb.0: 7828 ; ZNVER1-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.25] 7829 ; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.25] 7830 ; ZNVER1-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [8:0.50] 7831 ; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25] 7832 ; ZNVER1-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.25] 7833 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 7834 ; 7835 ; ZNVER1-LABEL: test_pandn: 7836 ; ZNVER1: # %bb.0: 7837 ; ZNVER1-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 7838 ; ZNVER1-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [8:0.50] 7839 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 7840 ; ZNVER1-NEXT: retq # sched: [1:0.50] 7841 %1 = xor <2 x i64> %a0, <i64 -1, i64 -1> 7842 %2 = and <2 x i64> %a1, %1 7843 %3 = load <2 x i64>, <2 x i64> *%a2, align 16 7844 %4 = xor <2 x i64> %2, <i64 -1, i64 -1> 7845 %5 = and <2 x i64> %3, %4 7846 %6 = add <2 x i64> %2, %5 7847 ret <2 x i64> %6 7848 } 7849 7850 define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 7851 ; GENERIC-LABEL: test_pavgb: 7852 ; GENERIC: # %bb.0: 7853 ; GENERIC-NEXT: pavgb %xmm1, 
%xmm0 # sched: [1:0.50] 7854 ; GENERIC-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] 7855 ; GENERIC-NEXT: retq # sched: [1:1.00] 7856 ; 7857 ; ATOM-LABEL: test_pavgb: 7858 ; ATOM: # %bb.0: 7859 ; ATOM-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] 7860 ; ATOM-NEXT: pavgb (%rdi), %xmm0 # sched: [1:1.00] 7861 ; ATOM-NEXT: nop # sched: [1:0.50] 7862 ; ATOM-NEXT: nop # sched: [1:0.50] 7863 ; ATOM-NEXT: nop # sched: [1:0.50] 7864 ; ATOM-NEXT: nop # sched: [1:0.50] 7865 ; ATOM-NEXT: retq # sched: [79:39.50] 7866 ; 7867 ; SLM-LABEL: test_pavgb: 7868 ; SLM: # %bb.0: 7869 ; SLM-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] 7870 ; SLM-NEXT: pavgb (%rdi), %xmm0 # sched: [4:1.00] 7871 ; SLM-NEXT: retq # sched: [4:1.00] 7872 ; 7873 ; SANDY-SSE-LABEL: test_pavgb: 7874 ; SANDY-SSE: # %bb.0: 7875 ; SANDY-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] 7876 ; SANDY-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] 7877 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 7878 ; 7879 ; SANDY-LABEL: test_pavgb: 7880 ; SANDY: # %bb.0: 7881 ; SANDY-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7882 ; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7883 ; SANDY-NEXT: retq # sched: [1:1.00] 7884 ; 7885 ; HASWELL-SSE-LABEL: test_pavgb: 7886 ; HASWELL-SSE: # %bb.0: 7887 ; HASWELL-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] 7888 ; HASWELL-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] 7889 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 7890 ; 7891 ; HASWELL-LABEL: test_pavgb: 7892 ; HASWELL: # %bb.0: 7893 ; HASWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7894 ; HASWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7895 ; HASWELL-NEXT: retq # sched: [7:1.00] 7896 ; 7897 ; BROADWELL-SSE-LABEL: test_pavgb: 7898 ; BROADWELL-SSE: # %bb.0: 7899 ; BROADWELL-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] 7900 ; BROADWELL-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [6:0.50] 7901 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 7902 ; 7903 ; BROADWELL-LABEL: test_pavgb: 7904 ; BROADWELL: # %bb.0: 7905 
; BROADWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7906 ; BROADWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 7907 ; BROADWELL-NEXT: retq # sched: [7:1.00] 7908 ; 7909 ; SKYLAKE-SSE-LABEL: test_pavgb: 7910 ; SKYLAKE-SSE: # %bb.0: 7911 ; SKYLAKE-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] 7912 ; SKYLAKE-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] 7913 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 7914 ; 7915 ; SKYLAKE-LABEL: test_pavgb: 7916 ; SKYLAKE: # %bb.0: 7917 ; SKYLAKE-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7918 ; SKYLAKE-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7919 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 7920 ; 7921 ; SKX-SSE-LABEL: test_pavgb: 7922 ; SKX-SSE: # %bb.0: 7923 ; SKX-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] 7924 ; SKX-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] 7925 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 7926 ; 7927 ; SKX-LABEL: test_pavgb: 7928 ; SKX: # %bb.0: 7929 ; SKX-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7930 ; SKX-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 7931 ; SKX-NEXT: retq # sched: [7:1.00] 7932 ; 7933 ; BTVER2-SSE-LABEL: test_pavgb: 7934 ; BTVER2-SSE: # %bb.0: 7935 ; BTVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] 7936 ; BTVER2-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [6:1.00] 7937 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 7938 ; 7939 ; BTVER2-LABEL: test_pavgb: 7940 ; BTVER2: # %bb.0: 7941 ; BTVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 7942 ; BTVER2-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 7943 ; BTVER2-NEXT: retq # sched: [4:1.00] 7944 ; 7945 ; ZNVER1-SSE-LABEL: test_pavgb: 7946 ; ZNVER1-SSE: # %bb.0: 7947 ; ZNVER1-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.25] 7948 ; ZNVER1-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [8:0.50] 7949 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 7950 ; 7951 ; ZNVER1-LABEL: test_pavgb: 7952 ; ZNVER1: # %bb.0: 7953 ; ZNVER1-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 7954 ; ZNVER1-NEXT: vpavgb 
(%rdi), %xmm0, %xmm0 # sched: [8:0.50] 7955 ; ZNVER1-NEXT: retq # sched: [1:0.50] 7956 %1 = zext <16 x i8> %a0 to <16 x i16> 7957 %2 = zext <16 x i8> %a1 to <16 x i16> 7958 %3 = add <16 x i16> %1, %2 7959 %4 = add <16 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 7960 %5 = lshr <16 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 7961 %6 = trunc <16 x i16> %5 to <16 x i8> 7962 %7 = load <16 x i8>, <16 x i8> *%a2, align 16 7963 %8 = zext <16 x i8> %6 to <16 x i16> 7964 %9 = zext <16 x i8> %7 to <16 x i16> 7965 %10 = add <16 x i16> %8, %9 7966 %11 = add <16 x i16> %10, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 7967 %12 = lshr <16 x i16> %11, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 7968 %13 = trunc <16 x i16> %12 to <16 x i8> 7969 ret <16 x i8> %13 7970 } 7971 7972 define <8 x i16> @test_pavgw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 7973 ; GENERIC-LABEL: test_pavgw: 7974 ; GENERIC: # %bb.0: 7975 ; GENERIC-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] 7976 ; GENERIC-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] 7977 ; GENERIC-NEXT: retq # sched: [1:1.00] 7978 ; 7979 ; ATOM-LABEL: test_pavgw: 7980 ; ATOM: # %bb.0: 7981 ; ATOM-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] 7982 ; ATOM-NEXT: pavgw (%rdi), %xmm0 # sched: [1:1.00] 7983 ; ATOM-NEXT: nop # sched: [1:0.50] 7984 ; ATOM-NEXT: nop # sched: [1:0.50] 7985 ; ATOM-NEXT: nop # sched: [1:0.50] 7986 ; ATOM-NEXT: nop # sched: [1:0.50] 7987 ; ATOM-NEXT: retq # sched: [79:39.50] 7988 ; 7989 ; SLM-LABEL: test_pavgw: 7990 ; SLM: # %bb.0: 7991 ; SLM-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] 7992 ; SLM-NEXT: pavgw (%rdi), %xmm0 # sched: [4:1.00] 7993 ; SLM-NEXT: retq # sched: [4:1.00] 7994 ; 7995 ; SANDY-SSE-LABEL: test_pavgw: 7996 ; 
SANDY-SSE: # %bb.0: 7997 ; SANDY-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] 7998 ; SANDY-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] 7999 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 8000 ; 8001 ; SANDY-LABEL: test_pavgw: 8002 ; SANDY: # %bb.0: 8003 ; SANDY-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 8004 ; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8005 ; SANDY-NEXT: retq # sched: [1:1.00] 8006 ; 8007 ; HASWELL-SSE-LABEL: test_pavgw: 8008 ; HASWELL-SSE: # %bb.0: 8009 ; HASWELL-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] 8010 ; HASWELL-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] 8011 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 8012 ; 8013 ; HASWELL-LABEL: test_pavgw: 8014 ; HASWELL: # %bb.0: 8015 ; HASWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 8016 ; HASWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8017 ; HASWELL-NEXT: retq # sched: [7:1.00] 8018 ; 8019 ; BROADWELL-SSE-LABEL: test_pavgw: 8020 ; BROADWELL-SSE: # %bb.0: 8021 ; BROADWELL-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] 8022 ; BROADWELL-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [6:0.50] 8023 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 8024 ; 8025 ; BROADWELL-LABEL: test_pavgw: 8026 ; BROADWELL: # %bb.0: 8027 ; BROADWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 8028 ; BROADWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 8029 ; BROADWELL-NEXT: retq # sched: [7:1.00] 8030 ; 8031 ; SKYLAKE-SSE-LABEL: test_pavgw: 8032 ; SKYLAKE-SSE: # %bb.0: 8033 ; SKYLAKE-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] 8034 ; SKYLAKE-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] 8035 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 8036 ; 8037 ; SKYLAKE-LABEL: test_pavgw: 8038 ; SKYLAKE: # %bb.0: 8039 ; SKYLAKE-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 8040 ; SKYLAKE-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8041 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 8042 ; 8043 ; SKX-SSE-LABEL: test_pavgw: 8044 ; SKX-SSE: # %bb.0: 8045 ; SKX-SSE-NEXT: 
pavgw %xmm1, %xmm0 # sched: [1:0.50] 8046 ; SKX-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] 8047 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 8048 ; 8049 ; SKX-LABEL: test_pavgw: 8050 ; SKX: # %bb.0: 8051 ; SKX-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 8052 ; SKX-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8053 ; SKX-NEXT: retq # sched: [7:1.00] 8054 ; 8055 ; BTVER2-SSE-LABEL: test_pavgw: 8056 ; BTVER2-SSE: # %bb.0: 8057 ; BTVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] 8058 ; BTVER2-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [6:1.00] 8059 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 8060 ; 8061 ; BTVER2-LABEL: test_pavgw: 8062 ; BTVER2: # %bb.0: 8063 ; BTVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 8064 ; BTVER2-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 8065 ; BTVER2-NEXT: retq # sched: [4:1.00] 8066 ; 8067 ; ZNVER1-SSE-LABEL: test_pavgw: 8068 ; ZNVER1-SSE: # %bb.0: 8069 ; ZNVER1-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.25] 8070 ; ZNVER1-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [8:0.50] 8071 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 8072 ; 8073 ; ZNVER1-LABEL: test_pavgw: 8074 ; ZNVER1: # %bb.0: 8075 ; ZNVER1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 8076 ; ZNVER1-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 8077 ; ZNVER1-NEXT: retq # sched: [1:0.50] 8078 %1 = zext <8 x i16> %a0 to <8 x i32> 8079 %2 = zext <8 x i16> %a1 to <8 x i32> 8080 %3 = add <8 x i32> %1, %2 8081 %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 8082 %5 = lshr <8 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 8083 %6 = trunc <8 x i32> %5 to <8 x i16> 8084 %7 = load <8 x i16>, <8 x i16> *%a2, align 16 8085 %8 = zext <8 x i16> %6 to <8 x i32> 8086 %9 = zext <8 x i16> %7 to <8 x i32> 8087 %10 = add <8 x i32> %8, %9 8088 %11 = add <8 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 8089 %12 = lshr <8 x i32> %11, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 8090 %13 
= trunc <8 x i32> %12 to <8 x i16> 8091 ret <8 x i16> %13 8092 } 8093 8094 define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 8095 ; GENERIC-LABEL: test_pcmpeqb: 8096 ; GENERIC: # %bb.0: 8097 ; GENERIC-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] 8098 ; GENERIC-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] 8099 ; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8100 ; GENERIC-NEXT: retq # sched: [1:1.00] 8101 ; 8102 ; ATOM-LABEL: test_pcmpeqb: 8103 ; ATOM: # %bb.0: 8104 ; ATOM-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] 8105 ; ATOM-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [1:1.00] 8106 ; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 8107 ; ATOM-NEXT: nop # sched: [1:0.50] 8108 ; ATOM-NEXT: nop # sched: [1:0.50] 8109 ; ATOM-NEXT: retq # sched: [79:39.50] 8110 ; 8111 ; SLM-LABEL: test_pcmpeqb: 8112 ; SLM: # %bb.0: 8113 ; SLM-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] 8114 ; SLM-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [4:1.00] 8115 ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 8116 ; SLM-NEXT: retq # sched: [4:1.00] 8117 ; 8118 ; SANDY-SSE-LABEL: test_pcmpeqb: 8119 ; SANDY-SSE: # %bb.0: 8120 ; SANDY-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] 8121 ; SANDY-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] 8122 ; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8123 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 8124 ; 8125 ; SANDY-LABEL: test_pcmpeqb: 8126 ; SANDY: # %bb.0: 8127 ; SANDY-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8128 ; SANDY-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8129 ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8130 ; SANDY-NEXT: retq # sched: [1:1.00] 8131 ; 8132 ; HASWELL-SSE-LABEL: test_pcmpeqb: 8133 ; HASWELL-SSE: # %bb.0: 8134 ; HASWELL-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] 8135 ; HASWELL-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] 8136 ; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8137 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 8138 ; 8139 ; HASWELL-LABEL: 
test_pcmpeqb: 8140 ; HASWELL: # %bb.0: 8141 ; HASWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8142 ; HASWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8143 ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8144 ; HASWELL-NEXT: retq # sched: [7:1.00] 8145 ; 8146 ; BROADWELL-SSE-LABEL: test_pcmpeqb: 8147 ; BROADWELL-SSE: # %bb.0: 8148 ; BROADWELL-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] 8149 ; BROADWELL-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [6:0.50] 8150 ; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8151 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 8152 ; 8153 ; BROADWELL-LABEL: test_pcmpeqb: 8154 ; BROADWELL: # %bb.0: 8155 ; BROADWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8156 ; BROADWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 8157 ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8158 ; BROADWELL-NEXT: retq # sched: [7:1.00] 8159 ; 8160 ; SKYLAKE-SSE-LABEL: test_pcmpeqb: 8161 ; SKYLAKE-SSE: # %bb.0: 8162 ; SKYLAKE-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] 8163 ; SKYLAKE-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] 8164 ; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8165 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 8166 ; 8167 ; SKYLAKE-LABEL: test_pcmpeqb: 8168 ; SKYLAKE: # %bb.0: 8169 ; SKYLAKE-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8170 ; SKYLAKE-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8171 ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8172 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 8173 ; 8174 ; SKX-SSE-LABEL: test_pcmpeqb: 8175 ; SKX-SSE: # %bb.0: 8176 ; SKX-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] 8177 ; SKX-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] 8178 ; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8179 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 8180 ; 8181 ; SKX-LABEL: test_pcmpeqb: 8182 ; SKX: # %bb.0: 8183 ; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8184 ; SKX-NEXT: vpcmpeqb 
(%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8185 ; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8186 ; SKX-NEXT: retq # sched: [7:1.00] 8187 ; 8188 ; BTVER2-SSE-LABEL: test_pcmpeqb: 8189 ; BTVER2-SSE: # %bb.0: 8190 ; BTVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] 8191 ; BTVER2-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [6:1.00] 8192 ; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 8193 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 8194 ; 8195 ; BTVER2-LABEL: test_pcmpeqb: 8196 ; BTVER2: # %bb.0: 8197 ; BTVER2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8198 ; BTVER2-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 8199 ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] 8200 ; BTVER2-NEXT: retq # sched: [4:1.00] 8201 ; 8202 ; ZNVER1-SSE-LABEL: test_pcmpeqb: 8203 ; ZNVER1-SSE: # %bb.0: 8204 ; ZNVER1-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.25] 8205 ; ZNVER1-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [8:0.50] 8206 ; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] 8207 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 8208 ; 8209 ; ZNVER1-LABEL: test_pcmpeqb: 8210 ; ZNVER1: # %bb.0: 8211 ; ZNVER1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.25] 8212 ; ZNVER1-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 8213 ; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] 8214 ; ZNVER1-NEXT: retq # sched: [1:0.50] 8215 %1 = icmp eq <16 x i8> %a0, %a1 8216 %2 = load <16 x i8>, <16 x i8> *%a2, align 16 8217 %3 = icmp eq <16 x i8> %a0, %2 8218 %4 = or <16 x i1> %1, %3 8219 %5 = sext <16 x i1> %4 to <16 x i8> 8220 ret <16 x i8> %5 8221 } 8222 8223 define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 8224 ; GENERIC-LABEL: test_pcmpeqd: 8225 ; GENERIC: # %bb.0: 8226 ; GENERIC-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] 8227 ; GENERIC-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] 8228 ; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8229 ; GENERIC-NEXT: retq # sched: [1:1.00] 8230 ; 8231 ; ATOM-LABEL: 
test_pcmpeqd: 8232 ; ATOM: # %bb.0: 8233 ; ATOM-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] 8234 ; ATOM-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [1:1.00] 8235 ; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 8236 ; ATOM-NEXT: nop # sched: [1:0.50] 8237 ; ATOM-NEXT: nop # sched: [1:0.50] 8238 ; ATOM-NEXT: retq # sched: [79:39.50] 8239 ; 8240 ; SLM-LABEL: test_pcmpeqd: 8241 ; SLM: # %bb.0: 8242 ; SLM-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] 8243 ; SLM-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [4:1.00] 8244 ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 8245 ; SLM-NEXT: retq # sched: [4:1.00] 8246 ; 8247 ; SANDY-SSE-LABEL: test_pcmpeqd: 8248 ; SANDY-SSE: # %bb.0: 8249 ; SANDY-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] 8250 ; SANDY-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] 8251 ; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8252 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 8253 ; 8254 ; SANDY-LABEL: test_pcmpeqd: 8255 ; SANDY: # %bb.0: 8256 ; SANDY-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8257 ; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8258 ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8259 ; SANDY-NEXT: retq # sched: [1:1.00] 8260 ; 8261 ; HASWELL-SSE-LABEL: test_pcmpeqd: 8262 ; HASWELL-SSE: # %bb.0: 8263 ; HASWELL-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] 8264 ; HASWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] 8265 ; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8266 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 8267 ; 8268 ; HASWELL-LABEL: test_pcmpeqd: 8269 ; HASWELL: # %bb.0: 8270 ; HASWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8271 ; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8272 ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8273 ; HASWELL-NEXT: retq # sched: [7:1.00] 8274 ; 8275 ; BROADWELL-SSE-LABEL: test_pcmpeqd: 8276 ; BROADWELL-SSE: # %bb.0: 8277 ; BROADWELL-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] 8278 ; BROADWELL-SSE-NEXT: 
pcmpeqd (%rdi), %xmm0 # sched: [6:0.50] 8279 ; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8280 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 8281 ; 8282 ; BROADWELL-LABEL: test_pcmpeqd: 8283 ; BROADWELL: # %bb.0: 8284 ; BROADWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8285 ; BROADWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 8286 ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8287 ; BROADWELL-NEXT: retq # sched: [7:1.00] 8288 ; 8289 ; SKYLAKE-SSE-LABEL: test_pcmpeqd: 8290 ; SKYLAKE-SSE: # %bb.0: 8291 ; SKYLAKE-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] 8292 ; SKYLAKE-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] 8293 ; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8294 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 8295 ; 8296 ; SKYLAKE-LABEL: test_pcmpeqd: 8297 ; SKYLAKE: # %bb.0: 8298 ; SKYLAKE-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8299 ; SKYLAKE-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8300 ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8301 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 8302 ; 8303 ; SKX-SSE-LABEL: test_pcmpeqd: 8304 ; SKX-SSE: # %bb.0: 8305 ; SKX-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] 8306 ; SKX-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] 8307 ; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8308 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 8309 ; 8310 ; SKX-LABEL: test_pcmpeqd: 8311 ; SKX: # %bb.0: 8312 ; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8313 ; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8314 ; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8315 ; SKX-NEXT: retq # sched: [7:1.00] 8316 ; 8317 ; BTVER2-SSE-LABEL: test_pcmpeqd: 8318 ; BTVER2-SSE: # %bb.0: 8319 ; BTVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] 8320 ; BTVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:1.00] 8321 ; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 8322 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 8323 ; 8324 
; BTVER2-LABEL: test_pcmpeqd: 8325 ; BTVER2: # %bb.0: 8326 ; BTVER2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8327 ; BTVER2-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 8328 ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] 8329 ; BTVER2-NEXT: retq # sched: [4:1.00] 8330 ; 8331 ; ZNVER1-SSE-LABEL: test_pcmpeqd: 8332 ; ZNVER1-SSE: # %bb.0: 8333 ; ZNVER1-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.25] 8334 ; ZNVER1-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [8:0.50] 8335 ; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] 8336 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 8337 ; 8338 ; ZNVER1-LABEL: test_pcmpeqd: 8339 ; ZNVER1: # %bb.0: 8340 ; ZNVER1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.25] 8341 ; ZNVER1-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 8342 ; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] 8343 ; ZNVER1-NEXT: retq # sched: [1:0.50] 8344 %1 = icmp eq <4 x i32> %a0, %a1 8345 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 8346 %3 = icmp eq <4 x i32> %a0, %2 8347 %4 = or <4 x i1> %1, %3 8348 %5 = sext <4 x i1> %4 to <4 x i32> 8349 ret <4 x i32> %5 8350 } 8351 8352 define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 8353 ; GENERIC-LABEL: test_pcmpeqw: 8354 ; GENERIC: # %bb.0: 8355 ; GENERIC-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] 8356 ; GENERIC-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] 8357 ; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8358 ; GENERIC-NEXT: retq # sched: [1:1.00] 8359 ; 8360 ; ATOM-LABEL: test_pcmpeqw: 8361 ; ATOM: # %bb.0: 8362 ; ATOM-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] 8363 ; ATOM-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [1:1.00] 8364 ; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 8365 ; ATOM-NEXT: nop # sched: [1:0.50] 8366 ; ATOM-NEXT: nop # sched: [1:0.50] 8367 ; ATOM-NEXT: retq # sched: [79:39.50] 8368 ; 8369 ; SLM-LABEL: test_pcmpeqw: 8370 ; SLM: # %bb.0: 8371 ; SLM-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] 8372 ; SLM-NEXT: pcmpeqw 
(%rdi), %xmm0 # sched: [4:1.00] 8373 ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 8374 ; SLM-NEXT: retq # sched: [4:1.00] 8375 ; 8376 ; SANDY-SSE-LABEL: test_pcmpeqw: 8377 ; SANDY-SSE: # %bb.0: 8378 ; SANDY-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] 8379 ; SANDY-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] 8380 ; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8381 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 8382 ; 8383 ; SANDY-LABEL: test_pcmpeqw: 8384 ; SANDY: # %bb.0: 8385 ; SANDY-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8386 ; SANDY-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8387 ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8388 ; SANDY-NEXT: retq # sched: [1:1.00] 8389 ; 8390 ; HASWELL-SSE-LABEL: test_pcmpeqw: 8391 ; HASWELL-SSE: # %bb.0: 8392 ; HASWELL-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] 8393 ; HASWELL-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] 8394 ; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8395 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 8396 ; 8397 ; HASWELL-LABEL: test_pcmpeqw: 8398 ; HASWELL: # %bb.0: 8399 ; HASWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8400 ; HASWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8401 ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8402 ; HASWELL-NEXT: retq # sched: [7:1.00] 8403 ; 8404 ; BROADWELL-SSE-LABEL: test_pcmpeqw: 8405 ; BROADWELL-SSE: # %bb.0: 8406 ; BROADWELL-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] 8407 ; BROADWELL-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [6:0.50] 8408 ; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8409 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 8410 ; 8411 ; BROADWELL-LABEL: test_pcmpeqw: 8412 ; BROADWELL: # %bb.0: 8413 ; BROADWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8414 ; BROADWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 8415 ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8416 ; BROADWELL-NEXT: retq # sched: 
[7:1.00] 8417 ; 8418 ; SKYLAKE-SSE-LABEL: test_pcmpeqw: 8419 ; SKYLAKE-SSE: # %bb.0: 8420 ; SKYLAKE-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] 8421 ; SKYLAKE-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] 8422 ; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8423 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 8424 ; 8425 ; SKYLAKE-LABEL: test_pcmpeqw: 8426 ; SKYLAKE: # %bb.0: 8427 ; SKYLAKE-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8428 ; SKYLAKE-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8429 ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8430 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 8431 ; 8432 ; SKX-SSE-LABEL: test_pcmpeqw: 8433 ; SKX-SSE: # %bb.0: 8434 ; SKX-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] 8435 ; SKX-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] 8436 ; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 8437 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 8438 ; 8439 ; SKX-LABEL: test_pcmpeqw: 8440 ; SKX: # %bb.0: 8441 ; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8442 ; SKX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8443 ; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8444 ; SKX-NEXT: retq # sched: [7:1.00] 8445 ; 8446 ; BTVER2-SSE-LABEL: test_pcmpeqw: 8447 ; BTVER2-SSE: # %bb.0: 8448 ; BTVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] 8449 ; BTVER2-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [6:1.00] 8450 ; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 8451 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 8452 ; 8453 ; BTVER2-LABEL: test_pcmpeqw: 8454 ; BTVER2: # %bb.0: 8455 ; BTVER2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8456 ; BTVER2-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 8457 ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] 8458 ; BTVER2-NEXT: retq # sched: [4:1.00] 8459 ; 8460 ; ZNVER1-SSE-LABEL: test_pcmpeqw: 8461 ; ZNVER1-SSE: # %bb.0: 8462 ; ZNVER1-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.25] 8463 ; ZNVER1-SSE-NEXT: pcmpeqw 
(%rdi), %xmm0 # sched: [8:0.50] 8464 ; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] 8465 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 8466 ; 8467 ; ZNVER1-LABEL: test_pcmpeqw: 8468 ; ZNVER1: # %bb.0: 8469 ; ZNVER1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.25] 8470 ; ZNVER1-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 8471 ; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] 8472 ; ZNVER1-NEXT: retq # sched: [1:0.50] 8473 %1 = icmp eq <8 x i16> %a0, %a1 8474 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 8475 %3 = icmp eq <8 x i16> %a0, %2 8476 %4 = or <8 x i1> %1, %3 8477 %5 = sext <8 x i1> %4 to <8 x i16> 8478 ret <8 x i16> %5 8479 } 8480 8481 define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 8482 ; GENERIC-LABEL: test_pcmpgtb: 8483 ; GENERIC: # %bb.0: 8484 ; GENERIC-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] 8485 ; GENERIC-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] 8486 ; GENERIC-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] 8487 ; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] 8488 ; GENERIC-NEXT: retq # sched: [1:1.00] 8489 ; 8490 ; ATOM-LABEL: test_pcmpgtb: 8491 ; ATOM: # %bb.0: 8492 ; ATOM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] 8493 ; ATOM-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [1:1.00] 8494 ; ATOM-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] 8495 ; ATOM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] 8496 ; ATOM-NEXT: retq # sched: [79:39.50] 8497 ; 8498 ; SLM-LABEL: test_pcmpgtb: 8499 ; SLM: # %bb.0: 8500 ; SLM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] 8501 ; SLM-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [4:1.00] 8502 ; SLM-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] 8503 ; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] 8504 ; SLM-NEXT: retq # sched: [4:1.00] 8505 ; 8506 ; SANDY-SSE-LABEL: test_pcmpgtb: 8507 ; SANDY-SSE: # %bb.0: 8508 ; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] 8509 ; SANDY-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] 8510 ; SANDY-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: 
[7:0.50] 8511 ; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] 8512 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 8513 ; 8514 ; SANDY-LABEL: test_pcmpgtb: 8515 ; SANDY: # %bb.0: 8516 ; SANDY-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8517 ; SANDY-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8518 ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8519 ; SANDY-NEXT: retq # sched: [1:1.00] 8520 ; 8521 ; HASWELL-SSE-LABEL: test_pcmpgtb: 8522 ; HASWELL-SSE: # %bb.0: 8523 ; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] 8524 ; HASWELL-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] 8525 ; HASWELL-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] 8526 ; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] 8527 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 8528 ; 8529 ; HASWELL-LABEL: test_pcmpgtb: 8530 ; HASWELL: # %bb.0: 8531 ; HASWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8532 ; HASWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8533 ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8534 ; HASWELL-NEXT: retq # sched: [7:1.00] 8535 ; 8536 ; BROADWELL-SSE-LABEL: test_pcmpgtb: 8537 ; BROADWELL-SSE: # %bb.0: 8538 ; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] 8539 ; BROADWELL-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] 8540 ; BROADWELL-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [6:0.50] 8541 ; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] 8542 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 8543 ; 8544 ; BROADWELL-LABEL: test_pcmpgtb: 8545 ; BROADWELL: # %bb.0: 8546 ; BROADWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8547 ; BROADWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 8548 ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8549 ; BROADWELL-NEXT: retq # sched: [7:1.00] 8550 ; 8551 ; SKYLAKE-SSE-LABEL: test_pcmpgtb: 8552 ; SKYLAKE-SSE: # %bb.0: 8553 ; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] 8554 ; SKYLAKE-SSE-NEXT: pcmpgtb %xmm1, 
%xmm2 # sched: [1:0.50] 8555 ; SKYLAKE-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] 8556 ; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] 8557 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 8558 ; 8559 ; SKYLAKE-LABEL: test_pcmpgtb: 8560 ; SKYLAKE: # %bb.0: 8561 ; SKYLAKE-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8562 ; SKYLAKE-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8563 ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8564 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 8565 ; 8566 ; SKX-SSE-LABEL: test_pcmpgtb: 8567 ; SKX-SSE: # %bb.0: 8568 ; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] 8569 ; SKX-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] 8570 ; SKX-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] 8571 ; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] 8572 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 8573 ; 8574 ; SKX-LABEL: test_pcmpgtb: 8575 ; SKX: # %bb.0: 8576 ; SKX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8577 ; SKX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8578 ; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8579 ; SKX-NEXT: retq # sched: [7:1.00] 8580 ; 8581 ; BTVER2-SSE-LABEL: test_pcmpgtb: 8582 ; BTVER2-SSE: # %bb.0: 8583 ; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] 8584 ; BTVER2-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [6:1.00] 8585 ; BTVER2-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] 8586 ; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] 8587 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 8588 ; 8589 ; BTVER2-LABEL: test_pcmpgtb: 8590 ; BTVER2: # %bb.0: 8591 ; BTVER2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8592 ; BTVER2-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 8593 ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] 8594 ; BTVER2-NEXT: retq # sched: [4:1.00] 8595 ; 8596 ; ZNVER1-SSE-LABEL: test_pcmpgtb: 8597 ; ZNVER1-SSE: # %bb.0: 8598 ; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25] 8599 ; ZNVER1-SSE-NEXT: pcmpgtb (%rdi), 
%xmm0 # sched: [8:0.50] 8600 ; ZNVER1-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.25] 8601 ; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] 8602 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 8603 ; 8604 ; ZNVER1-LABEL: test_pcmpgtb: 8605 ; ZNVER1: # %bb.0: 8606 ; ZNVER1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.25] 8607 ; ZNVER1-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 8608 ; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] 8609 ; ZNVER1-NEXT: retq # sched: [1:0.50] 8610 %1 = icmp sgt <16 x i8> %a0, %a1 8611 %2 = load <16 x i8>, <16 x i8> *%a2, align 16 8612 %3 = icmp sgt <16 x i8> %a0, %2 8613 %4 = or <16 x i1> %1, %3 8614 %5 = sext <16 x i1> %4 to <16 x i8> 8615 ret <16 x i8> %5 8616 } 8617

; Scheduling test for the signed dword greater-than compare (pcmpgtd, or
; vpcmpgtd when AVX is enabled).
;
; %a0 is compared against %a1 (register operand) and against the 16-byte
; aligned vector loaded from %a2 (expected to fold into the compare as a
; memory operand); the two masks are OR-ed and sign-extended to <4 x i32>.
;
; Both compares are 'icmp sgt'. The load compare used to be 'icmp eq', which
; selected the equality instruction for the memory operand and left the
; memory form of the greater-than instruction untested, unlike the byte and
; word variants of this test.
;
; TODO(review): the memory-form expectations below were edited by hand, keeping
; the previous sched figures on the assumption that they match the byte/word
; greater-than tests in this file; regenerate with
; utils/update_llc_test_checks.py to confirm.
define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_pcmpgtd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
; GENERIC-NEXT:    pcmpgtd (%rdi), %xmm0 # sched: [7:0.50]
; GENERIC-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_pcmpgtd:
; ATOM:       # %bb.0:
; ATOM-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
; ATOM-NEXT:    pcmpgtd (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
; ATOM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_pcmpgtd:
; SLM:       # %bb.0:
; SLM-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
; SLM-NEXT:    pcmpgtd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
; SLM-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_pcmpgtd:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
; SANDY-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
; SANDY-SSE-NEXT:    pcmpgtd (%rdi), %xmm0 # sched: [7:0.50]
; SANDY-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_pcmpgtd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
; SANDY-NEXT:    vpcmpgtd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_pcmpgtd:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
; HASWELL-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
; HASWELL-SSE-NEXT:    pcmpgtd (%rdi), %xmm0 # sched: [7:0.50]
; HASWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_pcmpgtd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
; HASWELL-NEXT:    vpcmpgtd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_pcmpgtd:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
; BROADWELL-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
; BROADWELL-SSE-NEXT:    pcmpgtd (%rdi), %xmm0 # sched: [6:0.50]
; BROADWELL-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pcmpgtd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
; BROADWELL-NEXT:    vpcmpgtd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_pcmpgtd:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
; SKYLAKE-SSE-NEXT:    pcmpgtd (%rdi), %xmm0 # sched: [7:0.50]
; SKYLAKE-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pcmpgtd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
; SKYLAKE-NEXT:    vpcmpgtd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_pcmpgtd:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.33]
; SKX-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
; SKX-SSE-NEXT:    pcmpgtd (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pcmpgtd:
; SKX:       # %bb.0:
; SKX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
; SKX-NEXT:    vpcmpgtd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_pcmpgtd:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    pcmpgtd (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_pcmpgtd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
; BTVER2-NEXT:    vpcmpgtd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_pcmpgtd:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    movdqa %xmm0, %xmm2 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    pcmpgtd (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT:    pcmpgtd %xmm1, %xmm2 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    por %xmm2, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_pcmpgtd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
; ZNVER1-NEXT:    vpcmpgtd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT:    vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = icmp sgt <4 x i32> %a0, %a1
  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
  %3 = icmp sgt <4 x i32> %a0, %2
  %4 = or <4 x i1> %1, %3
  %5 = sext <4 x i1> %4 to <4 x i32>
  ret <4 x i32> %5
}

8754 8755 define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 8756 ; GENERIC-LABEL: test_pcmpgtw: 8757 ; GENERIC: # %bb.0: 8758 ; GENERIC-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] 8759 ; GENERIC-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] 8760 ; GENERIC-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] 8761 ; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] 8762 ; GENERIC-NEXT: retq # sched: [1:1.00] 8763 ; 8764 ; ATOM-LABEL: test_pcmpgtw: 8765 ; ATOM: # %bb.0: 8766 ; ATOM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] 8767 ; ATOM-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [1:1.00] 8768 ; ATOM-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] 8769 ; ATOM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] 8770 ; ATOM-NEXT: retq # sched: [79:39.50] 8771 ; 8772 ; SLM-LABEL: test_pcmpgtw: 8773 ; SLM: # %bb.0: 8774 ; SLM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] 8775 ; SLM-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [4:1.00] 8776 ; SLM-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] 8777 ; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] 8778 ; SLM-NEXT: retq # sched: [4:1.00] 8779 ; 8780 ; SANDY-SSE-LABEL: test_pcmpgtw: 8781 ; SANDY-SSE: # %bb.0: 8782 ; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] 8783 ;
SANDY-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] 8784 ; SANDY-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] 8785 ; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] 8786 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 8787 ; 8788 ; SANDY-LABEL: test_pcmpgtw: 8789 ; SANDY: # %bb.0: 8790 ; SANDY-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8791 ; SANDY-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8792 ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8793 ; SANDY-NEXT: retq # sched: [1:1.00] 8794 ; 8795 ; HASWELL-SSE-LABEL: test_pcmpgtw: 8796 ; HASWELL-SSE: # %bb.0: 8797 ; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] 8798 ; HASWELL-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] 8799 ; HASWELL-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] 8800 ; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] 8801 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 8802 ; 8803 ; HASWELL-LABEL: test_pcmpgtw: 8804 ; HASWELL: # %bb.0: 8805 ; HASWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8806 ; HASWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8807 ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8808 ; HASWELL-NEXT: retq # sched: [7:1.00] 8809 ; 8810 ; BROADWELL-SSE-LABEL: test_pcmpgtw: 8811 ; BROADWELL-SSE: # %bb.0: 8812 ; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] 8813 ; BROADWELL-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] 8814 ; BROADWELL-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [6:0.50] 8815 ; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] 8816 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 8817 ; 8818 ; BROADWELL-LABEL: test_pcmpgtw: 8819 ; BROADWELL: # %bb.0: 8820 ; BROADWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8821 ; BROADWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 8822 ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8823 ; BROADWELL-NEXT: retq # sched: [7:1.00] 8824 ; 8825 ; SKYLAKE-SSE-LABEL: test_pcmpgtw: 8826 ; SKYLAKE-SSE: # 
%bb.0: 8827 ; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] 8828 ; SKYLAKE-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] 8829 ; SKYLAKE-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] 8830 ; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] 8831 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 8832 ; 8833 ; SKYLAKE-LABEL: test_pcmpgtw: 8834 ; SKYLAKE: # %bb.0: 8835 ; SKYLAKE-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8836 ; SKYLAKE-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8837 ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8838 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 8839 ; 8840 ; SKX-SSE-LABEL: test_pcmpgtw: 8841 ; SKX-SSE: # %bb.0: 8842 ; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] 8843 ; SKX-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] 8844 ; SKX-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] 8845 ; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] 8846 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 8847 ; 8848 ; SKX-LABEL: test_pcmpgtw: 8849 ; SKX: # %bb.0: 8850 ; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8851 ; SKX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 8852 ; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 8853 ; SKX-NEXT: retq # sched: [7:1.00] 8854 ; 8855 ; BTVER2-SSE-LABEL: test_pcmpgtw: 8856 ; BTVER2-SSE: # %bb.0: 8857 ; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] 8858 ; BTVER2-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [6:1.00] 8859 ; BTVER2-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] 8860 ; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] 8861 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 8862 ; 8863 ; BTVER2-LABEL: test_pcmpgtw: 8864 ; BTVER2: # %bb.0: 8865 ; BTVER2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] 8866 ; BTVER2-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 8867 ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] 8868 ; BTVER2-NEXT: retq # sched: [4:1.00] 8869 ; 8870 ; ZNVER1-SSE-LABEL: test_pcmpgtw: 8871 ; ZNVER1-SSE: # 
%bb.0: 8872 ; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25] 8873 ; ZNVER1-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [8:0.50] 8874 ; ZNVER1-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.25] 8875 ; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] 8876 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 8877 ; 8878 ; ZNVER1-LABEL: test_pcmpgtw: 8879 ; ZNVER1: # %bb.0: 8880 ; ZNVER1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.25] 8881 ; ZNVER1-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 8882 ; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] 8883 ; ZNVER1-NEXT: retq # sched: [1:0.50] 8884 %1 = icmp sgt <8 x i16> %a0, %a1 8885 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 8886 %3 = icmp sgt <8 x i16> %a0, %2 8887 %4 = or <8 x i1> %1, %3 8888 %5 = sext <8 x i1> %4 to <8 x i16> 8889 ret <8 x i16> %5 8890 } 8891 8892 define i16 @test_pextrw(<8 x i16> %a0) { 8893 ; GENERIC-LABEL: test_pextrw: 8894 ; GENERIC: # %bb.0: 8895 ; GENERIC-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] 8896 ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax 8897 ; GENERIC-NEXT: retq # sched: [1:1.00] 8898 ; 8899 ; ATOM-LABEL: test_pextrw: 8900 ; ATOM: # %bb.0: 8901 ; ATOM-NEXT: pextrw $6, %xmm0, %eax # sched: [4:2.00] 8902 ; ATOM-NEXT: # kill: def $ax killed $ax killed $eax 8903 ; ATOM-NEXT: retq # sched: [79:39.50] 8904 ; 8905 ; SLM-LABEL: test_pextrw: 8906 ; SLM: # %bb.0: 8907 ; SLM-NEXT: pextrw $6, %xmm0, %eax # sched: [1:1.00] 8908 ; SLM-NEXT: # kill: def $ax killed $ax killed $eax 8909 ; SLM-NEXT: retq # sched: [4:1.00] 8910 ; 8911 ; SANDY-SSE-LABEL: test_pextrw: 8912 ; SANDY-SSE: # %bb.0: 8913 ; SANDY-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] 8914 ; SANDY-SSE-NEXT: # kill: def $ax killed $ax killed $eax 8915 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 8916 ; 8917 ; SANDY-LABEL: test_pextrw: 8918 ; SANDY: # %bb.0: 8919 ; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] 8920 ; SANDY-NEXT: # kill: def $ax killed $ax killed $eax 8921 ; SANDY-NEXT: retq # 
sched: [1:1.00] 8922 ; 8923 ; HASWELL-SSE-LABEL: test_pextrw: 8924 ; HASWELL-SSE: # %bb.0: 8925 ; HASWELL-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:1.00] 8926 ; HASWELL-SSE-NEXT: # kill: def $ax killed $ax killed $eax 8927 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 8928 ; 8929 ; HASWELL-LABEL: test_pextrw: 8930 ; HASWELL: # %bb.0: 8931 ; HASWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00] 8932 ; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax 8933 ; HASWELL-NEXT: retq # sched: [7:1.00] 8934 ; 8935 ; BROADWELL-SSE-LABEL: test_pextrw: 8936 ; BROADWELL-SSE: # %bb.0: 8937 ; BROADWELL-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:1.00] 8938 ; BROADWELL-SSE-NEXT: # kill: def $ax killed $ax killed $eax 8939 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 8940 ; 8941 ; BROADWELL-LABEL: test_pextrw: 8942 ; BROADWELL: # %bb.0: 8943 ; BROADWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00] 8944 ; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax 8945 ; BROADWELL-NEXT: retq # sched: [7:1.00] 8946 ; 8947 ; SKYLAKE-SSE-LABEL: test_pextrw: 8948 ; SKYLAKE-SSE: # %bb.0: 8949 ; SKYLAKE-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] 8950 ; SKYLAKE-SSE-NEXT: # kill: def $ax killed $ax killed $eax 8951 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 8952 ; 8953 ; SKYLAKE-LABEL: test_pextrw: 8954 ; SKYLAKE: # %bb.0: 8955 ; SKYLAKE-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] 8956 ; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax 8957 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 8958 ; 8959 ; SKX-SSE-LABEL: test_pextrw: 8960 ; SKX-SSE: # %bb.0: 8961 ; SKX-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] 8962 ; SKX-SSE-NEXT: # kill: def $ax killed $ax killed $eax 8963 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 8964 ; 8965 ; SKX-LABEL: test_pextrw: 8966 ; SKX: # %bb.0: 8967 ; SKX-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] 8968 ; SKX-NEXT: # kill: def $ax killed $ax killed $eax 8969 ; SKX-NEXT: retq # sched: [7:1.00] 8970 ; 8971 ; BTVER2-SSE-LABEL: test_pextrw: 8972 ; 
BTVER2-SSE: # %bb.0: 8973 ; BTVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] 8974 ; BTVER2-SSE-NEXT: # kill: def $ax killed $ax killed $eax 8975 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 8976 ; 8977 ; BTVER2-LABEL: test_pextrw: 8978 ; BTVER2: # %bb.0: 8979 ; BTVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] 8980 ; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax 8981 ; BTVER2-NEXT: retq # sched: [4:1.00] 8982 ; 8983 ; ZNVER1-SSE-LABEL: test_pextrw: 8984 ; ZNVER1-SSE: # %bb.0: 8985 ; ZNVER1-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:2.00] 8986 ; ZNVER1-SSE-NEXT: # kill: def $ax killed $ax killed $eax 8987 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 8988 ; 8989 ; ZNVER1-LABEL: test_pextrw: 8990 ; ZNVER1: # %bb.0: 8991 ; ZNVER1-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:2.00] 8992 ; ZNVER1-NEXT: # kill: def $ax killed $ax killed $eax 8993 ; ZNVER1-NEXT: retq # sched: [1:0.50] 8994 %1 = extractelement <8 x i16> %a0, i32 6 8995 ret i16 %1 8996 } 8997 8998 define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) { 8999 ; GENERIC-LABEL: test_pinsrw: 9000 ; GENERIC: # %bb.0: 9001 ; GENERIC-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:1.00] 9002 ; GENERIC-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50] 9003 ; GENERIC-NEXT: retq # sched: [1:1.00] 9004 ; 9005 ; ATOM-LABEL: test_pinsrw: 9006 ; ATOM: # %bb.0: 9007 ; ATOM-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:1.00] 9008 ; ATOM-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [1:1.00] 9009 ; ATOM-NEXT: nop # sched: [1:0.50] 9010 ; ATOM-NEXT: nop # sched: [1:0.50] 9011 ; ATOM-NEXT: nop # sched: [1:0.50] 9012 ; ATOM-NEXT: nop # sched: [1:0.50] 9013 ; ATOM-NEXT: retq # sched: [79:39.50] 9014 ; 9015 ; SLM-LABEL: test_pinsrw: 9016 ; SLM: # %bb.0: 9017 ; SLM-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:1.00] 9018 ; SLM-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00] 9019 ; SLM-NEXT: retq # sched: [4:1.00] 9020 ; 9021 ; SANDY-SSE-LABEL: test_pinsrw: 9022 ; SANDY-SSE: # %bb.0: 9023 ; SANDY-SSE-NEXT: pinsrw $1, %edi, %xmm0 # 
sched: [2:1.00] 9024 ; SANDY-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50] 9025 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 9026 ; 9027 ; SANDY-LABEL: test_pinsrw: 9028 ; SANDY: # %bb.0: 9029 ; SANDY-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] 9030 ; SANDY-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] 9031 ; SANDY-NEXT: retq # sched: [1:1.00] 9032 ; 9033 ; HASWELL-SSE-LABEL: test_pinsrw: 9034 ; HASWELL-SSE: # %bb.0: 9035 ; HASWELL-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] 9036 ; HASWELL-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] 9037 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 9038 ; 9039 ; HASWELL-LABEL: test_pinsrw: 9040 ; HASWELL: # %bb.0: 9041 ; HASWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] 9042 ; HASWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] 9043 ; HASWELL-NEXT: retq # sched: [7:1.00] 9044 ; 9045 ; BROADWELL-SSE-LABEL: test_pinsrw: 9046 ; BROADWELL-SSE: # %bb.0: 9047 ; BROADWELL-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] 9048 ; BROADWELL-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] 9049 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 9050 ; 9051 ; BROADWELL-LABEL: test_pinsrw: 9052 ; BROADWELL: # %bb.0: 9053 ; BROADWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] 9054 ; BROADWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] 9055 ; BROADWELL-NEXT: retq # sched: [7:1.00] 9056 ; 9057 ; SKYLAKE-SSE-LABEL: test_pinsrw: 9058 ; SKYLAKE-SSE: # %bb.0: 9059 ; SKYLAKE-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] 9060 ; SKYLAKE-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] 9061 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 9062 ; 9063 ; SKYLAKE-LABEL: test_pinsrw: 9064 ; SKYLAKE: # %bb.0: 9065 ; SKYLAKE-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] 9066 ; SKYLAKE-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] 9067 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 9068 ; 9069 ; SKX-SSE-LABEL: test_pinsrw: 9070 ; SKX-SSE: # %bb.0: 9071 ; 
SKX-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] 9072 ; SKX-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] 9073 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 9074 ; 9075 ; SKX-LABEL: test_pinsrw: 9076 ; SKX: # %bb.0: 9077 ; SKX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] 9078 ; SKX-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] 9079 ; SKX-NEXT: retq # sched: [7:1.00] 9080 ; 9081 ; BTVER2-SSE-LABEL: test_pinsrw: 9082 ; BTVER2-SSE: # %bb.0: 9083 ; BTVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [7:0.50] 9084 ; BTVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00] 9085 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 9086 ; 9087 ; BTVER2-LABEL: test_pinsrw: 9088 ; BTVER2: # %bb.0: 9089 ; BTVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [7:0.50] 9090 ; BTVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00] 9091 ; BTVER2-NEXT: retq # sched: [4:1.00] 9092 ; 9093 ; ZNVER1-SSE-LABEL: test_pinsrw: 9094 ; ZNVER1-SSE: # %bb.0: 9095 ; ZNVER1-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:0.25] 9096 ; ZNVER1-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [8:0.50] 9097 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 9098 ; 9099 ; ZNVER1-LABEL: test_pinsrw: 9100 ; ZNVER1: # %bb.0: 9101 ; ZNVER1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.25] 9102 ; ZNVER1-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [8:0.50] 9103 ; ZNVER1-NEXT: retq # sched: [1:0.50] 9104 %1 = insertelement <8 x i16> %a0, i16 %a1, i32 1 9105 %2 = load i16, i16 *%a2 9106 %3 = insertelement <8 x i16> %1, i16 %2, i32 3 9107 ret <8 x i16> %3 9108 } 9109 9110 define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 9111 ; GENERIC-LABEL: test_pmaddwd: 9112 ; GENERIC: # %bb.0: 9113 ; GENERIC-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] 9114 ; GENERIC-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] 9115 ; GENERIC-NEXT: retq # sched: [1:1.00] 9116 ; 9117 ; ATOM-LABEL: test_pmaddwd: 9118 ; ATOM: # %bb.0: 9119 ; ATOM-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:5.00] 9120 
; ATOM-NEXT: pmaddwd (%rdi), %xmm0 # sched: [5:5.00] 9121 ; ATOM-NEXT: retq # sched: [79:39.50] 9122 ; 9123 ; SLM-LABEL: test_pmaddwd: 9124 ; SLM: # %bb.0: 9125 ; SLM-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00] 9126 ; SLM-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00] 9127 ; SLM-NEXT: retq # sched: [4:1.00] 9128 ; 9129 ; SANDY-SSE-LABEL: test_pmaddwd: 9130 ; SANDY-SSE: # %bb.0: 9131 ; SANDY-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] 9132 ; SANDY-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] 9133 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 9134 ; 9135 ; SANDY-LABEL: test_pmaddwd: 9136 ; SANDY: # %bb.0: 9137 ; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 9138 ; SANDY-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 9139 ; SANDY-NEXT: retq # sched: [1:1.00] 9140 ; 9141 ; HASWELL-SSE-LABEL: test_pmaddwd: 9142 ; HASWELL-SSE: # %bb.0: 9143 ; HASWELL-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] 9144 ; HASWELL-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] 9145 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 9146 ; 9147 ; HASWELL-LABEL: test_pmaddwd: 9148 ; HASWELL: # %bb.0: 9149 ; HASWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 9150 ; HASWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 9151 ; HASWELL-NEXT: retq # sched: [7:1.00] 9152 ; 9153 ; BROADWELL-SSE-LABEL: test_pmaddwd: 9154 ; BROADWELL-SSE: # %bb.0: 9155 ; BROADWELL-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] 9156 ; BROADWELL-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:1.00] 9157 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 9158 ; 9159 ; BROADWELL-LABEL: test_pmaddwd: 9160 ; BROADWELL: # %bb.0: 9161 ; BROADWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 9162 ; BROADWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 9163 ; BROADWELL-NEXT: retq # sched: [7:1.00] 9164 ; 9165 ; SKYLAKE-SSE-LABEL: test_pmaddwd: 9166 ; SKYLAKE-SSE: # %bb.0: 9167 ; SKYLAKE-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.50] 9168 ; SKYLAKE-SSE-NEXT: pmaddwd 
(%rdi), %xmm0 # sched: [10:0.50] 9169 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 9170 ; 9171 ; SKYLAKE-LABEL: test_pmaddwd: 9172 ; SKYLAKE: # %bb.0: 9173 ; SKYLAKE-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 9174 ; SKYLAKE-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 9175 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 9176 ; 9177 ; SKX-SSE-LABEL: test_pmaddwd: 9178 ; SKX-SSE: # %bb.0: 9179 ; SKX-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.50] 9180 ; SKX-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50] 9181 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 9182 ; 9183 ; SKX-LABEL: test_pmaddwd: 9184 ; SKX: # %bb.0: 9185 ; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 9186 ; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 9187 ; SKX-NEXT: retq # sched: [7:1.00] 9188 ; 9189 ; BTVER2-SSE-LABEL: test_pmaddwd: 9190 ; BTVER2-SSE: # %bb.0: 9191 ; BTVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [2:1.00] 9192 ; BTVER2-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00] 9193 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 9194 ; 9195 ; BTVER2-LABEL: test_pmaddwd: 9196 ; BTVER2: # %bb.0: 9197 ; BTVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 9198 ; BTVER2-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 9199 ; BTVER2-NEXT: retq # sched: [4:1.00] 9200 ; 9201 ; ZNVER1-SSE-LABEL: test_pmaddwd: 9202 ; ZNVER1-SSE: # %bb.0: 9203 ; ZNVER1-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00] 9204 ; ZNVER1-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] 9205 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 9206 ; 9207 ; ZNVER1-LABEL: test_pmaddwd: 9208 ; ZNVER1: # %bb.0: 9209 ; ZNVER1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:1.00] 9210 ; ZNVER1-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 9211 ; ZNVER1-NEXT: retq # sched: [1:0.50] 9212 %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) 9213 %2 = bitcast <4 x i32> %1 to <8 x i16> 9214 %3 = load <8 x i16>, <8 x i16> *%a2, align 16 9215 %4 = call <4 x i32> 
@llvm.x86.sse2.pmadd.wd(<8 x i16> %2, <8 x i16> %3) 9216 ret <4 x i32> %4 9217 } 9218 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone 9219 9220 define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 9221 ; GENERIC-LABEL: test_pmaxsw: 9222 ; GENERIC: # %bb.0: 9223 ; GENERIC-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] 9224 ; GENERIC-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] 9225 ; GENERIC-NEXT: retq # sched: [1:1.00] 9226 ; 9227 ; ATOM-LABEL: test_pmaxsw: 9228 ; ATOM: # %bb.0: 9229 ; ATOM-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] 9230 ; ATOM-NEXT: pmaxsw (%rdi), %xmm0 # sched: [1:1.00] 9231 ; ATOM-NEXT: nop # sched: [1:0.50] 9232 ; ATOM-NEXT: nop # sched: [1:0.50] 9233 ; ATOM-NEXT: nop # sched: [1:0.50] 9234 ; ATOM-NEXT: nop # sched: [1:0.50] 9235 ; ATOM-NEXT: retq # sched: [79:39.50] 9236 ; 9237 ; SLM-LABEL: test_pmaxsw: 9238 ; SLM: # %bb.0: 9239 ; SLM-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] 9240 ; SLM-NEXT: pmaxsw (%rdi), %xmm0 # sched: [4:1.00] 9241 ; SLM-NEXT: retq # sched: [4:1.00] 9242 ; 9243 ; SANDY-SSE-LABEL: test_pmaxsw: 9244 ; SANDY-SSE: # %bb.0: 9245 ; SANDY-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] 9246 ; SANDY-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] 9247 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 9248 ; 9249 ; SANDY-LABEL: test_pmaxsw: 9250 ; SANDY: # %bb.0: 9251 ; SANDY-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9252 ; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 9253 ; SANDY-NEXT: retq # sched: [1:1.00] 9254 ; 9255 ; HASWELL-SSE-LABEL: test_pmaxsw: 9256 ; HASWELL-SSE: # %bb.0: 9257 ; HASWELL-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] 9258 ; HASWELL-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] 9259 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 9260 ; 9261 ; HASWELL-LABEL: test_pmaxsw: 9262 ; HASWELL: # %bb.0: 9263 ; HASWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9264 ; HASWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 9265 
; HASWELL-NEXT: retq # sched: [7:1.00] 9266 ; 9267 ; BROADWELL-SSE-LABEL: test_pmaxsw: 9268 ; BROADWELL-SSE: # %bb.0: 9269 ; BROADWELL-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] 9270 ; BROADWELL-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [6:0.50] 9271 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 9272 ; 9273 ; BROADWELL-LABEL: test_pmaxsw: 9274 ; BROADWELL: # %bb.0: 9275 ; BROADWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9276 ; BROADWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 9277 ; BROADWELL-NEXT: retq # sched: [7:1.00] 9278 ; 9279 ; SKYLAKE-SSE-LABEL: test_pmaxsw: 9280 ; SKYLAKE-SSE: # %bb.0: 9281 ; SKYLAKE-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] 9282 ; SKYLAKE-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] 9283 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 9284 ; 9285 ; SKYLAKE-LABEL: test_pmaxsw: 9286 ; SKYLAKE: # %bb.0: 9287 ; SKYLAKE-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9288 ; SKYLAKE-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 9289 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 9290 ; 9291 ; SKX-SSE-LABEL: test_pmaxsw: 9292 ; SKX-SSE: # %bb.0: 9293 ; SKX-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] 9294 ; SKX-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] 9295 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 9296 ; 9297 ; SKX-LABEL: test_pmaxsw: 9298 ; SKX: # %bb.0: 9299 ; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9300 ; SKX-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 9301 ; SKX-NEXT: retq # sched: [7:1.00] 9302 ; 9303 ; BTVER2-SSE-LABEL: test_pmaxsw: 9304 ; BTVER2-SSE: # %bb.0: 9305 ; BTVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] 9306 ; BTVER2-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [6:1.00] 9307 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 9308 ; 9309 ; BTVER2-LABEL: test_pmaxsw: 9310 ; BTVER2: # %bb.0: 9311 ; BTVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9312 ; BTVER2-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 9313 ; BTVER2-NEXT: retq # sched: [4:1.00] 9314 ; 
9315 ; ZNVER1-SSE-LABEL: test_pmaxsw: 9316 ; ZNVER1-SSE: # %bb.0: 9317 ; ZNVER1-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.25] 9318 ; ZNVER1-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [8:0.50] 9319 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 9320 ; 9321 ; ZNVER1-LABEL: test_pmaxsw: 9322 ; ZNVER1: # %bb.0: 9323 ; ZNVER1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 9324 ; ZNVER1-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 9325 ; ZNVER1-NEXT: retq # sched: [1:0.50] 9326 %1 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) 9327 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 9328 %3 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %1, <8 x i16> %2) 9329 ret <8 x i16> %3 9330 } 9331 declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone 9332 9333 define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 9334 ; GENERIC-LABEL: test_pmaxub: 9335 ; GENERIC: # %bb.0: 9336 ; GENERIC-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] 9337 ; GENERIC-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] 9338 ; GENERIC-NEXT: retq # sched: [1:1.00] 9339 ; 9340 ; ATOM-LABEL: test_pmaxub: 9341 ; ATOM: # %bb.0: 9342 ; ATOM-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] 9343 ; ATOM-NEXT: pmaxub (%rdi), %xmm0 # sched: [1:1.00] 9344 ; ATOM-NEXT: nop # sched: [1:0.50] 9345 ; ATOM-NEXT: nop # sched: [1:0.50] 9346 ; ATOM-NEXT: nop # sched: [1:0.50] 9347 ; ATOM-NEXT: nop # sched: [1:0.50] 9348 ; ATOM-NEXT: retq # sched: [79:39.50] 9349 ; 9350 ; SLM-LABEL: test_pmaxub: 9351 ; SLM: # %bb.0: 9352 ; SLM-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] 9353 ; SLM-NEXT: pmaxub (%rdi), %xmm0 # sched: [4:1.00] 9354 ; SLM-NEXT: retq # sched: [4:1.00] 9355 ; 9356 ; SANDY-SSE-LABEL: test_pmaxub: 9357 ; SANDY-SSE: # %bb.0: 9358 ; SANDY-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] 9359 ; SANDY-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] 9360 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 9361 ; 9362 ; SANDY-LABEL: test_pmaxub: 9363 ; SANDY: # %bb.0: 
9364 ; SANDY-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9365 ; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 9366 ; SANDY-NEXT: retq # sched: [1:1.00] 9367 ; 9368 ; HASWELL-SSE-LABEL: test_pmaxub: 9369 ; HASWELL-SSE: # %bb.0: 9370 ; HASWELL-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] 9371 ; HASWELL-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] 9372 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 9373 ; 9374 ; HASWELL-LABEL: test_pmaxub: 9375 ; HASWELL: # %bb.0: 9376 ; HASWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9377 ; HASWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 9378 ; HASWELL-NEXT: retq # sched: [7:1.00] 9379 ; 9380 ; BROADWELL-SSE-LABEL: test_pmaxub: 9381 ; BROADWELL-SSE: # %bb.0: 9382 ; BROADWELL-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] 9383 ; BROADWELL-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [6:0.50] 9384 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 9385 ; 9386 ; BROADWELL-LABEL: test_pmaxub: 9387 ; BROADWELL: # %bb.0: 9388 ; BROADWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9389 ; BROADWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 9390 ; BROADWELL-NEXT: retq # sched: [7:1.00] 9391 ; 9392 ; SKYLAKE-SSE-LABEL: test_pmaxub: 9393 ; SKYLAKE-SSE: # %bb.0: 9394 ; SKYLAKE-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] 9395 ; SKYLAKE-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] 9396 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 9397 ; 9398 ; SKYLAKE-LABEL: test_pmaxub: 9399 ; SKYLAKE: # %bb.0: 9400 ; SKYLAKE-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9401 ; SKYLAKE-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 9402 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 9403 ; 9404 ; SKX-SSE-LABEL: test_pmaxub: 9405 ; SKX-SSE: # %bb.0: 9406 ; SKX-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] 9407 ; SKX-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] 9408 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 9409 ; 9410 ; SKX-LABEL: test_pmaxub: 9411 ; SKX: # %bb.0: 9412 ; SKX-NEXT: vpmaxub 
%xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9413 ; SKX-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 9414 ; SKX-NEXT: retq # sched: [7:1.00] 9415 ; 9416 ; BTVER2-SSE-LABEL: test_pmaxub: 9417 ; BTVER2-SSE: # %bb.0: 9418 ; BTVER2-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] 9419 ; BTVER2-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [6:1.00] 9420 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 9421 ; 9422 ; BTVER2-LABEL: test_pmaxub: 9423 ; BTVER2: # %bb.0: 9424 ; BTVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9425 ; BTVER2-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 9426 ; BTVER2-NEXT: retq # sched: [4:1.00] 9427 ; 9428 ; ZNVER1-SSE-LABEL: test_pmaxub: 9429 ; ZNVER1-SSE: # %bb.0: 9430 ; ZNVER1-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.25] 9431 ; ZNVER1-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [8:0.50] 9432 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 9433 ; 9434 ; ZNVER1-LABEL: test_pmaxub: 9435 ; ZNVER1: # %bb.0: 9436 ; ZNVER1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 9437 ; ZNVER1-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 9438 ; ZNVER1-NEXT: retq # sched: [1:0.50] 9439 %1 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) 9440 %2 = load <16 x i8>, <16 x i8> *%a2, align 16 9441 %3 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %1, <16 x i8> %2) 9442 ret <16 x i8> %3 9443 } 9444 declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone 9445 9446 define <8 x i16> @test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 9447 ; GENERIC-LABEL: test_pminsw: 9448 ; GENERIC: # %bb.0: 9449 ; GENERIC-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] 9450 ; GENERIC-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] 9451 ; GENERIC-NEXT: retq # sched: [1:1.00] 9452 ; 9453 ; ATOM-LABEL: test_pminsw: 9454 ; ATOM: # %bb.0: 9455 ; ATOM-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] 9456 ; ATOM-NEXT: pminsw (%rdi), %xmm0 # sched: [1:1.00] 9457 ; ATOM-NEXT: nop # sched: [1:0.50] 9458 ; ATOM-NEXT: nop # sched: [1:0.50] 
9459 ; ATOM-NEXT: nop # sched: [1:0.50] 9460 ; ATOM-NEXT: nop # sched: [1:0.50] 9461 ; ATOM-NEXT: retq # sched: [79:39.50] 9462 ; 9463 ; SLM-LABEL: test_pminsw: 9464 ; SLM: # %bb.0: 9465 ; SLM-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] 9466 ; SLM-NEXT: pminsw (%rdi), %xmm0 # sched: [4:1.00] 9467 ; SLM-NEXT: retq # sched: [4:1.00] 9468 ; 9469 ; SANDY-SSE-LABEL: test_pminsw: 9470 ; SANDY-SSE: # %bb.0: 9471 ; SANDY-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] 9472 ; SANDY-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] 9473 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 9474 ; 9475 ; SANDY-LABEL: test_pminsw: 9476 ; SANDY: # %bb.0: 9477 ; SANDY-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9478 ; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 9479 ; SANDY-NEXT: retq # sched: [1:1.00] 9480 ; 9481 ; HASWELL-SSE-LABEL: test_pminsw: 9482 ; HASWELL-SSE: # %bb.0: 9483 ; HASWELL-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] 9484 ; HASWELL-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] 9485 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 9486 ; 9487 ; HASWELL-LABEL: test_pminsw: 9488 ; HASWELL: # %bb.0: 9489 ; HASWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9490 ; HASWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 9491 ; HASWELL-NEXT: retq # sched: [7:1.00] 9492 ; 9493 ; BROADWELL-SSE-LABEL: test_pminsw: 9494 ; BROADWELL-SSE: # %bb.0: 9495 ; BROADWELL-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] 9496 ; BROADWELL-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [6:0.50] 9497 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 9498 ; 9499 ; BROADWELL-LABEL: test_pminsw: 9500 ; BROADWELL: # %bb.0: 9501 ; BROADWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9502 ; BROADWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 9503 ; BROADWELL-NEXT: retq # sched: [7:1.00] 9504 ; 9505 ; SKYLAKE-SSE-LABEL: test_pminsw: 9506 ; SKYLAKE-SSE: # %bb.0: 9507 ; SKYLAKE-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] 9508 ; SKYLAKE-SSE-NEXT: pminsw (%rdi), 
%xmm0 # sched: [7:0.50] 9509 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 9510 ; 9511 ; SKYLAKE-LABEL: test_pminsw: 9512 ; SKYLAKE: # %bb.0: 9513 ; SKYLAKE-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9514 ; SKYLAKE-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 9515 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 9516 ; 9517 ; SKX-SSE-LABEL: test_pminsw: 9518 ; SKX-SSE: # %bb.0: 9519 ; SKX-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] 9520 ; SKX-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] 9521 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 9522 ; 9523 ; SKX-LABEL: test_pminsw: 9524 ; SKX: # %bb.0: 9525 ; SKX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9526 ; SKX-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 9527 ; SKX-NEXT: retq # sched: [7:1.00] 9528 ; 9529 ; BTVER2-SSE-LABEL: test_pminsw: 9530 ; BTVER2-SSE: # %bb.0: 9531 ; BTVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] 9532 ; BTVER2-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [6:1.00] 9533 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 9534 ; 9535 ; BTVER2-LABEL: test_pminsw: 9536 ; BTVER2: # %bb.0: 9537 ; BTVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9538 ; BTVER2-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 9539 ; BTVER2-NEXT: retq # sched: [4:1.00] 9540 ; 9541 ; ZNVER1-SSE-LABEL: test_pminsw: 9542 ; ZNVER1-SSE: # %bb.0: 9543 ; ZNVER1-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.25] 9544 ; ZNVER1-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [8:0.50] 9545 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 9546 ; 9547 ; ZNVER1-LABEL: test_pminsw: 9548 ; ZNVER1: # %bb.0: 9549 ; ZNVER1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 9550 ; ZNVER1-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 9551 ; ZNVER1-NEXT: retq # sched: [1:0.50] 9552 %1 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) 9553 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 9554 %3 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %1, <8 x i16> %2) 9555 ret <8 x i16> %3 9556 } 9557 declare <8 x 
i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone 9558 9559 define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 9560 ; GENERIC-LABEL: test_pminub: 9561 ; GENERIC: # %bb.0: 9562 ; GENERIC-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] 9563 ; GENERIC-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] 9564 ; GENERIC-NEXT: retq # sched: [1:1.00] 9565 ; 9566 ; ATOM-LABEL: test_pminub: 9567 ; ATOM: # %bb.0: 9568 ; ATOM-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] 9569 ; ATOM-NEXT: pminub (%rdi), %xmm0 # sched: [1:1.00] 9570 ; ATOM-NEXT: nop # sched: [1:0.50] 9571 ; ATOM-NEXT: nop # sched: [1:0.50] 9572 ; ATOM-NEXT: nop # sched: [1:0.50] 9573 ; ATOM-NEXT: nop # sched: [1:0.50] 9574 ; ATOM-NEXT: retq # sched: [79:39.50] 9575 ; 9576 ; SLM-LABEL: test_pminub: 9577 ; SLM: # %bb.0: 9578 ; SLM-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] 9579 ; SLM-NEXT: pminub (%rdi), %xmm0 # sched: [4:1.00] 9580 ; SLM-NEXT: retq # sched: [4:1.00] 9581 ; 9582 ; SANDY-SSE-LABEL: test_pminub: 9583 ; SANDY-SSE: # %bb.0: 9584 ; SANDY-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] 9585 ; SANDY-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] 9586 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 9587 ; 9588 ; SANDY-LABEL: test_pminub: 9589 ; SANDY: # %bb.0: 9590 ; SANDY-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9591 ; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 9592 ; SANDY-NEXT: retq # sched: [1:1.00] 9593 ; 9594 ; HASWELL-SSE-LABEL: test_pminub: 9595 ; HASWELL-SSE: # %bb.0: 9596 ; HASWELL-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] 9597 ; HASWELL-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] 9598 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 9599 ; 9600 ; HASWELL-LABEL: test_pminub: 9601 ; HASWELL: # %bb.0: 9602 ; HASWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9603 ; HASWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 9604 ; HASWELL-NEXT: retq # sched: [7:1.00] 9605 ; 9606 ; BROADWELL-SSE-LABEL: test_pminub: 9607 ; 
BROADWELL-SSE: # %bb.0: 9608 ; BROADWELL-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] 9609 ; BROADWELL-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [6:0.50] 9610 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 9611 ; 9612 ; BROADWELL-LABEL: test_pminub: 9613 ; BROADWELL: # %bb.0: 9614 ; BROADWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9615 ; BROADWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 9616 ; BROADWELL-NEXT: retq # sched: [7:1.00] 9617 ; 9618 ; SKYLAKE-SSE-LABEL: test_pminub: 9619 ; SKYLAKE-SSE: # %bb.0: 9620 ; SKYLAKE-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] 9621 ; SKYLAKE-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] 9622 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 9623 ; 9624 ; SKYLAKE-LABEL: test_pminub: 9625 ; SKYLAKE: # %bb.0: 9626 ; SKYLAKE-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9627 ; SKYLAKE-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 9628 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 9629 ; 9630 ; SKX-SSE-LABEL: test_pminub: 9631 ; SKX-SSE: # %bb.0: 9632 ; SKX-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] 9633 ; SKX-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] 9634 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 9635 ; 9636 ; SKX-LABEL: test_pminub: 9637 ; SKX: # %bb.0: 9638 ; SKX-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9639 ; SKX-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 9640 ; SKX-NEXT: retq # sched: [7:1.00] 9641 ; 9642 ; BTVER2-SSE-LABEL: test_pminub: 9643 ; BTVER2-SSE: # %bb.0: 9644 ; BTVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] 9645 ; BTVER2-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [6:1.00] 9646 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 9647 ; 9648 ; BTVER2-LABEL: test_pminub: 9649 ; BTVER2: # %bb.0: 9650 ; BTVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 9651 ; BTVER2-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 9652 ; BTVER2-NEXT: retq # sched: [4:1.00] 9653 ; 9654 ; ZNVER1-SSE-LABEL: test_pminub: 9655 ; ZNVER1-SSE: # %bb.0: 9656 ; ZNVER1-SSE-NEXT: 
pminub %xmm1, %xmm0 # sched: [1:0.25] 9657 ; ZNVER1-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [8:0.50] 9658 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 9659 ; 9660 ; ZNVER1-LABEL: test_pminub: 9661 ; ZNVER1: # %bb.0: 9662 ; ZNVER1-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 9663 ; ZNVER1-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 9664 ; ZNVER1-NEXT: retq # sched: [1:0.50] 9665 %1 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) 9666 %2 = load <16 x i8>, <16 x i8> *%a2, align 16 9667 %3 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %1, <16 x i8> %2) 9668 ret <16 x i8> %3 9669 } 9670 declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone 9671 9672 define i32 @test_pmovmskb(<16 x i8> %a0) { 9673 ; GENERIC-LABEL: test_pmovmskb: 9674 ; GENERIC: # %bb.0: 9675 ; GENERIC-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] 9676 ; GENERIC-NEXT: retq # sched: [1:1.00] 9677 ; 9678 ; ATOM-LABEL: test_pmovmskb: 9679 ; ATOM: # %bb.0: 9680 ; ATOM-NEXT: pmovmskb %xmm0, %eax # sched: [3:3.00] 9681 ; ATOM-NEXT: nop # sched: [1:0.50] 9682 ; ATOM-NEXT: nop # sched: [1:0.50] 9683 ; ATOM-NEXT: retq # sched: [79:39.50] 9684 ; 9685 ; SLM-LABEL: test_pmovmskb: 9686 ; SLM: # %bb.0: 9687 ; SLM-NEXT: pmovmskb %xmm0, %eax # sched: [4:1.00] 9688 ; SLM-NEXT: retq # sched: [4:1.00] 9689 ; 9690 ; SANDY-SSE-LABEL: test_pmovmskb: 9691 ; SANDY-SSE: # %bb.0: 9692 ; SANDY-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] 9693 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 9694 ; 9695 ; SANDY-LABEL: test_pmovmskb: 9696 ; SANDY: # %bb.0: 9697 ; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] 9698 ; SANDY-NEXT: retq # sched: [1:1.00] 9699 ; 9700 ; HASWELL-SSE-LABEL: test_pmovmskb: 9701 ; HASWELL-SSE: # %bb.0: 9702 ; HASWELL-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00] 9703 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 9704 ; 9705 ; HASWELL-LABEL: test_pmovmskb: 9706 ; HASWELL: # %bb.0: 9707 ; HASWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] 9708 
; HASWELL-NEXT: retq # sched: [7:1.00] 9709 ; 9710 ; BROADWELL-SSE-LABEL: test_pmovmskb: 9711 ; BROADWELL-SSE: # %bb.0: 9712 ; BROADWELL-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00] 9713 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 9714 ; 9715 ; BROADWELL-LABEL: test_pmovmskb: 9716 ; BROADWELL: # %bb.0: 9717 ; BROADWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] 9718 ; BROADWELL-NEXT: retq # sched: [7:1.00] 9719 ; 9720 ; SKYLAKE-SSE-LABEL: test_pmovmskb: 9721 ; SKYLAKE-SSE: # %bb.0: 9722 ; SKYLAKE-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] 9723 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 9724 ; 9725 ; SKYLAKE-LABEL: test_pmovmskb: 9726 ; SKYLAKE: # %bb.0: 9727 ; SKYLAKE-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] 9728 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 9729 ; 9730 ; SKX-SSE-LABEL: test_pmovmskb: 9731 ; SKX-SSE: # %bb.0: 9732 ; SKX-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] 9733 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 9734 ; 9735 ; SKX-LABEL: test_pmovmskb: 9736 ; SKX: # %bb.0: 9737 ; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] 9738 ; SKX-NEXT: retq # sched: [7:1.00] 9739 ; 9740 ; BTVER2-SSE-LABEL: test_pmovmskb: 9741 ; BTVER2-SSE: # %bb.0: 9742 ; BTVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00] 9743 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 9744 ; 9745 ; BTVER2-LABEL: test_pmovmskb: 9746 ; BTVER2: # %bb.0: 9747 ; BTVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] 9748 ; BTVER2-NEXT: retq # sched: [4:1.00] 9749 ; 9750 ; ZNVER1-SSE-LABEL: test_pmovmskb: 9751 ; ZNVER1-SSE: # %bb.0: 9752 ; ZNVER1-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [1:1.00] 9753 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 9754 ; 9755 ; ZNVER1-LABEL: test_pmovmskb: 9756 ; ZNVER1: # %bb.0: 9757 ; ZNVER1-NEXT: vpmovmskb %xmm0, %eax # sched: [1:1.00] 9758 ; ZNVER1-NEXT: retq # sched: [1:0.50] 9759 %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) 9760 ret i32 %1 9761 } 9762 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone 9763 9764 define 
<8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 9765 ; GENERIC-LABEL: test_pmulhuw: 9766 ; GENERIC: # %bb.0: 9767 ; GENERIC-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] 9768 ; GENERIC-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] 9769 ; GENERIC-NEXT: retq # sched: [1:1.00] 9770 ; 9771 ; ATOM-LABEL: test_pmulhuw: 9772 ; ATOM: # %bb.0: 9773 ; ATOM-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:5.00] 9774 ; ATOM-NEXT: pmulhuw (%rdi), %xmm0 # sched: [5:5.00] 9775 ; ATOM-NEXT: retq # sched: [79:39.50] 9776 ; 9777 ; SLM-LABEL: test_pmulhuw: 9778 ; SLM: # %bb.0: 9779 ; SLM-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00] 9780 ; SLM-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00] 9781 ; SLM-NEXT: retq # sched: [4:1.00] 9782 ; 9783 ; SANDY-SSE-LABEL: test_pmulhuw: 9784 ; SANDY-SSE: # %bb.0: 9785 ; SANDY-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] 9786 ; SANDY-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] 9787 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 9788 ; 9789 ; SANDY-LABEL: test_pmulhuw: 9790 ; SANDY: # %bb.0: 9791 ; SANDY-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 9792 ; SANDY-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 9793 ; SANDY-NEXT: retq # sched: [1:1.00] 9794 ; 9795 ; HASWELL-SSE-LABEL: test_pmulhuw: 9796 ; HASWELL-SSE: # %bb.0: 9797 ; HASWELL-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] 9798 ; HASWELL-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] 9799 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 9800 ; 9801 ; HASWELL-LABEL: test_pmulhuw: 9802 ; HASWELL: # %bb.0: 9803 ; HASWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 9804 ; HASWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 9805 ; HASWELL-NEXT: retq # sched: [7:1.00] 9806 ; 9807 ; BROADWELL-SSE-LABEL: test_pmulhuw: 9808 ; BROADWELL-SSE: # %bb.0: 9809 ; BROADWELL-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] 9810 ; BROADWELL-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:1.00] 9811 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 9812 ; 9813 ; 
BROADWELL-LABEL: test_pmulhuw: 9814 ; BROADWELL: # %bb.0: 9815 ; BROADWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 9816 ; BROADWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 9817 ; BROADWELL-NEXT: retq # sched: [7:1.00] 9818 ; 9819 ; SKYLAKE-SSE-LABEL: test_pmulhuw: 9820 ; SKYLAKE-SSE: # %bb.0: 9821 ; SKYLAKE-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.50] 9822 ; SKYLAKE-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50] 9823 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 9824 ; 9825 ; SKYLAKE-LABEL: test_pmulhuw: 9826 ; SKYLAKE: # %bb.0: 9827 ; SKYLAKE-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 9828 ; SKYLAKE-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 9829 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 9830 ; 9831 ; SKX-SSE-LABEL: test_pmulhuw: 9832 ; SKX-SSE: # %bb.0: 9833 ; SKX-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.50] 9834 ; SKX-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50] 9835 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 9836 ; 9837 ; SKX-LABEL: test_pmulhuw: 9838 ; SKX: # %bb.0: 9839 ; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 9840 ; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 9841 ; SKX-NEXT: retq # sched: [7:1.00] 9842 ; 9843 ; BTVER2-SSE-LABEL: test_pmulhuw: 9844 ; BTVER2-SSE: # %bb.0: 9845 ; BTVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [2:1.00] 9846 ; BTVER2-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00] 9847 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 9848 ; 9849 ; BTVER2-LABEL: test_pmulhuw: 9850 ; BTVER2: # %bb.0: 9851 ; BTVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 9852 ; BTVER2-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 9853 ; BTVER2-NEXT: retq # sched: [4:1.00] 9854 ; 9855 ; ZNVER1-SSE-LABEL: test_pmulhuw: 9856 ; ZNVER1-SSE: # %bb.0: 9857 ; ZNVER1-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00] 9858 ; ZNVER1-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] 9859 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 9860 ; 9861 ; ZNVER1-LABEL: test_pmulhuw: 
9862 ; ZNVER1: # %bb.0: 9863 ; ZNVER1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] 9864 ; ZNVER1-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 9865 ; ZNVER1-NEXT: retq # sched: [1:0.50] 9866 %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) 9867 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 9868 %3 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %1, <8 x i16> %2) 9869 ret <8 x i16> %3 9870 } 9871 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone 9872 9873 define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 9874 ; GENERIC-LABEL: test_pmulhw: 9875 ; GENERIC: # %bb.0: 9876 ; GENERIC-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] 9877 ; GENERIC-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] 9878 ; GENERIC-NEXT: retq # sched: [1:1.00] 9879 ; 9880 ; ATOM-LABEL: test_pmulhw: 9881 ; ATOM: # %bb.0: 9882 ; ATOM-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:5.00] 9883 ; ATOM-NEXT: pmulhw (%rdi), %xmm0 # sched: [5:5.00] 9884 ; ATOM-NEXT: retq # sched: [79:39.50] 9885 ; 9886 ; SLM-LABEL: test_pmulhw: 9887 ; SLM: # %bb.0: 9888 ; SLM-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00] 9889 ; SLM-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00] 9890 ; SLM-NEXT: retq # sched: [4:1.00] 9891 ; 9892 ; SANDY-SSE-LABEL: test_pmulhw: 9893 ; SANDY-SSE: # %bb.0: 9894 ; SANDY-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] 9895 ; SANDY-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] 9896 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 9897 ; 9898 ; SANDY-LABEL: test_pmulhw: 9899 ; SANDY: # %bb.0: 9900 ; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 9901 ; SANDY-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 9902 ; SANDY-NEXT: retq # sched: [1:1.00] 9903 ; 9904 ; HASWELL-SSE-LABEL: test_pmulhw: 9905 ; HASWELL-SSE: # %bb.0: 9906 ; HASWELL-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] 9907 ; HASWELL-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] 9908 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 9909 
; 9910 ; HASWELL-LABEL: test_pmulhw: 9911 ; HASWELL: # %bb.0: 9912 ; HASWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 9913 ; HASWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 9914 ; HASWELL-NEXT: retq # sched: [7:1.00] 9915 ; 9916 ; BROADWELL-SSE-LABEL: test_pmulhw: 9917 ; BROADWELL-SSE: # %bb.0: 9918 ; BROADWELL-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] 9919 ; BROADWELL-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:1.00] 9920 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 9921 ; 9922 ; BROADWELL-LABEL: test_pmulhw: 9923 ; BROADWELL: # %bb.0: 9924 ; BROADWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 9925 ; BROADWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 9926 ; BROADWELL-NEXT: retq # sched: [7:1.00] 9927 ; 9928 ; SKYLAKE-SSE-LABEL: test_pmulhw: 9929 ; SKYLAKE-SSE: # %bb.0: 9930 ; SKYLAKE-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.50] 9931 ; SKYLAKE-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50] 9932 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 9933 ; 9934 ; SKYLAKE-LABEL: test_pmulhw: 9935 ; SKYLAKE: # %bb.0: 9936 ; SKYLAKE-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 9937 ; SKYLAKE-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 9938 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 9939 ; 9940 ; SKX-SSE-LABEL: test_pmulhw: 9941 ; SKX-SSE: # %bb.0: 9942 ; SKX-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.50] 9943 ; SKX-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50] 9944 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 9945 ; 9946 ; SKX-LABEL: test_pmulhw: 9947 ; SKX: # %bb.0: 9948 ; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 9949 ; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 9950 ; SKX-NEXT: retq # sched: [7:1.00] 9951 ; 9952 ; BTVER2-SSE-LABEL: test_pmulhw: 9953 ; BTVER2-SSE: # %bb.0: 9954 ; BTVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [2:1.00] 9955 ; BTVER2-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00] 9956 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 9957 ; 9958 ; BTVER2-LABEL: 
test_pmulhw: 9959 ; BTVER2: # %bb.0: 9960 ; BTVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 9961 ; BTVER2-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 9962 ; BTVER2-NEXT: retq # sched: [4:1.00] 9963 ; 9964 ; ZNVER1-SSE-LABEL: test_pmulhw: 9965 ; ZNVER1-SSE: # %bb.0: 9966 ; ZNVER1-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00] 9967 ; ZNVER1-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] 9968 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 9969 ; 9970 ; ZNVER1-LABEL: test_pmulhw: 9971 ; ZNVER1: # %bb.0: 9972 ; ZNVER1-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] 9973 ; ZNVER1-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 9974 ; ZNVER1-NEXT: retq # sched: [1:0.50] 9975 %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) 9976 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 9977 %3 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %1, <8 x i16> %2) 9978 ret <8 x i16> %3 9979 } 9980 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone 9981 9982 define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 9983 ; GENERIC-LABEL: test_pmullw: 9984 ; GENERIC: # %bb.0: 9985 ; GENERIC-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] 9986 ; GENERIC-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] 9987 ; GENERIC-NEXT: retq # sched: [1:1.00] 9988 ; 9989 ; ATOM-LABEL: test_pmullw: 9990 ; ATOM: # %bb.0: 9991 ; ATOM-NEXT: pmullw %xmm1, %xmm0 # sched: [5:5.00] 9992 ; ATOM-NEXT: pmullw (%rdi), %xmm0 # sched: [5:5.00] 9993 ; ATOM-NEXT: retq # sched: [79:39.50] 9994 ; 9995 ; SLM-LABEL: test_pmullw: 9996 ; SLM: # %bb.0: 9997 ; SLM-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00] 9998 ; SLM-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00] 9999 ; SLM-NEXT: retq # sched: [4:1.00] 10000 ; 10001 ; SANDY-SSE-LABEL: test_pmullw: 10002 ; SANDY-SSE: # %bb.0: 10003 ; SANDY-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] 10004 ; SANDY-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] 10005 ; SANDY-SSE-NEXT: retq # sched: 
[1:1.00] 10006 ; 10007 ; SANDY-LABEL: test_pmullw: 10008 ; SANDY: # %bb.0: 10009 ; SANDY-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 10010 ; SANDY-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 10011 ; SANDY-NEXT: retq # sched: [1:1.00] 10012 ; 10013 ; HASWELL-SSE-LABEL: test_pmullw: 10014 ; HASWELL-SSE: # %bb.0: 10015 ; HASWELL-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] 10016 ; HASWELL-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] 10017 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 10018 ; 10019 ; HASWELL-LABEL: test_pmullw: 10020 ; HASWELL: # %bb.0: 10021 ; HASWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 10022 ; HASWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 10023 ; HASWELL-NEXT: retq # sched: [7:1.00] 10024 ; 10025 ; BROADWELL-SSE-LABEL: test_pmullw: 10026 ; BROADWELL-SSE: # %bb.0: 10027 ; BROADWELL-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] 10028 ; BROADWELL-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:1.00] 10029 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 10030 ; 10031 ; BROADWELL-LABEL: test_pmullw: 10032 ; BROADWELL: # %bb.0: 10033 ; BROADWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 10034 ; BROADWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 10035 ; BROADWELL-NEXT: retq # sched: [7:1.00] 10036 ; 10037 ; SKYLAKE-SSE-LABEL: test_pmullw: 10038 ; SKYLAKE-SSE: # %bb.0: 10039 ; SKYLAKE-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.50] 10040 ; SKYLAKE-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50] 10041 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 10042 ; 10043 ; SKYLAKE-LABEL: test_pmullw: 10044 ; SKYLAKE: # %bb.0: 10045 ; SKYLAKE-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 10046 ; SKYLAKE-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 10047 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 10048 ; 10049 ; SKX-SSE-LABEL: test_pmullw: 10050 ; SKX-SSE: # %bb.0: 10051 ; SKX-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.50] 10052 ; SKX-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50] 10053 
; SKX-SSE-NEXT: retq # sched: [7:1.00] 10054 ; 10055 ; SKX-LABEL: test_pmullw: 10056 ; SKX: # %bb.0: 10057 ; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 10058 ; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 10059 ; SKX-NEXT: retq # sched: [7:1.00] 10060 ; 10061 ; BTVER2-SSE-LABEL: test_pmullw: 10062 ; BTVER2-SSE: # %bb.0: 10063 ; BTVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [2:1.00] 10064 ; BTVER2-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00] 10065 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 10066 ; 10067 ; BTVER2-LABEL: test_pmullw: 10068 ; BTVER2: # %bb.0: 10069 ; BTVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 10070 ; BTVER2-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 10071 ; BTVER2-NEXT: retq # sched: [4:1.00] 10072 ; 10073 ; ZNVER1-SSE-LABEL: test_pmullw: 10074 ; ZNVER1-SSE: # %bb.0: 10075 ; ZNVER1-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00] 10076 ; ZNVER1-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] 10077 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 10078 ; 10079 ; ZNVER1-LABEL: test_pmullw: 10080 ; ZNVER1: # %bb.0: 10081 ; ZNVER1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] 10082 ; ZNVER1-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 10083 ; ZNVER1-NEXT: retq # sched: [1:0.50] 10084 %1 = mul <8 x i16> %a0, %a1 10085 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 10086 %3 = mul <8 x i16> %1, %2 10087 ret <8 x i16> %3 10088 } 10089 10090 define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 10091 ; GENERIC-LABEL: test_pmuludq: 10092 ; GENERIC: # %bb.0: 10093 ; GENERIC-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] 10094 ; GENERIC-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] 10095 ; GENERIC-NEXT: retq # sched: [1:1.00] 10096 ; 10097 ; ATOM-LABEL: test_pmuludq: 10098 ; ATOM: # %bb.0: 10099 ; ATOM-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:5.00] 10100 ; ATOM-NEXT: pmuludq (%rdi), %xmm0 # sched: [5:5.00] 10101 ; ATOM-NEXT: retq # sched: [79:39.50] 10102 ; 10103 ; SLM-LABEL: 
test_pmuludq: 10104 ; SLM: # %bb.0: 10105 ; SLM-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00] 10106 ; SLM-NEXT: pmuludq (%rdi), %xmm0 # sched: [7:1.00] 10107 ; SLM-NEXT: retq # sched: [4:1.00] 10108 ; 10109 ; SANDY-SSE-LABEL: test_pmuludq: 10110 ; SANDY-SSE: # %bb.0: 10111 ; SANDY-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] 10112 ; SANDY-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] 10113 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 10114 ; 10115 ; SANDY-LABEL: test_pmuludq: 10116 ; SANDY: # %bb.0: 10117 ; SANDY-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 10118 ; SANDY-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 10119 ; SANDY-NEXT: retq # sched: [1:1.00] 10120 ; 10121 ; HASWELL-SSE-LABEL: test_pmuludq: 10122 ; HASWELL-SSE: # %bb.0: 10123 ; HASWELL-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] 10124 ; HASWELL-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] 10125 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 10126 ; 10127 ; HASWELL-LABEL: test_pmuludq: 10128 ; HASWELL: # %bb.0: 10129 ; HASWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 10130 ; HASWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 10131 ; HASWELL-NEXT: retq # sched: [7:1.00] 10132 ; 10133 ; BROADWELL-SSE-LABEL: test_pmuludq: 10134 ; BROADWELL-SSE: # %bb.0: 10135 ; BROADWELL-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] 10136 ; BROADWELL-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:1.00] 10137 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 10138 ; 10139 ; BROADWELL-LABEL: test_pmuludq: 10140 ; BROADWELL: # %bb.0: 10141 ; BROADWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 10142 ; BROADWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 10143 ; BROADWELL-NEXT: retq # sched: [7:1.00] 10144 ; 10145 ; SKYLAKE-SSE-LABEL: test_pmuludq: 10146 ; SKYLAKE-SSE: # %bb.0: 10147 ; SKYLAKE-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.50] 10148 ; SKYLAKE-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50] 10149 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 
10150 ; 10151 ; SKYLAKE-LABEL: test_pmuludq: 10152 ; SKYLAKE: # %bb.0: 10153 ; SKYLAKE-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 10154 ; SKYLAKE-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 10155 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 10156 ; 10157 ; SKX-SSE-LABEL: test_pmuludq: 10158 ; SKX-SSE: # %bb.0: 10159 ; SKX-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.50] 10160 ; SKX-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50] 10161 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 10162 ; 10163 ; SKX-LABEL: test_pmuludq: 10164 ; SKX: # %bb.0: 10165 ; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 10166 ; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 10167 ; SKX-NEXT: retq # sched: [7:1.00] 10168 ; 10169 ; BTVER2-SSE-LABEL: test_pmuludq: 10170 ; BTVER2-SSE: # %bb.0: 10171 ; BTVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [2:1.00] 10172 ; BTVER2-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [7:1.00] 10173 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 10174 ; 10175 ; BTVER2-LABEL: test_pmuludq: 10176 ; BTVER2: # %bb.0: 10177 ; BTVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 10178 ; BTVER2-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 10179 ; BTVER2-NEXT: retq # sched: [4:1.00] 10180 ; 10181 ; ZNVER1-SSE-LABEL: test_pmuludq: 10182 ; ZNVER1-SSE: # %bb.0: 10183 ; ZNVER1-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00] 10184 ; ZNVER1-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] 10185 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 10186 ; 10187 ; ZNVER1-LABEL: test_pmuludq: 10188 ; ZNVER1: # %bb.0: 10189 ; ZNVER1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:1.00] 10190 ; ZNVER1-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 10191 ; ZNVER1-NEXT: retq # sched: [1:0.50] 10192 %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) 10193 %2 = bitcast <2 x i64> %1 to <4 x i32> 10194 %3 = load <4 x i32>, <4 x i32> *%a2, align 16 10195 %4 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %2, <4 x i32> %3) 
10196 ret <2 x i64> %4 10197 } 10198 declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone 10199 10200 define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 10201 ; GENERIC-LABEL: test_por: 10202 ; GENERIC: # %bb.0: 10203 ; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 10204 ; GENERIC-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] 10205 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 10206 ; GENERIC-NEXT: retq # sched: [1:1.00] 10207 ; 10208 ; ATOM-LABEL: test_por: 10209 ; ATOM: # %bb.0: 10210 ; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 10211 ; ATOM-NEXT: por (%rdi), %xmm0 # sched: [1:1.00] 10212 ; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] 10213 ; ATOM-NEXT: retq # sched: [79:39.50] 10214 ; 10215 ; SLM-LABEL: test_por: 10216 ; SLM: # %bb.0: 10217 ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 10218 ; SLM-NEXT: por (%rdi), %xmm0 # sched: [4:1.00] 10219 ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 10220 ; SLM-NEXT: retq # sched: [4:1.00] 10221 ; 10222 ; SANDY-SSE-LABEL: test_por: 10223 ; SANDY-SSE: # %bb.0: 10224 ; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 10225 ; SANDY-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] 10226 ; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 10227 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 10228 ; 10229 ; SANDY-LABEL: test_por: 10230 ; SANDY: # %bb.0: 10231 ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 10232 ; SANDY-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 10233 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10234 ; SANDY-NEXT: retq # sched: [1:1.00] 10235 ; 10236 ; HASWELL-SSE-LABEL: test_por: 10237 ; HASWELL-SSE: # %bb.0: 10238 ; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 10239 ; HASWELL-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] 10240 ; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 10241 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 10242 ; 10243 ; HASWELL-LABEL: test_por: 10244 ; HASWELL: # %bb.0: 10245 ; 
HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 10246 ; HASWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 10247 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10248 ; HASWELL-NEXT: retq # sched: [7:1.00] 10249 ; 10250 ; BROADWELL-SSE-LABEL: test_por: 10251 ; BROADWELL-SSE: # %bb.0: 10252 ; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 10253 ; BROADWELL-SSE-NEXT: por (%rdi), %xmm0 # sched: [6:0.50] 10254 ; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 10255 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 10256 ; 10257 ; BROADWELL-LABEL: test_por: 10258 ; BROADWELL: # %bb.0: 10259 ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 10260 ; BROADWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 10261 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10262 ; BROADWELL-NEXT: retq # sched: [7:1.00] 10263 ; 10264 ; SKYLAKE-SSE-LABEL: test_por: 10265 ; SKYLAKE-SSE: # %bb.0: 10266 ; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 10267 ; SKYLAKE-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] 10268 ; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 10269 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 10270 ; 10271 ; SKYLAKE-LABEL: test_por: 10272 ; SKYLAKE: # %bb.0: 10273 ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 10274 ; SKYLAKE-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 10275 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 10276 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 10277 ; 10278 ; SKX-SSE-LABEL: test_por: 10279 ; SKX-SSE: # %bb.0: 10280 ; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 10281 ; SKX-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] 10282 ; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 10283 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 10284 ; 10285 ; SKX-LABEL: test_por: 10286 ; SKX: # %bb.0: 10287 ; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 10288 ; SKX-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 10289 ; SKX-NEXT: vpaddq %xmm1, 
%xmm0, %xmm0 # sched: [1:0.33] 10290 ; SKX-NEXT: retq # sched: [7:1.00] 10291 ; 10292 ; BTVER2-SSE-LABEL: test_por: 10293 ; BTVER2-SSE: # %bb.0: 10294 ; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 10295 ; BTVER2-SSE-NEXT: por (%rdi), %xmm0 # sched: [6:1.00] 10296 ; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 10297 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 10298 ; 10299 ; BTVER2-LABEL: test_por: 10300 ; BTVER2: # %bb.0: 10301 ; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10302 ; BTVER2-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 10303 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10304 ; BTVER2-NEXT: retq # sched: [4:1.00] 10305 ; 10306 ; ZNVER1-SSE-LABEL: test_por: 10307 ; ZNVER1-SSE: # %bb.0: 10308 ; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] 10309 ; ZNVER1-SSE-NEXT: por (%rdi), %xmm0 # sched: [8:0.50] 10310 ; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] 10311 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 10312 ; 10313 ; ZNVER1-LABEL: test_por: 10314 ; ZNVER1: # %bb.0: 10315 ; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 10316 ; ZNVER1-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 10317 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 10318 ; ZNVER1-NEXT: retq # sched: [1:0.50] 10319 %1 = or <2 x i64> %a0, %a1 10320 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 10321 %3 = or <2 x i64> %1, %2 10322 %4 = add <2 x i64> %3, %a1 10323 ret <2 x i64> %4 10324 } 10325 10326 define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 10327 ; GENERIC-LABEL: test_psadbw: 10328 ; GENERIC: # %bb.0: 10329 ; GENERIC-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] 10330 ; GENERIC-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00] 10331 ; GENERIC-NEXT: retq # sched: [1:1.00] 10332 ; 10333 ; ATOM-LABEL: test_psadbw: 10334 ; ATOM: # %bb.0: 10335 ; ATOM-NEXT: psadbw %xmm1, %xmm0 # sched: [5:5.00] 10336 ; ATOM-NEXT: psadbw (%rdi), %xmm0 # sched: [5:5.00] 10337 ; ATOM-NEXT: retq # sched: 
[79:39.50] 10338 ; 10339 ; SLM-LABEL: test_psadbw: 10340 ; SLM: # %bb.0: 10341 ; SLM-NEXT: psadbw %xmm1, %xmm0 # sched: [4:1.00] 10342 ; SLM-NEXT: psadbw (%rdi), %xmm0 # sched: [7:1.00] 10343 ; SLM-NEXT: retq # sched: [4:1.00] 10344 ; 10345 ; SANDY-SSE-LABEL: test_psadbw: 10346 ; SANDY-SSE: # %bb.0: 10347 ; SANDY-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] 10348 ; SANDY-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00] 10349 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 10350 ; 10351 ; SANDY-LABEL: test_psadbw: 10352 ; SANDY: # %bb.0: 10353 ; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 10354 ; SANDY-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 10355 ; SANDY-NEXT: retq # sched: [1:1.00] 10356 ; 10357 ; HASWELL-SSE-LABEL: test_psadbw: 10358 ; HASWELL-SSE: # %bb.0: 10359 ; HASWELL-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] 10360 ; HASWELL-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00] 10361 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 10362 ; 10363 ; HASWELL-LABEL: test_psadbw: 10364 ; HASWELL: # %bb.0: 10365 ; HASWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 10366 ; HASWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] 10367 ; HASWELL-NEXT: retq # sched: [7:1.00] 10368 ; 10369 ; BROADWELL-SSE-LABEL: test_psadbw: 10370 ; BROADWELL-SSE: # %bb.0: 10371 ; BROADWELL-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] 10372 ; BROADWELL-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [10:1.00] 10373 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 10374 ; 10375 ; BROADWELL-LABEL: test_psadbw: 10376 ; BROADWELL: # %bb.0: 10377 ; BROADWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] 10378 ; BROADWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 10379 ; BROADWELL-NEXT: retq # sched: [7:1.00] 10380 ; 10381 ; SKYLAKE-SSE-LABEL: test_psadbw: 10382 ; SKYLAKE-SSE: # %bb.0: 10383 ; SKYLAKE-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00] 10384 ; SKYLAKE-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00] 10385 ; SKYLAKE-SSE-NEXT: retq # 
sched: [7:1.00] 10386 ; 10387 ; SKYLAKE-LABEL: test_psadbw: 10388 ; SKYLAKE: # %bb.0: 10389 ; SKYLAKE-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 10390 ; SKYLAKE-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 10391 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 10392 ; 10393 ; SKX-SSE-LABEL: test_psadbw: 10394 ; SKX-SSE: # %bb.0: 10395 ; SKX-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00] 10396 ; SKX-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00] 10397 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 10398 ; 10399 ; SKX-LABEL: test_psadbw: 10400 ; SKX: # %bb.0: 10401 ; SKX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 10402 ; SKX-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 10403 ; SKX-NEXT: retq # sched: [7:1.00] 10404 ; 10405 ; BTVER2-SSE-LABEL: test_psadbw: 10406 ; BTVER2-SSE: # %bb.0: 10407 ; BTVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [2:0.50] 10408 ; BTVER2-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [7:1.00] 10409 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 10410 ; 10411 ; BTVER2-LABEL: test_psadbw: 10412 ; BTVER2: # %bb.0: 10413 ; BTVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] 10414 ; BTVER2-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 10415 ; BTVER2-NEXT: retq # sched: [4:1.00] 10416 ; 10417 ; ZNVER1-SSE-LABEL: test_psadbw: 10418 ; ZNVER1-SSE: # %bb.0: 10419 ; ZNVER1-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00] 10420 ; ZNVER1-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [10:1.00] 10421 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 10422 ; 10423 ; ZNVER1-LABEL: test_psadbw: 10424 ; ZNVER1: # %bb.0: 10425 ; ZNVER1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 10426 ; ZNVER1-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 10427 ; ZNVER1-NEXT: retq # sched: [1:0.50] 10428 %1 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) 10429 %2 = bitcast <2 x i64> %1 to <16 x i8> 10430 %3 = load <16 x i8>, <16 x i8> *%a2, align 16 10431 %4 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %2, <16 x i8> %3) 10432 ret <2 
x i64> %4 10433 } 10434 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone 10435
; test_pshufd - scheduling-annotation test for the PSHUFD dword shuffle
; (VPSHUFD where the expected output uses the v-prefixed AVX mnemonics).
;   %a0 - <4 x i32> by value, shuffled with mask <1,0,3,2> (register source).
;   %a1 - pointer to <4 x i32>, loaded with align 16 and shuffled with mask
;         <3,2,1,0>; the expected output shows this load as the shuffle's
;         memory operand (mem[3,2,1,0]) in every configuration.
; The two shuffled vectors are added and the sum is returned, so each
; configuration also expects one PADDD/VPADDD. The atom expectations
; additionally contain two nop instructions before retq, and the slm
; expectations a movdqa copy of the sum into xmm0.
; The bracketed pair after "sched" is printed by llc -print-schedule and is
; presumably [latency:reciprocal throughput] -- TODO confirm.
; The expected-output lines are autogenerated by
; utils/update_llc_test_checks.py (per the file header); regenerate them
; instead of editing them by hand.
10436 define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { 10437 ; GENERIC-LABEL: test_pshufd: 10438 ; GENERIC: # %bb.0: 10439 ; GENERIC-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50] 10440 ; GENERIC-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50] 10441 ; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 10442 ; GENERIC-NEXT: retq # sched: [1:1.00] 10443 ; 10444 ; ATOM-LABEL: test_pshufd: 10445 ; ATOM: # %bb.0: 10446 ; ATOM-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] 10447 ; ATOM-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [1:1.00] 10448 ; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 10449 ; ATOM-NEXT: nop # sched: [1:0.50] 10450 ; ATOM-NEXT: nop # sched: [1:0.50] 10451 ; ATOM-NEXT: retq # sched: [79:39.50] 10452 ; 10453 ; SLM-LABEL: test_pshufd: 10454 ; SLM: # %bb.0: 10455 ; SLM-NEXT: pshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [4:1.00] 10456 ; SLM-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] 10457 ; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] 10458 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 10459 ; SLM-NEXT: retq # sched: [4:1.00] 10460 ; 10461 ; SANDY-SSE-LABEL: test_pshufd: 10462 ; SANDY-SSE: # %bb.0: 10463 ; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50] 10464 ; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50] 10465 ; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 10466 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 10467 ; 10468 ; SANDY-LABEL: test_pshufd: 10469 ; SANDY: # %bb.0: 10470 ; SANDY-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50] 10471 ; SANDY-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50] 10472 ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10473 ; SANDY-NEXT: retq # sched: [1:1.00] 10474 ; 10475 ; HASWELL-SSE-LABEL: test_pshufd: 10476 ; HASWELL-SSE: # %bb.0: 10477 ; HASWELL-SSE-NEXT:
pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] 10478 ; HASWELL-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00] 10479 ; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 10480 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 10481 ; 10482 ; HASWELL-LABEL: test_pshufd: 10483 ; HASWELL: # %bb.0: 10484 ; HASWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] 10485 ; HASWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] 10486 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10487 ; HASWELL-NEXT: retq # sched: [7:1.00] 10488 ; 10489 ; BROADWELL-SSE-LABEL: test_pshufd: 10490 ; BROADWELL-SSE: # %bb.0: 10491 ; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] 10492 ; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00] 10493 ; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 10494 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 10495 ; 10496 ; BROADWELL-LABEL: test_pshufd: 10497 ; BROADWELL: # %bb.0: 10498 ; BROADWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] 10499 ; BROADWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00] 10500 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10501 ; BROADWELL-NEXT: retq # sched: [7:1.00] 10502 ; 10503 ; SKYLAKE-SSE-LABEL: test_pshufd: 10504 ; SKYLAKE-SSE: # %bb.0: 10505 ; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] 10506 ; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00] 10507 ; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 10508 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 10509 ; 10510 ; SKYLAKE-LABEL: test_pshufd: 10511 ; SKYLAKE: # %bb.0: 10512 ; SKYLAKE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] 10513 ; SKYLAKE-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] 10514 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 10515 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 10516 ; 10517 ; SKX-SSE-LABEL:
test_pshufd: 10518 ; SKX-SSE: # %bb.0: 10519 ; SKX-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] 10520 ; SKX-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00] 10521 ; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 10522 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 10523 ; 10524 ; SKX-LABEL: test_pshufd: 10525 ; SKX: # %bb.0: 10526 ; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] 10527 ; SKX-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] 10528 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 10529 ; SKX-NEXT: retq # sched: [7:1.00] 10530 ; 10531 ; BTVER2-SSE-LABEL: test_pshufd: 10532 ; BTVER2-SSE: # %bb.0: 10533 ; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50] 10534 ; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00] 10535 ; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 10536 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 10537 ; 10538 ; BTVER2-LABEL: test_pshufd: 10539 ; BTVER2: # %bb.0: 10540 ; BTVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00] 10541 ; BTVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50] 10542 ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10543 ; BTVER2-NEXT: retq # sched: [4:1.00] 10544 ; 10545 ; ZNVER1-SSE-LABEL: test_pshufd: 10546 ; ZNVER1-SSE: # %bb.0: 10547 ; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.25] 10548 ; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [8:0.50] 10549 ; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] 10550 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 10551 ; 10552 ; ZNVER1-LABEL: test_pshufd: 10553 ; ZNVER1: # %bb.0: 10554 ; ZNVER1-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [8:0.50] 10555 ; ZNVER1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.25] 10556 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 10557 ; ZNVER1-NEXT: retq # sched: [1:0.50] 10558 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x
i32> <i32 1, i32 0, i32 3, i32 2> 10559 %2 = load <4 x i32>, <4 x i32> *%a1, align 16 10560 %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 10561 %4 = add <4 x i32> %1, %3 10562 ret <4 x i32> %4 10563 } 10564
; test_pshufhw - scheduling-annotation test for the PSHUFHW high-word shuffle
; (VPSHUFHW where the expected output uses the v-prefixed AVX mnemonics).
;   %a0 - <8 x i16> by value, shuffled with mask <0,1,2,3,5,4,7,6>: the low
;         four words keep their positions, the high four are permuted
;         (register source).
;   %a1 - pointer to <8 x i16>, loaded with align 16 and shuffled with mask
;         <0,1,2,3,7,6,5,4>; the expected output shows this load as the
;         shuffle's memory operand in every configuration.
; The two shuffled vectors are added and the sum is returned, so each
; configuration also expects one PADDW/VPADDW. The atom expectations
; additionally contain two nop instructions before retq, and the slm
; expectations a movdqa copy of the sum into xmm0.
; The bracketed pair after "sched" is printed by llc -print-schedule and is
; presumably [latency:reciprocal throughput] -- TODO confirm.
; The expected-output lines are autogenerated by
; utils/update_llc_test_checks.py (per the file header); regenerate them
; instead of editing them by hand.
10565 define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { 10566 ; GENERIC-LABEL: test_pshufhw: 10567 ; GENERIC: # %bb.0: 10568 ; GENERIC-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] 10569 ; GENERIC-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] 10570 ; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 10571 ; GENERIC-NEXT: retq # sched: [1:1.00] 10572 ; 10573 ; ATOM-LABEL: test_pshufhw: 10574 ; ATOM: # %bb.0: 10575 ; ATOM-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] 10576 ; ATOM-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00] 10577 ; ATOM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 10578 ; ATOM-NEXT: nop # sched: [1:0.50] 10579 ; ATOM-NEXT: nop # sched: [1:0.50] 10580 ; ATOM-NEXT: retq # sched: [79:39.50] 10581 ; 10582 ; SLM-LABEL: test_pshufhw: 10583 ; SLM: # %bb.0: 10584 ; SLM-NEXT: pshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [4:1.00] 10585 ; SLM-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] 10586 ; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50] 10587 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 10588 ; SLM-NEXT: retq # sched: [4:1.00] 10589 ; 10590 ; SANDY-SSE-LABEL: test_pshufhw: 10591 ; SANDY-SSE: # %bb.0: 10592 ; SANDY-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] 10593 ; SANDY-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] 10594 ; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 10595 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 10596 ; 10597 ; SANDY-LABEL: test_pshufhw: 10598 ; SANDY: # %bb.0: 10599 ; SANDY-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] 10600 ; SANDY-NEXT: vpshufhw {{.*#+}} xmm1 =
mem[0,1,2,3,7,6,5,4] sched: [7:0.50] 10601 ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10602 ; SANDY-NEXT: retq # sched: [1:1.00] 10603 ; 10604 ; HASWELL-SSE-LABEL: test_pshufhw: 10605 ; HASWELL-SSE: # %bb.0: 10606 ; HASWELL-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] 10607 ; HASWELL-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] 10608 ; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 10609 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 10610 ; 10611 ; HASWELL-LABEL: test_pshufhw: 10612 ; HASWELL: # %bb.0: 10613 ; HASWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] 10614 ; HASWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] 10615 ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10616 ; HASWELL-NEXT: retq # sched: [7:1.00] 10617 ; 10618 ; BROADWELL-SSE-LABEL: test_pshufhw: 10619 ; BROADWELL-SSE: # %bb.0: 10620 ; BROADWELL-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] 10621 ; BROADWELL-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] 10622 ; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 10623 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 10624 ; 10625 ; BROADWELL-LABEL: test_pshufhw: 10626 ; BROADWELL: # %bb.0: 10627 ; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] 10628 ; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] 10629 ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10630 ; BROADWELL-NEXT: retq # sched: [7:1.00] 10631 ; 10632 ; SKYLAKE-SSE-LABEL: test_pshufhw: 10633 ; SKYLAKE-SSE: # %bb.0: 10634 ; SKYLAKE-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] 10635 ; SKYLAKE-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] 10636 ; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] 10637 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 10638 ; 10639 ;
SKYLAKE-LABEL: test_pshufhw: 10640 ; SKYLAKE: # %bb.0: 10641 ; SKYLAKE-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] 10642 ; SKYLAKE-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] 10643 ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 10644 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 10645 ; 10646 ; SKX-SSE-LABEL: test_pshufhw: 10647 ; SKX-SSE: # %bb.0: 10648 ; SKX-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] 10649 ; SKX-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] 10650 ; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] 10651 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 10652 ; 10653 ; SKX-LABEL: test_pshufhw: 10654 ; SKX: # %bb.0: 10655 ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] 10656 ; SKX-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] 10657 ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 10658 ; SKX-NEXT: retq # sched: [7:1.00] 10659 ; 10660 ; BTVER2-SSE-LABEL: test_pshufhw: 10661 ; BTVER2-SSE: # %bb.0: 10662 ; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] 10663 ; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] 10664 ; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 10665 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 10666 ; 10667 ; BTVER2-LABEL: test_pshufhw: 10668 ; BTVER2: # %bb.0: 10669 ; BTVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] 10670 ; BTVER2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] 10671 ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10672 ; BTVER2-NEXT: retq # sched: [4:1.00] 10673 ; 10674 ; ZNVER1-SSE-LABEL: test_pshufhw: 10675 ; ZNVER1-SSE: # %bb.0: 10676 ; ZNVER1-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25] 10677 ; ZNVER1-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50] 10678 ; ZNVER1-SSE-NEXT: paddw %xmm1,
%xmm0 # sched: [1:0.25] 10679 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 10680 ; 10681 ; ZNVER1-LABEL: test_pshufhw: 10682 ; ZNVER1: # %bb.0: 10683 ; ZNVER1-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50] 10684 ; ZNVER1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25] 10685 ; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 10686 ; ZNVER1-NEXT: retq # sched: [1:0.50] 10687 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6> 10688 %2 = load <8 x i16>, <8 x i16> *%a1, align 16 10689 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4> 10690 %4 = add <8 x i16> %1, %3 10691 ret <8 x i16> %4 10692 } 10693
; test_pshuflw - scheduling-annotation test for the PSHUFLW low-word shuffle
; (VPSHUFLW where the expected output uses the v-prefixed AVX mnemonics).
;   %a0 - <8 x i16> by value, shuffled with mask <1,0,3,2,4,5,6,7>: the low
;         four words are permuted, the high four keep their positions
;         (register source).
;   %a1 - pointer to <8 x i16>, loaded with align 16 and shuffled with mask
;         <3,2,1,0,4,5,6,7>; the expected output shows this load as the
;         shuffle's memory operand in every configuration.
; The two shuffled vectors are added and the sum is returned, so each
; configuration also expects one PADDW/VPADDW. The atom expectations
; additionally contain two nop instructions before retq, and the slm
; expectations a movdqa copy of the sum into xmm0.
; The bracketed pair after "sched" is printed by llc -print-schedule and is
; presumably [latency:reciprocal throughput] -- TODO confirm.
; The expected-output lines are autogenerated by
; utils/update_llc_test_checks.py (per the file header); regenerate them
; instead of editing them by hand.
10694 define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { 10695 ; GENERIC-LABEL: test_pshuflw: 10696 ; GENERIC: # %bb.0: 10697 ; GENERIC-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] 10698 ; GENERIC-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] 10699 ; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 10700 ; GENERIC-NEXT: retq # sched: [1:1.00] 10701 ; 10702 ; ATOM-LABEL: test_pshuflw: 10703 ; ATOM: # %bb.0: 10704 ; ATOM-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] 10705 ; ATOM-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00] 10706 ; ATOM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 10707 ; ATOM-NEXT: nop # sched: [1:0.50] 10708 ; ATOM-NEXT: nop # sched: [1:0.50] 10709 ; ATOM-NEXT: retq # sched: [79:39.50] 10710 ; 10711 ; SLM-LABEL: test_pshuflw: 10712 ; SLM: # %bb.0: 10713 ; SLM-NEXT: pshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [4:1.00] 10714 ; SLM-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] 10715 ; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50] 10716 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 10717 ; SLM-NEXT: retq # sched: [4:1.00] 10718 ; 10719 ;
SANDY-SSE-LABEL: test_pshuflw: 10720 ; SANDY-SSE: # %bb.0: 10721 ; SANDY-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] 10722 ; SANDY-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] 10723 ; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 10724 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 10725 ; 10726 ; SANDY-LABEL: test_pshuflw: 10727 ; SANDY: # %bb.0: 10728 ; SANDY-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] 10729 ; SANDY-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] 10730 ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10731 ; SANDY-NEXT: retq # sched: [1:1.00] 10732 ; 10733 ; HASWELL-SSE-LABEL: test_pshuflw: 10734 ; HASWELL-SSE: # %bb.0: 10735 ; HASWELL-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] 10736 ; HASWELL-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] 10737 ; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 10738 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 10739 ; 10740 ; HASWELL-LABEL: test_pshuflw: 10741 ; HASWELL: # %bb.0: 10742 ; HASWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] 10743 ; HASWELL-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] 10744 ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10745 ; HASWELL-NEXT: retq # sched: [7:1.00] 10746 ; 10747 ; BROADWELL-SSE-LABEL: test_pshuflw: 10748 ; BROADWELL-SSE: # %bb.0: 10749 ; BROADWELL-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] 10750 ; BROADWELL-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] 10751 ; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 10752 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 10753 ; 10754 ; BROADWELL-LABEL: test_pshuflw: 10755 ; BROADWELL: # %bb.0: 10756 ; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] 10757 ; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm1 =
mem[3,2,1,0,4,5,6,7] sched: [6:1.00] 10758 ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10759 ; BROADWELL-NEXT: retq # sched: [7:1.00] 10760 ; 10761 ; SKYLAKE-SSE-LABEL: test_pshuflw: 10762 ; SKYLAKE-SSE: # %bb.0: 10763 ; SKYLAKE-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] 10764 ; SKYLAKE-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] 10765 ; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] 10766 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 10767 ; 10768 ; SKYLAKE-LABEL: test_pshuflw: 10769 ; SKYLAKE: # %bb.0: 10770 ; SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] 10771 ; SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] 10772 ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 10773 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 10774 ; 10775 ; SKX-SSE-LABEL: test_pshuflw: 10776 ; SKX-SSE: # %bb.0: 10777 ; SKX-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] 10778 ; SKX-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] 10779 ; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] 10780 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 10781 ; 10782 ; SKX-LABEL: test_pshuflw: 10783 ; SKX: # %bb.0: 10784 ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] 10785 ; SKX-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] 10786 ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 10787 ; SKX-NEXT: retq # sched: [7:1.00] 10788 ; 10789 ; BTVER2-SSE-LABEL: test_pshuflw: 10790 ; BTVER2-SSE: # %bb.0: 10791 ; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] 10792 ; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] 10793 ; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] 10794 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 10795 ; 10796 ; BTVER2-LABEL: test_pshuflw: 10797 ; BTVER2: # %bb.0: 10798 ; BTVER2-NEXT: vpshuflw
{{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] 10799 ; BTVER2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] 10800 ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10801 ; BTVER2-NEXT: retq # sched: [4:1.00] 10802 ; 10803 ; ZNVER1-SSE-LABEL: test_pshuflw: 10804 ; ZNVER1-SSE: # %bb.0: 10805 ; ZNVER1-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25] 10806 ; ZNVER1-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50] 10807 ; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] 10808 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 10809 ; 10810 ; ZNVER1-LABEL: test_pshuflw: 10811 ; ZNVER1: # %bb.0: 10812 ; ZNVER1-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50] 10813 ; ZNVER1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25] 10814 ; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 10815 ; ZNVER1-NEXT: retq # sched: [1:0.50] 10816 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> 10817 %2 = load <8 x i16>, <8 x i16> *%a1, align 16 10818 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> 10819 %4 = add <8 x i16> %1, %3 10820 ret <8 x i16> %4 10821 } 10822
; test_pslld - scheduling-annotation test for the PSLLD dword left shift
; (VPSLLD where the expected output uses the v-prefixed AVX mnemonics).
; The body chains three shifts so that each operand form appears once:
;   1. @llvm.x86.sse2.psll.d(%a0, %a1) - count taken from the vector %a1,
;      expected as a register operand (%xmm1);
;   2. @llvm.x86.sse2.psll.d(%1, %2)   - count %2 is loaded with align 16
;      from the pointer %a2, expected as the memory operand (%rdi);
;   3. @llvm.x86.sse2.pslli.d(%3, i32 2) - immediate count, expected as $2.
; The result of the third shift is returned.
; The bracketed pair after "sched" is printed by llc -print-schedule and is
; presumably [latency:reciprocal throughput] -- TODO confirm.
; The expected-output lines are autogenerated by
; utils/update_llc_test_checks.py (per the file header); regenerate them
; instead of editing them by hand.
10823 define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 10824 ; GENERIC-LABEL: test_pslld: 10825 ; GENERIC: # %bb.0: 10826 ; GENERIC-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] 10827 ; GENERIC-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] 10828 ; GENERIC-NEXT: pslld $2, %xmm0 # sched: [1:1.00] 10829 ; GENERIC-NEXT: retq # sched: [1:1.00] 10830 ; 10831 ; ATOM-LABEL: test_pslld: 10832 ; ATOM: # %bb.0: 10833 ; ATOM-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] 10834 ; ATOM-NEXT: pslld (%rdi), %xmm0 # sched: [3:1.50] 10835 ; ATOM-NEXT: pslld $2, %xmm0 # sched: [1:0.50] 10836 ; ATOM-NEXT: retq # sched: [79:39.50] 10837 ; 10838 ; SLM-LABEL: test_pslld:
10839 ; SLM: # %bb.0: 10840 ; SLM-NEXT: pslld %xmm1, %xmm0 # sched: [1:1.00] 10841 ; SLM-NEXT: pslld (%rdi), %xmm0 # sched: [4:1.00] 10842 ; SLM-NEXT: pslld $2, %xmm0 # sched: [1:1.00] 10843 ; SLM-NEXT: retq # sched: [4:1.00] 10844 ; 10845 ; SANDY-SSE-LABEL: test_pslld: 10846 ; SANDY-SSE: # %bb.0: 10847 ; SANDY-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] 10848 ; SANDY-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] 10849 ; SANDY-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00] 10850 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 10851 ; 10852 ; SANDY-LABEL: test_pslld: 10853 ; SANDY: # %bb.0: 10854 ; SANDY-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 10855 ; SANDY-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 10856 ; SANDY-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] 10857 ; SANDY-NEXT: retq # sched: [1:1.00] 10858 ; 10859 ; HASWELL-SSE-LABEL: test_pslld: 10860 ; HASWELL-SSE: # %bb.0: 10861 ; HASWELL-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] 10862 ; HASWELL-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] 10863 ; HASWELL-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00] 10864 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 10865 ; 10866 ; HASWELL-LABEL: test_pslld: 10867 ; HASWELL: # %bb.0: 10868 ; HASWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 10869 ; HASWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 10870 ; HASWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] 10871 ; HASWELL-NEXT: retq # sched: [7:1.00] 10872 ; 10873 ; BROADWELL-SSE-LABEL: test_pslld: 10874 ; BROADWELL-SSE: # %bb.0: 10875 ; BROADWELL-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] 10876 ; BROADWELL-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:1.00] 10877 ; BROADWELL-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00] 10878 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 10879 ; 10880 ; BROADWELL-LABEL: test_pslld: 10881 ; BROADWELL: # %bb.0: 10882 ; BROADWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 10883 ; BROADWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 10884 ;
BROADWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] 10885 ; BROADWELL-NEXT: retq # sched: [7:1.00] 10886 ; 10887 ; SKYLAKE-SSE-LABEL: test_pslld: 10888 ; SKYLAKE-SSE: # %bb.0: 10889 ; SKYLAKE-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] 10890 ; SKYLAKE-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:0.50] 10891 ; SKYLAKE-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50] 10892 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 10893 ; 10894 ; SKYLAKE-LABEL: test_pslld: 10895 ; SKYLAKE: # %bb.0: 10896 ; SKYLAKE-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 10897 ; SKYLAKE-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 10898 ; SKYLAKE-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] 10899 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 10900 ; 10901 ; SKX-SSE-LABEL: test_pslld: 10902 ; SKX-SSE: # %bb.0: 10903 ; SKX-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] 10904 ; SKX-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:0.50] 10905 ; SKX-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50] 10906 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 10907 ; 10908 ; SKX-LABEL: test_pslld: 10909 ; SKX: # %bb.0: 10910 ; SKX-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 10911 ; SKX-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 10912 ; SKX-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] 10913 ; SKX-NEXT: retq # sched: [7:1.00] 10914 ; 10915 ; BTVER2-SSE-LABEL: test_pslld: 10916 ; BTVER2-SSE: # %bb.0: 10917 ; BTVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [1:0.50] 10918 ; BTVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [6:1.00] 10919 ; BTVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50] 10920 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 10921 ; 10922 ; BTVER2-LABEL: test_pslld: 10923 ; BTVER2: # %bb.0: 10924 ; BTVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 10925 ; BTVER2-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 10926 ; BTVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] 10927 ; BTVER2-NEXT: retq # sched: [4:1.00] 10928 ; 10929 ; ZNVER1-SSE-LABEL: test_pslld: 10930 ; ZNVER1-SSE: # %bb.0:
10931 ; ZNVER1-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [1:1.00] 10932 ; ZNVER1-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] 10933 ; ZNVER1-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.25] 10934 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 10935 ; 10936 ; ZNVER1-LABEL: test_pslld: 10937 ; ZNVER1: # %bb.0: 10938 ; ZNVER1-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 10939 ; ZNVER1-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 10940 ; ZNVER1-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.25] 10941 ; ZNVER1-NEXT: retq # sched: [1:0.50] 10942 %1 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) 10943 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 10944 %3 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %1, <4 x i32> %2) 10945 %4 = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %3, i32 2) 10946 ret <4 x i32> %4 10947 } 10948 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone 10949 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone 10950
; test_pslldq - scheduling-annotation test for the PSLLDQ whole-register
; byte shift (VPSLLDQ where the expected output uses the v-prefixed AVX
; mnemonics).
; No intrinsic is called: the body is a single shufflevector of %a0 with
; zeroinitializer using mask <4,0,1,2>, so lane 0 comes from the zero vector
; and lanes 1-3 are lanes 0-2 of %a0. Every configuration expects this to be
; emitted as one pslldq whose decoded operand is
; zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]. The atom expectations
; additionally contain six nop instructions before retq.
; The bracketed pair after "sched" is printed by llc -print-schedule and is
; presumably [latency:reciprocal throughput] -- TODO confirm.
; The expected-output lines are autogenerated by
; utils/update_llc_test_checks.py (per the file header); regenerate them
; instead of editing them by hand.
10951 define <4 x i32> @test_pslldq(<4 x i32> %a0) { 10952 ; GENERIC-LABEL: test_pslldq: 10953 ; GENERIC: # %bb.0: 10954 ; GENERIC-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] 10955 ; GENERIC-NEXT: retq # sched: [1:1.00] 10956 ; 10957 ; ATOM-LABEL: test_pslldq: 10958 ; ATOM: # %bb.0: 10959 ; ATOM-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] 10960 ; ATOM-NEXT: nop # sched: [1:0.50] 10961 ; ATOM-NEXT: nop # sched: [1:0.50] 10962 ; ATOM-NEXT: nop # sched: [1:0.50] 10963 ; ATOM-NEXT: nop # sched: [1:0.50] 10964 ; ATOM-NEXT: nop # sched: [1:0.50] 10965 ; ATOM-NEXT: nop # sched: [1:0.50] 10966 ; ATOM-NEXT: retq # sched: [79:39.50] 10967 ; 10968 ; SLM-LABEL: test_pslldq: 10969 ; SLM: # %bb.0: 10970 ; SLM-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] 10971 ; SLM-NEXT: retq # sched: [4:1.00] 10972 ; 10973 ;
SANDY-SSE-LABEL: test_pslldq: 10974 ; SANDY-SSE: # %bb.0: 10975 ; SANDY-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] 10976 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 10977 ; 10978 ; SANDY-LABEL: test_pslldq: 10979 ; SANDY: # %bb.0: 10980 ; SANDY-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] 10981 ; SANDY-NEXT: retq # sched: [1:1.00] 10982 ; 10983 ; HASWELL-SSE-LABEL: test_pslldq: 10984 ; HASWELL-SSE: # %bb.0: 10985 ; HASWELL-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] 10986 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 10987 ; 10988 ; HASWELL-LABEL: test_pslldq: 10989 ; HASWELL: # %bb.0: 10990 ; HASWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] 10991 ; HASWELL-NEXT: retq # sched: [7:1.00] 10992 ; 10993 ; BROADWELL-SSE-LABEL: test_pslldq: 10994 ; BROADWELL-SSE: # %bb.0: 10995 ; BROADWELL-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] 10996 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 10997 ; 10998 ; BROADWELL-LABEL: test_pslldq: 10999 ; BROADWELL: # %bb.0: 11000 ; BROADWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] 11001 ; BROADWELL-NEXT: retq # sched: [7:1.00] 11002 ; 11003 ; SKYLAKE-SSE-LABEL: test_pslldq: 11004 ; SKYLAKE-SSE: # %bb.0: 11005 ; SKYLAKE-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] 11006 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 11007 ; 11008 ; SKYLAKE-LABEL: test_pslldq: 11009 ; SKYLAKE: # %bb.0: 11010 ; SKYLAKE-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] 11011 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 11012 ; 11013 ; SKX-SSE-LABEL: test_pslldq: 11014 ; SKX-SSE: # %bb.0: 11015 ; SKX-SSE-NEXT: pslldq {{.*#+}} xmm0 =
zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] 11016 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 11017 ; 11018 ; SKX-LABEL: test_pslldq: 11019 ; SKX: # %bb.0: 11020 ; SKX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] 11021 ; SKX-NEXT: retq # sched: [7:1.00] 11022 ; 11023 ; BTVER2-SSE-LABEL: test_pslldq: 11024 ; BTVER2-SSE: # %bb.0: 11025 ; BTVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] 11026 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 11027 ; 11028 ; BTVER2-LABEL: test_pslldq: 11029 ; BTVER2: # %bb.0: 11030 ; BTVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] 11031 ; BTVER2-NEXT: retq # sched: [4:1.00] 11032 ; 11033 ; ZNVER1-SSE-LABEL: test_pslldq: 11034 ; ZNVER1-SSE: # %bb.0: 11035 ; ZNVER1-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] 11036 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 11037 ; 11038 ; ZNVER1-LABEL: test_pslldq: 11039 ; ZNVER1: # %bb.0: 11040 ; ZNVER1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] 11041 ; ZNVER1-NEXT: retq # sched: [1:0.50] 11042 %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 1, i32 2> 11043 ret <4 x i32> %1 11044 } 11045
; test_psllq - scheduling-annotation test for the PSLLQ qword left shift
; (VPSLLQ where the expected output uses the v-prefixed AVX mnemonics).
; The body chains three shifts so that each operand form appears once:
;   1. @llvm.x86.sse2.psll.q(%a0, %a1) - count taken from the vector %a1,
;      expected as a register operand (%xmm1);
;   2. @llvm.x86.sse2.psll.q(%1, %2)   - count %2 is loaded with align 16
;      from the pointer %a2, expected as the memory operand (%rdi);
;   3. @llvm.x86.sse2.pslli.q(%3, i32 2) - immediate count, expected as $2.
; The result of the third shift is returned.
; The bracketed pair after "sched" is printed by llc -print-schedule and is
; presumably [latency:reciprocal throughput] -- TODO confirm.
; The expected-output lines are autogenerated by
; utils/update_llc_test_checks.py (per the file header); regenerate them
; instead of editing them by hand.
11046 define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 11047 ; GENERIC-LABEL: test_psllq: 11048 ; GENERIC: # %bb.0: 11049 ; GENERIC-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] 11050 ; GENERIC-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] 11051 ; GENERIC-NEXT: psllq $2, %xmm0 # sched: [1:1.00] 11052 ; GENERIC-NEXT: retq # sched: [1:1.00] 11053 ; 11054 ; ATOM-LABEL: test_psllq: 11055 ; ATOM: # %bb.0: 11056 ; ATOM-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] 11057 ; ATOM-NEXT: psllq (%rdi), %xmm0 # sched: [3:1.50] 11058 ; ATOM-NEXT: psllq $2, %xmm0 # sched: [1:0.50] 11059 ;
ATOM-NEXT: retq # sched: [79:39.50] 11060 ; 11061 ; SLM-LABEL: test_psllq: 11062 ; SLM: # %bb.0: 11063 ; SLM-NEXT: psllq %xmm1, %xmm0 # sched: [1:1.00] 11064 ; SLM-NEXT: psllq (%rdi), %xmm0 # sched: [4:1.00] 11065 ; SLM-NEXT: psllq $2, %xmm0 # sched: [1:1.00] 11066 ; SLM-NEXT: retq # sched: [4:1.00] 11067 ; 11068 ; SANDY-SSE-LABEL: test_psllq: 11069 ; SANDY-SSE: # %bb.0: 11070 ; SANDY-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] 11071 ; SANDY-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] 11072 ; SANDY-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00] 11073 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 11074 ; 11075 ; SANDY-LABEL: test_psllq: 11076 ; SANDY: # %bb.0: 11077 ; SANDY-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11078 ; SANDY-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11079 ; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] 11080 ; SANDY-NEXT: retq # sched: [1:1.00] 11081 ; 11082 ; HASWELL-SSE-LABEL: test_psllq: 11083 ; HASWELL-SSE: # %bb.0: 11084 ; HASWELL-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] 11085 ; HASWELL-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] 11086 ; HASWELL-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00] 11087 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 11088 ; 11089 ; HASWELL-LABEL: test_psllq: 11090 ; HASWELL: # %bb.0: 11091 ; HASWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11092 ; HASWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11093 ; HASWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] 11094 ; HASWELL-NEXT: retq # sched: [7:1.00] 11095 ; 11096 ; BROADWELL-SSE-LABEL: test_psllq: 11097 ; BROADWELL-SSE: # %bb.0: 11098 ; BROADWELL-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] 11099 ; BROADWELL-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:1.00] 11100 ; BROADWELL-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00] 11101 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 11102 ; 11103 ; BROADWELL-LABEL: test_psllq: 11104 ; BROADWELL: # %bb.0: 11105 ; BROADWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
11106 ; BROADWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 11107 ; BROADWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] 11108 ; BROADWELL-NEXT: retq # sched: [7:1.00] 11109 ; 11110 ; SKYLAKE-SSE-LABEL: test_psllq: 11111 ; SKYLAKE-SSE: # %bb.0: 11112 ; SKYLAKE-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] 11113 ; SKYLAKE-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:0.50] 11114 ; SKYLAKE-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50] 11115 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 11116 ; 11117 ; SKYLAKE-LABEL: test_psllq: 11118 ; SKYLAKE: # %bb.0: 11119 ; SKYLAKE-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11120 ; SKYLAKE-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 11121 ; SKYLAKE-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] 11122 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 11123 ; 11124 ; SKX-SSE-LABEL: test_psllq: 11125 ; SKX-SSE: # %bb.0: 11126 ; SKX-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] 11127 ; SKX-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:0.50] 11128 ; SKX-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50] 11129 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 11130 ; 11131 ; SKX-LABEL: test_psllq: 11132 ; SKX: # %bb.0: 11133 ; SKX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11134 ; SKX-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 11135 ; SKX-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] 11136 ; SKX-NEXT: retq # sched: [7:1.00] 11137 ; 11138 ; BTVER2-SSE-LABEL: test_psllq: 11139 ; BTVER2-SSE: # %bb.0: 11140 ; BTVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [1:0.50] 11141 ; BTVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [6:1.00] 11142 ; BTVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50] 11143 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 11144 ; 11145 ; BTVER2-LABEL: test_psllq: 11146 ; BTVER2: # %bb.0: 11147 ; BTVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 11148 ; BTVER2-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 11149 ; BTVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] 11150 ; BTVER2-NEXT: retq # sched:
[4:1.00] 11151 ; 11152 ; ZNVER1-SSE-LABEL: test_psllq: 11153 ; ZNVER1-SSE: # %bb.0: 11154 ; ZNVER1-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [1:1.00] 11155 ; ZNVER1-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] 11156 ; ZNVER1-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.25] 11157 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 11158 ; 11159 ; ZNVER1-LABEL: test_psllq: 11160 ; ZNVER1: # %bb.0: 11161 ; ZNVER1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 11162 ; ZNVER1-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11163 ; ZNVER1-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.25] 11164 ; ZNVER1-NEXT: retq # sched: [1:0.50] 11165 %1 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) 11166 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 11167 %3 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %1, <2 x i64> %2) 11168 %4 = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %3, i32 2) 11169 ret <2 x i64> %4 11170 } 11171 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone 11172 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone 11173 11174 define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 11175 ; GENERIC-LABEL: test_psllw: 11176 ; GENERIC: # %bb.0: 11177 ; GENERIC-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] 11178 ; GENERIC-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] 11179 ; GENERIC-NEXT: psllw $2, %xmm0 # sched: [1:1.00] 11180 ; GENERIC-NEXT: retq # sched: [1:1.00] 11181 ; 11182 ; ATOM-LABEL: test_psllw: 11183 ; ATOM: # %bb.0: 11184 ; ATOM-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] 11185 ; ATOM-NEXT: psllw (%rdi), %xmm0 # sched: [3:1.50] 11186 ; ATOM-NEXT: psllw $2, %xmm0 # sched: [1:0.50] 11187 ; ATOM-NEXT: retq # sched: [79:39.50] 11188 ; 11189 ; SLM-LABEL: test_psllw: 11190 ; SLM: # %bb.0: 11191 ; SLM-NEXT: psllw %xmm1, %xmm0 # sched: [1:1.00] 11192 ; SLM-NEXT: psllw (%rdi), %xmm0 # sched: [4:1.00] 11193 ; SLM-NEXT: psllw $2, %xmm0 # sched: [1:1.00] 11194 ; SLM-NEXT: retq # sched: 
[4:1.00] 11195 ; 11196 ; SANDY-SSE-LABEL: test_psllw: 11197 ; SANDY-SSE: # %bb.0: 11198 ; SANDY-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] 11199 ; SANDY-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] 11200 ; SANDY-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00] 11201 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 11202 ; 11203 ; SANDY-LABEL: test_psllw: 11204 ; SANDY: # %bb.0: 11205 ; SANDY-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11206 ; SANDY-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11207 ; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] 11208 ; SANDY-NEXT: retq # sched: [1:1.00] 11209 ; 11210 ; HASWELL-SSE-LABEL: test_psllw: 11211 ; HASWELL-SSE: # %bb.0: 11212 ; HASWELL-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] 11213 ; HASWELL-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] 11214 ; HASWELL-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00] 11215 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 11216 ; 11217 ; HASWELL-LABEL: test_psllw: 11218 ; HASWELL: # %bb.0: 11219 ; HASWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11220 ; HASWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11221 ; HASWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] 11222 ; HASWELL-NEXT: retq # sched: [7:1.00] 11223 ; 11224 ; BROADWELL-SSE-LABEL: test_psllw: 11225 ; BROADWELL-SSE: # %bb.0: 11226 ; BROADWELL-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] 11227 ; BROADWELL-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:1.00] 11228 ; BROADWELL-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00] 11229 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 11230 ; 11231 ; BROADWELL-LABEL: test_psllw: 11232 ; BROADWELL: # %bb.0: 11233 ; BROADWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11234 ; BROADWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 11235 ; BROADWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] 11236 ; BROADWELL-NEXT: retq # sched: [7:1.00] 11237 ; 11238 ; SKYLAKE-SSE-LABEL: test_psllw: 11239 ; SKYLAKE-SSE: # %bb.0: 11240 ; SKYLAKE-SSE-NEXT: psllw 
%xmm1, %xmm0 # sched: [2:1.00] 11241 ; SKYLAKE-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:0.50] 11242 ; SKYLAKE-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50] 11243 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 11244 ; 11245 ; SKYLAKE-LABEL: test_psllw: 11246 ; SKYLAKE: # %bb.0: 11247 ; SKYLAKE-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11248 ; SKYLAKE-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 11249 ; SKYLAKE-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] 11250 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 11251 ; 11252 ; SKX-SSE-LABEL: test_psllw: 11253 ; SKX-SSE: # %bb.0: 11254 ; SKX-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] 11255 ; SKX-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:0.50] 11256 ; SKX-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50] 11257 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 11258 ; 11259 ; SKX-LABEL: test_psllw: 11260 ; SKX: # %bb.0: 11261 ; SKX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11262 ; SKX-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 11263 ; SKX-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] 11264 ; SKX-NEXT: retq # sched: [7:1.00] 11265 ; 11266 ; BTVER2-SSE-LABEL: test_psllw: 11267 ; BTVER2-SSE: # %bb.0: 11268 ; BTVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [1:0.50] 11269 ; BTVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [6:1.00] 11270 ; BTVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50] 11271 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 11272 ; 11273 ; BTVER2-LABEL: test_psllw: 11274 ; BTVER2: # %bb.0: 11275 ; BTVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 11276 ; BTVER2-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 11277 ; BTVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] 11278 ; BTVER2-NEXT: retq # sched: [4:1.00] 11279 ; 11280 ; ZNVER1-SSE-LABEL: test_psllw: 11281 ; ZNVER1-SSE: # %bb.0: 11282 ; ZNVER1-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [1:1.00] 11283 ; ZNVER1-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] 11284 ; ZNVER1-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.25] 11285 ; ZNVER1-SSE-NEXT: retq # 
sched: [1:0.50] 11286 ; 11287 ; ZNVER1-LABEL: test_psllw: 11288 ; ZNVER1: # %bb.0: 11289 ; ZNVER1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 11290 ; ZNVER1-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11291 ; ZNVER1-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.25] 11292 ; ZNVER1-NEXT: retq # sched: [1:0.50] 11293 %1 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) 11294 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 11295 %3 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %1, <8 x i16> %2) 11296 %4 = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %3, i32 2) 11297 ret <8 x i16> %4 11298 } 11299 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone 11300 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone 11301 11302 define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 11303 ; GENERIC-LABEL: test_psrad: 11304 ; GENERIC: # %bb.0: 11305 ; GENERIC-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] 11306 ; GENERIC-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] 11307 ; GENERIC-NEXT: psrad $2, %xmm0 # sched: [1:1.00] 11308 ; GENERIC-NEXT: retq # sched: [1:1.00] 11309 ; 11310 ; ATOM-LABEL: test_psrad: 11311 ; ATOM: # %bb.0: 11312 ; ATOM-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] 11313 ; ATOM-NEXT: psrad (%rdi), %xmm0 # sched: [3:1.50] 11314 ; ATOM-NEXT: psrad $2, %xmm0 # sched: [1:0.50] 11315 ; ATOM-NEXT: retq # sched: [79:39.50] 11316 ; 11317 ; SLM-LABEL: test_psrad: 11318 ; SLM: # %bb.0: 11319 ; SLM-NEXT: psrad %xmm1, %xmm0 # sched: [1:1.00] 11320 ; SLM-NEXT: psrad (%rdi), %xmm0 # sched: [4:1.00] 11321 ; SLM-NEXT: psrad $2, %xmm0 # sched: [1:1.00] 11322 ; SLM-NEXT: retq # sched: [4:1.00] 11323 ; 11324 ; SANDY-SSE-LABEL: test_psrad: 11325 ; SANDY-SSE: # %bb.0: 11326 ; SANDY-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] 11327 ; SANDY-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] 11328 ; SANDY-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00] 11329 ; SANDY-SSE-NEXT: retq # sched: 
[1:1.00] 11330 ; 11331 ; SANDY-LABEL: test_psrad: 11332 ; SANDY: # %bb.0: 11333 ; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11334 ; SANDY-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11335 ; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] 11336 ; SANDY-NEXT: retq # sched: [1:1.00] 11337 ; 11338 ; HASWELL-SSE-LABEL: test_psrad: 11339 ; HASWELL-SSE: # %bb.0: 11340 ; HASWELL-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] 11341 ; HASWELL-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] 11342 ; HASWELL-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00] 11343 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 11344 ; 11345 ; HASWELL-LABEL: test_psrad: 11346 ; HASWELL: # %bb.0: 11347 ; HASWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11348 ; HASWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11349 ; HASWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] 11350 ; HASWELL-NEXT: retq # sched: [7:1.00] 11351 ; 11352 ; BROADWELL-SSE-LABEL: test_psrad: 11353 ; BROADWELL-SSE: # %bb.0: 11354 ; BROADWELL-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] 11355 ; BROADWELL-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:1.00] 11356 ; BROADWELL-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00] 11357 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 11358 ; 11359 ; BROADWELL-LABEL: test_psrad: 11360 ; BROADWELL: # %bb.0: 11361 ; BROADWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11362 ; BROADWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 11363 ; BROADWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] 11364 ; BROADWELL-NEXT: retq # sched: [7:1.00] 11365 ; 11366 ; SKYLAKE-SSE-LABEL: test_psrad: 11367 ; SKYLAKE-SSE: # %bb.0: 11368 ; SKYLAKE-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] 11369 ; SKYLAKE-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:0.50] 11370 ; SKYLAKE-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50] 11371 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 11372 ; 11373 ; SKYLAKE-LABEL: test_psrad: 11374 ; SKYLAKE: # %bb.0: 11375 ; SKYLAKE-NEXT: vpsrad 
%xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11376 ; SKYLAKE-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 11377 ; SKYLAKE-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] 11378 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 11379 ; 11380 ; SKX-SSE-LABEL: test_psrad: 11381 ; SKX-SSE: # %bb.0: 11382 ; SKX-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] 11383 ; SKX-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:0.50] 11384 ; SKX-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50] 11385 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 11386 ; 11387 ; SKX-LABEL: test_psrad: 11388 ; SKX: # %bb.0: 11389 ; SKX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11390 ; SKX-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 11391 ; SKX-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] 11392 ; SKX-NEXT: retq # sched: [7:1.00] 11393 ; 11394 ; BTVER2-SSE-LABEL: test_psrad: 11395 ; BTVER2-SSE: # %bb.0: 11396 ; BTVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [1:0.50] 11397 ; BTVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [6:1.00] 11398 ; BTVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50] 11399 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 11400 ; 11401 ; BTVER2-LABEL: test_psrad: 11402 ; BTVER2: # %bb.0: 11403 ; BTVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 11404 ; BTVER2-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 11405 ; BTVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] 11406 ; BTVER2-NEXT: retq # sched: [4:1.00] 11407 ; 11408 ; ZNVER1-SSE-LABEL: test_psrad: 11409 ; ZNVER1-SSE: # %bb.0: 11410 ; ZNVER1-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [1:1.00] 11411 ; ZNVER1-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] 11412 ; ZNVER1-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.25] 11413 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 11414 ; 11415 ; ZNVER1-LABEL: test_psrad: 11416 ; ZNVER1: # %bb.0: 11417 ; ZNVER1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 11418 ; ZNVER1-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11419 ; ZNVER1-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.25] 11420 ; ZNVER1-NEXT: 
retq # sched: [1:0.50] 11421 %1 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) 11422 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 11423 %3 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> %2) 11424 %4 = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2) 11425 ret <4 x i32> %4 11426 } 11427 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone 11428 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone 11429 11430 define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 11431 ; GENERIC-LABEL: test_psraw: 11432 ; GENERIC: # %bb.0: 11433 ; GENERIC-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] 11434 ; GENERIC-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] 11435 ; GENERIC-NEXT: psraw $2, %xmm0 # sched: [1:1.00] 11436 ; GENERIC-NEXT: retq # sched: [1:1.00] 11437 ; 11438 ; ATOM-LABEL: test_psraw: 11439 ; ATOM: # %bb.0: 11440 ; ATOM-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] 11441 ; ATOM-NEXT: psraw (%rdi), %xmm0 # sched: [3:1.50] 11442 ; ATOM-NEXT: psraw $2, %xmm0 # sched: [1:0.50] 11443 ; ATOM-NEXT: retq # sched: [79:39.50] 11444 ; 11445 ; SLM-LABEL: test_psraw: 11446 ; SLM: # %bb.0: 11447 ; SLM-NEXT: psraw %xmm1, %xmm0 # sched: [1:1.00] 11448 ; SLM-NEXT: psraw (%rdi), %xmm0 # sched: [4:1.00] 11449 ; SLM-NEXT: psraw $2, %xmm0 # sched: [1:1.00] 11450 ; SLM-NEXT: retq # sched: [4:1.00] 11451 ; 11452 ; SANDY-SSE-LABEL: test_psraw: 11453 ; SANDY-SSE: # %bb.0: 11454 ; SANDY-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] 11455 ; SANDY-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] 11456 ; SANDY-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00] 11457 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 11458 ; 11459 ; SANDY-LABEL: test_psraw: 11460 ; SANDY: # %bb.0: 11461 ; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11462 ; SANDY-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11463 ; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] 11464 ; SANDY-NEXT: retq # sched: 
[1:1.00] 11465 ; 11466 ; HASWELL-SSE-LABEL: test_psraw: 11467 ; HASWELL-SSE: # %bb.0: 11468 ; HASWELL-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] 11469 ; HASWELL-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] 11470 ; HASWELL-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00] 11471 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 11472 ; 11473 ; HASWELL-LABEL: test_psraw: 11474 ; HASWELL: # %bb.0: 11475 ; HASWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11476 ; HASWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11477 ; HASWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] 11478 ; HASWELL-NEXT: retq # sched: [7:1.00] 11479 ; 11480 ; BROADWELL-SSE-LABEL: test_psraw: 11481 ; BROADWELL-SSE: # %bb.0: 11482 ; BROADWELL-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] 11483 ; BROADWELL-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:1.00] 11484 ; BROADWELL-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00] 11485 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 11486 ; 11487 ; BROADWELL-LABEL: test_psraw: 11488 ; BROADWELL: # %bb.0: 11489 ; BROADWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11490 ; BROADWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 11491 ; BROADWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] 11492 ; BROADWELL-NEXT: retq # sched: [7:1.00] 11493 ; 11494 ; SKYLAKE-SSE-LABEL: test_psraw: 11495 ; SKYLAKE-SSE: # %bb.0: 11496 ; SKYLAKE-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] 11497 ; SKYLAKE-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:0.50] 11498 ; SKYLAKE-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50] 11499 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 11500 ; 11501 ; SKYLAKE-LABEL: test_psraw: 11502 ; SKYLAKE: # %bb.0: 11503 ; SKYLAKE-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11504 ; SKYLAKE-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 11505 ; SKYLAKE-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] 11506 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 11507 ; 11508 ; SKX-SSE-LABEL: test_psraw: 11509 ; SKX-SSE: # %bb.0: 11510 ; 
SKX-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] 11511 ; SKX-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:0.50] 11512 ; SKX-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50] 11513 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 11514 ; 11515 ; SKX-LABEL: test_psraw: 11516 ; SKX: # %bb.0: 11517 ; SKX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11518 ; SKX-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 11519 ; SKX-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] 11520 ; SKX-NEXT: retq # sched: [7:1.00] 11521 ; 11522 ; BTVER2-SSE-LABEL: test_psraw: 11523 ; BTVER2-SSE: # %bb.0: 11524 ; BTVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [1:0.50] 11525 ; BTVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [6:1.00] 11526 ; BTVER2-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50] 11527 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 11528 ; 11529 ; BTVER2-LABEL: test_psraw: 11530 ; BTVER2: # %bb.0: 11531 ; BTVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 11532 ; BTVER2-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 11533 ; BTVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] 11534 ; BTVER2-NEXT: retq # sched: [4:1.00] 11535 ; 11536 ; ZNVER1-SSE-LABEL: test_psraw: 11537 ; ZNVER1-SSE: # %bb.0: 11538 ; ZNVER1-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [1:1.00] 11539 ; ZNVER1-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] 11540 ; ZNVER1-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.25] 11541 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 11542 ; 11543 ; ZNVER1-LABEL: test_psraw: 11544 ; ZNVER1: # %bb.0: 11545 ; ZNVER1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 11546 ; ZNVER1-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11547 ; ZNVER1-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.25] 11548 ; ZNVER1-NEXT: retq # sched: [1:0.50] 11549 %1 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) 11550 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 11551 %3 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> %2) 11552 %4 = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, 
i32 2) 11553 ret <8 x i16> %4 11554 } 11555 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone 11556 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone 11557 11558 define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 11559 ; GENERIC-LABEL: test_psrld: 11560 ; GENERIC: # %bb.0: 11561 ; GENERIC-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] 11562 ; GENERIC-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] 11563 ; GENERIC-NEXT: psrld $2, %xmm0 # sched: [1:1.00] 11564 ; GENERIC-NEXT: retq # sched: [1:1.00] 11565 ; 11566 ; ATOM-LABEL: test_psrld: 11567 ; ATOM: # %bb.0: 11568 ; ATOM-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] 11569 ; ATOM-NEXT: psrld (%rdi), %xmm0 # sched: [3:1.50] 11570 ; ATOM-NEXT: psrld $2, %xmm0 # sched: [1:0.50] 11571 ; ATOM-NEXT: retq # sched: [79:39.50] 11572 ; 11573 ; SLM-LABEL: test_psrld: 11574 ; SLM: # %bb.0: 11575 ; SLM-NEXT: psrld %xmm1, %xmm0 # sched: [1:1.00] 11576 ; SLM-NEXT: psrld (%rdi), %xmm0 # sched: [4:1.00] 11577 ; SLM-NEXT: psrld $2, %xmm0 # sched: [1:1.00] 11578 ; SLM-NEXT: retq # sched: [4:1.00] 11579 ; 11580 ; SANDY-SSE-LABEL: test_psrld: 11581 ; SANDY-SSE: # %bb.0: 11582 ; SANDY-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] 11583 ; SANDY-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] 11584 ; SANDY-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00] 11585 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 11586 ; 11587 ; SANDY-LABEL: test_psrld: 11588 ; SANDY: # %bb.0: 11589 ; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11590 ; SANDY-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11591 ; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] 11592 ; SANDY-NEXT: retq # sched: [1:1.00] 11593 ; 11594 ; HASWELL-SSE-LABEL: test_psrld: 11595 ; HASWELL-SSE: # %bb.0: 11596 ; HASWELL-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] 11597 ; HASWELL-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] 11598 ; HASWELL-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00] 11599 ; 
HASWELL-SSE-NEXT: retq # sched: [7:1.00] 11600 ; 11601 ; HASWELL-LABEL: test_psrld: 11602 ; HASWELL: # %bb.0: 11603 ; HASWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11604 ; HASWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11605 ; HASWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] 11606 ; HASWELL-NEXT: retq # sched: [7:1.00] 11607 ; 11608 ; BROADWELL-SSE-LABEL: test_psrld: 11609 ; BROADWELL-SSE: # %bb.0: 11610 ; BROADWELL-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] 11611 ; BROADWELL-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:1.00] 11612 ; BROADWELL-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00] 11613 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 11614 ; 11615 ; BROADWELL-LABEL: test_psrld: 11616 ; BROADWELL: # %bb.0: 11617 ; BROADWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11618 ; BROADWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 11619 ; BROADWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] 11620 ; BROADWELL-NEXT: retq # sched: [7:1.00] 11621 ; 11622 ; SKYLAKE-SSE-LABEL: test_psrld: 11623 ; SKYLAKE-SSE: # %bb.0: 11624 ; SKYLAKE-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] 11625 ; SKYLAKE-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:0.50] 11626 ; SKYLAKE-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50] 11627 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 11628 ; 11629 ; SKYLAKE-LABEL: test_psrld: 11630 ; SKYLAKE: # %bb.0: 11631 ; SKYLAKE-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11632 ; SKYLAKE-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 11633 ; SKYLAKE-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] 11634 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 11635 ; 11636 ; SKX-SSE-LABEL: test_psrld: 11637 ; SKX-SSE: # %bb.0: 11638 ; SKX-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] 11639 ; SKX-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:0.50] 11640 ; SKX-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50] 11641 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 11642 ; 11643 ; SKX-LABEL: test_psrld: 11644 ; SKX: # %bb.0: 11645 ; SKX-NEXT: 
vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11646 ; SKX-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 11647 ; SKX-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] 11648 ; SKX-NEXT: retq # sched: [7:1.00] 11649 ; 11650 ; BTVER2-SSE-LABEL: test_psrld: 11651 ; BTVER2-SSE: # %bb.0: 11652 ; BTVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [1:0.50] 11653 ; BTVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [6:1.00] 11654 ; BTVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50] 11655 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 11656 ; 11657 ; BTVER2-LABEL: test_psrld: 11658 ; BTVER2: # %bb.0: 11659 ; BTVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 11660 ; BTVER2-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 11661 ; BTVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] 11662 ; BTVER2-NEXT: retq # sched: [4:1.00] 11663 ; 11664 ; ZNVER1-SSE-LABEL: test_psrld: 11665 ; ZNVER1-SSE: # %bb.0: 11666 ; ZNVER1-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [1:1.00] 11667 ; ZNVER1-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] 11668 ; ZNVER1-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.25] 11669 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 11670 ; 11671 ; ZNVER1-LABEL: test_psrld: 11672 ; ZNVER1: # %bb.0: 11673 ; ZNVER1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 11674 ; ZNVER1-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11675 ; ZNVER1-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.25] 11676 ; ZNVER1-NEXT: retq # sched: [1:0.50] 11677 %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) 11678 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 11679 %3 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %1, <4 x i32> %2) 11680 %4 = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %3, i32 2) 11681 ret <4 x i32> %4 11682 } 11683 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone 11684 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone 11685 11686 define <4 x i32> @test_psrldq(<4 x i32> %a0) { 11687 ; GENERIC-LABEL: 
test_psrldq: 11688 ; GENERIC: # %bb.0: 11689 ; GENERIC-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] 11690 ; GENERIC-NEXT: retq # sched: [1:1.00] 11691 ; 11692 ; ATOM-LABEL: test_psrldq: 11693 ; ATOM: # %bb.0: 11694 ; ATOM-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] 11695 ; ATOM-NEXT: nop # sched: [1:0.50] 11696 ; ATOM-NEXT: nop # sched: [1:0.50] 11697 ; ATOM-NEXT: nop # sched: [1:0.50] 11698 ; ATOM-NEXT: nop # sched: [1:0.50] 11699 ; ATOM-NEXT: nop # sched: [1:0.50] 11700 ; ATOM-NEXT: nop # sched: [1:0.50] 11701 ; ATOM-NEXT: retq # sched: [79:39.50] 11702 ; 11703 ; SLM-LABEL: test_psrldq: 11704 ; SLM: # %bb.0: 11705 ; SLM-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] 11706 ; SLM-NEXT: retq # sched: [4:1.00] 11707 ; 11708 ; SANDY-SSE-LABEL: test_psrldq: 11709 ; SANDY-SSE: # %bb.0: 11710 ; SANDY-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] 11711 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 11712 ; 11713 ; SANDY-LABEL: test_psrldq: 11714 ; SANDY: # %bb.0: 11715 ; SANDY-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] 11716 ; SANDY-NEXT: retq # sched: [1:1.00] 11717 ; 11718 ; HASWELL-SSE-LABEL: test_psrldq: 11719 ; HASWELL-SSE: # %bb.0: 11720 ; HASWELL-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] 11721 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 11722 ; 11723 ; HASWELL-LABEL: test_psrldq: 11724 ; HASWELL: # %bb.0: 11725 ; HASWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] 11726 ; HASWELL-NEXT: retq # sched: [7:1.00] 11727 ; 11728 ; BROADWELL-SSE-LABEL: test_psrldq: 11729 ; BROADWELL-SSE: # %bb.0: 11730 ; BROADWELL-SSE-NEXT: psrldq {{.*#+}} xmm0 = 
xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] 11731 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 11732 ; 11733 ; BROADWELL-LABEL: test_psrldq: 11734 ; BROADWELL: # %bb.0: 11735 ; BROADWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] 11736 ; BROADWELL-NEXT: retq # sched: [7:1.00] 11737 ; 11738 ; SKYLAKE-SSE-LABEL: test_psrldq: 11739 ; SKYLAKE-SSE: # %bb.0: 11740 ; SKYLAKE-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] 11741 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 11742 ; 11743 ; SKYLAKE-LABEL: test_psrldq: 11744 ; SKYLAKE: # %bb.0: 11745 ; SKYLAKE-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] 11746 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 11747 ; 11748 ; SKX-SSE-LABEL: test_psrldq: 11749 ; SKX-SSE: # %bb.0: 11750 ; SKX-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] 11751 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 11752 ; 11753 ; SKX-LABEL: test_psrldq: 11754 ; SKX: # %bb.0: 11755 ; SKX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] 11756 ; SKX-NEXT: retq # sched: [7:1.00] 11757 ; 11758 ; BTVER2-SSE-LABEL: test_psrldq: 11759 ; BTVER2-SSE: # %bb.0: 11760 ; BTVER2-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] 11761 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 11762 ; 11763 ; BTVER2-LABEL: test_psrldq: 11764 ; BTVER2: # %bb.0: 11765 ; BTVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] 11766 ; BTVER2-NEXT: retq # sched: [4:1.00] 11767 ; 11768 ; ZNVER1-SSE-LABEL: test_psrldq: 11769 ; ZNVER1-SSE: # %bb.0: 11770 ; ZNVER1-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] 11771 ; ZNVER1-SSE-NEXT: retq # 
sched: [1:0.50] 11772 ; 11773 ; ZNVER1-LABEL: test_psrldq: 11774 ; ZNVER1: # %bb.0: 11775 ; ZNVER1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] 11776 ; ZNVER1-NEXT: retq # sched: [1:0.50] 11777 %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 4> 11778 ret <4 x i32> %1 11779 } 11780 11781 define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 11782 ; GENERIC-LABEL: test_psrlq: 11783 ; GENERIC: # %bb.0: 11784 ; GENERIC-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] 11785 ; GENERIC-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] 11786 ; GENERIC-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] 11787 ; GENERIC-NEXT: retq # sched: [1:1.00] 11788 ; 11789 ; ATOM-LABEL: test_psrlq: 11790 ; ATOM: # %bb.0: 11791 ; ATOM-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] 11792 ; ATOM-NEXT: psrlq (%rdi), %xmm0 # sched: [3:1.50] 11793 ; ATOM-NEXT: psrlq $2, %xmm0 # sched: [1:0.50] 11794 ; ATOM-NEXT: retq # sched: [79:39.50] 11795 ; 11796 ; SLM-LABEL: test_psrlq: 11797 ; SLM: # %bb.0: 11798 ; SLM-NEXT: psrlq %xmm1, %xmm0 # sched: [1:1.00] 11799 ; SLM-NEXT: psrlq (%rdi), %xmm0 # sched: [4:1.00] 11800 ; SLM-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] 11801 ; SLM-NEXT: retq # sched: [4:1.00] 11802 ; 11803 ; SANDY-SSE-LABEL: test_psrlq: 11804 ; SANDY-SSE: # %bb.0: 11805 ; SANDY-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] 11806 ; SANDY-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] 11807 ; SANDY-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] 11808 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 11809 ; 11810 ; SANDY-LABEL: test_psrlq: 11811 ; SANDY: # %bb.0: 11812 ; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11813 ; SANDY-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11814 ; SANDY-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] 11815 ; SANDY-NEXT: retq # sched: [1:1.00] 11816 ; 11817 ; HASWELL-SSE-LABEL: test_psrlq: 11818 ; HASWELL-SSE: # %bb.0: 11819 ; HASWELL-SSE-NEXT: 
psrlq %xmm1, %xmm0 # sched: [2:1.00] 11820 ; HASWELL-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] 11821 ; HASWELL-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] 11822 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 11823 ; 11824 ; HASWELL-LABEL: test_psrlq: 11825 ; HASWELL: # %bb.0: 11826 ; HASWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11827 ; HASWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11828 ; HASWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] 11829 ; HASWELL-NEXT: retq # sched: [7:1.00] 11830 ; 11831 ; BROADWELL-SSE-LABEL: test_psrlq: 11832 ; BROADWELL-SSE: # %bb.0: 11833 ; BROADWELL-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] 11834 ; BROADWELL-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:1.00] 11835 ; BROADWELL-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] 11836 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 11837 ; 11838 ; BROADWELL-LABEL: test_psrlq: 11839 ; BROADWELL: # %bb.0: 11840 ; BROADWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11841 ; BROADWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 11842 ; BROADWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] 11843 ; BROADWELL-NEXT: retq # sched: [7:1.00] 11844 ; 11845 ; SKYLAKE-SSE-LABEL: test_psrlq: 11846 ; SKYLAKE-SSE: # %bb.0: 11847 ; SKYLAKE-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] 11848 ; SKYLAKE-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:0.50] 11849 ; SKYLAKE-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50] 11850 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 11851 ; 11852 ; SKYLAKE-LABEL: test_psrlq: 11853 ; SKYLAKE: # %bb.0: 11854 ; SKYLAKE-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11855 ; SKYLAKE-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 11856 ; SKYLAKE-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] 11857 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 11858 ; 11859 ; SKX-SSE-LABEL: test_psrlq: 11860 ; SKX-SSE: # %bb.0: 11861 ; SKX-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] 11862 ; SKX-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:0.50] 11863 ; 
SKX-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50] 11864 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 11865 ; 11866 ; SKX-LABEL: test_psrlq: 11867 ; SKX: # %bb.0: 11868 ; SKX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11869 ; SKX-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 11870 ; SKX-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] 11871 ; SKX-NEXT: retq # sched: [7:1.00] 11872 ; 11873 ; BTVER2-SSE-LABEL: test_psrlq: 11874 ; BTVER2-SSE: # %bb.0: 11875 ; BTVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [1:0.50] 11876 ; BTVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [6:1.00] 11877 ; BTVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50] 11878 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 11879 ; 11880 ; BTVER2-LABEL: test_psrlq: 11881 ; BTVER2: # %bb.0: 11882 ; BTVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 11883 ; BTVER2-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 11884 ; BTVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] 11885 ; BTVER2-NEXT: retq # sched: [4:1.00] 11886 ; 11887 ; ZNVER1-SSE-LABEL: test_psrlq: 11888 ; ZNVER1-SSE: # %bb.0: 11889 ; ZNVER1-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [1:1.00] 11890 ; ZNVER1-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] 11891 ; ZNVER1-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.25] 11892 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 11893 ; 11894 ; ZNVER1-LABEL: test_psrlq: 11895 ; ZNVER1: # %bb.0: 11896 ; ZNVER1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 11897 ; ZNVER1-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11898 ; ZNVER1-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.25] 11899 ; ZNVER1-NEXT: retq # sched: [1:0.50] 11900 %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) 11901 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 11902 %3 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %1, <2 x i64> %2) 11903 %4 = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %3, i32 2) 11904 ret <2 x i64> %4 11905 } 11906 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind 
readnone 11907 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone 11908 11909 define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 11910 ; GENERIC-LABEL: test_psrlw: 11911 ; GENERIC: # %bb.0: 11912 ; GENERIC-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] 11913 ; GENERIC-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] 11914 ; GENERIC-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] 11915 ; GENERIC-NEXT: retq # sched: [1:1.00] 11916 ; 11917 ; ATOM-LABEL: test_psrlw: 11918 ; ATOM: # %bb.0: 11919 ; ATOM-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] 11920 ; ATOM-NEXT: psrlw (%rdi), %xmm0 # sched: [3:1.50] 11921 ; ATOM-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] 11922 ; ATOM-NEXT: retq # sched: [79:39.50] 11923 ; 11924 ; SLM-LABEL: test_psrlw: 11925 ; SLM: # %bb.0: 11926 ; SLM-NEXT: psrlw %xmm1, %xmm0 # sched: [1:1.00] 11927 ; SLM-NEXT: psrlw (%rdi), %xmm0 # sched: [4:1.00] 11928 ; SLM-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] 11929 ; SLM-NEXT: retq # sched: [4:1.00] 11930 ; 11931 ; SANDY-SSE-LABEL: test_psrlw: 11932 ; SANDY-SSE: # %bb.0: 11933 ; SANDY-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] 11934 ; SANDY-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] 11935 ; SANDY-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] 11936 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 11937 ; 11938 ; SANDY-LABEL: test_psrlw: 11939 ; SANDY: # %bb.0: 11940 ; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11941 ; SANDY-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11942 ; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] 11943 ; SANDY-NEXT: retq # sched: [1:1.00] 11944 ; 11945 ; HASWELL-SSE-LABEL: test_psrlw: 11946 ; HASWELL-SSE: # %bb.0: 11947 ; HASWELL-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] 11948 ; HASWELL-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] 11949 ; HASWELL-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] 11950 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 11951 ; 11952 ; HASWELL-LABEL: test_psrlw: 11953 ; HASWELL: # %bb.0: 11954 ; 
HASWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11955 ; HASWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 11956 ; HASWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] 11957 ; HASWELL-NEXT: retq # sched: [7:1.00] 11958 ; 11959 ; BROADWELL-SSE-LABEL: test_psrlw: 11960 ; BROADWELL-SSE: # %bb.0: 11961 ; BROADWELL-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] 11962 ; BROADWELL-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:1.00] 11963 ; BROADWELL-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] 11964 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 11965 ; 11966 ; BROADWELL-LABEL: test_psrlw: 11967 ; BROADWELL: # %bb.0: 11968 ; BROADWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11969 ; BROADWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 11970 ; BROADWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] 11971 ; BROADWELL-NEXT: retq # sched: [7:1.00] 11972 ; 11973 ; SKYLAKE-SSE-LABEL: test_psrlw: 11974 ; SKYLAKE-SSE: # %bb.0: 11975 ; SKYLAKE-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] 11976 ; SKYLAKE-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:0.50] 11977 ; SKYLAKE-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] 11978 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 11979 ; 11980 ; SKYLAKE-LABEL: test_psrlw: 11981 ; SKYLAKE: # %bb.0: 11982 ; SKYLAKE-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11983 ; SKYLAKE-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 11984 ; SKYLAKE-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] 11985 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 11986 ; 11987 ; SKX-SSE-LABEL: test_psrlw: 11988 ; SKX-SSE: # %bb.0: 11989 ; SKX-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] 11990 ; SKX-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:0.50] 11991 ; SKX-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] 11992 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 11993 ; 11994 ; SKX-LABEL: test_psrlw: 11995 ; SKX: # %bb.0: 11996 ; SKX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] 11997 ; SKX-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 11998 ; 
SKX-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] 11999 ; SKX-NEXT: retq # sched: [7:1.00] 12000 ; 12001 ; BTVER2-SSE-LABEL: test_psrlw: 12002 ; BTVER2-SSE: # %bb.0: 12003 ; BTVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [1:0.50] 12004 ; BTVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [6:1.00] 12005 ; BTVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] 12006 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 12007 ; 12008 ; BTVER2-LABEL: test_psrlw: 12009 ; BTVER2: # %bb.0: 12010 ; BTVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12011 ; BTVER2-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 12012 ; BTVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] 12013 ; BTVER2-NEXT: retq # sched: [4:1.00] 12014 ; 12015 ; ZNVER1-SSE-LABEL: test_psrlw: 12016 ; ZNVER1-SSE: # %bb.0: 12017 ; ZNVER1-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [1:1.00] 12018 ; ZNVER1-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] 12019 ; ZNVER1-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.25] 12020 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 12021 ; 12022 ; ZNVER1-LABEL: test_psrlw: 12023 ; ZNVER1: # %bb.0: 12024 ; ZNVER1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 12025 ; ZNVER1-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 12026 ; ZNVER1-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.25] 12027 ; ZNVER1-NEXT: retq # sched: [1:0.50] 12028 %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) 12029 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 12030 %3 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %1, <8 x i16> %2) 12031 %4 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %3, i32 2) 12032 ret <8 x i16> %4 12033 } 12034 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone 12035 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone 12036 12037 define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 12038 ; GENERIC-LABEL: test_psubb: 12039 ; GENERIC: # %bb.0: 12040 ; GENERIC-NEXT: psubb %xmm1, %xmm0 # sched: 
[1:0.50] 12041 ; GENERIC-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] 12042 ; GENERIC-NEXT: retq # sched: [1:1.00] 12043 ; 12044 ; ATOM-LABEL: test_psubb: 12045 ; ATOM: # %bb.0: 12046 ; ATOM-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] 12047 ; ATOM-NEXT: psubb (%rdi), %xmm0 # sched: [1:1.00] 12048 ; ATOM-NEXT: nop # sched: [1:0.50] 12049 ; ATOM-NEXT: nop # sched: [1:0.50] 12050 ; ATOM-NEXT: nop # sched: [1:0.50] 12051 ; ATOM-NEXT: nop # sched: [1:0.50] 12052 ; ATOM-NEXT: retq # sched: [79:39.50] 12053 ; 12054 ; SLM-LABEL: test_psubb: 12055 ; SLM: # %bb.0: 12056 ; SLM-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] 12057 ; SLM-NEXT: psubb (%rdi), %xmm0 # sched: [4:1.00] 12058 ; SLM-NEXT: retq # sched: [4:1.00] 12059 ; 12060 ; SANDY-SSE-LABEL: test_psubb: 12061 ; SANDY-SSE: # %bb.0: 12062 ; SANDY-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] 12063 ; SANDY-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] 12064 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 12065 ; 12066 ; SANDY-LABEL: test_psubb: 12067 ; SANDY: # %bb.0: 12068 ; SANDY-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12069 ; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12070 ; SANDY-NEXT: retq # sched: [1:1.00] 12071 ; 12072 ; HASWELL-SSE-LABEL: test_psubb: 12073 ; HASWELL-SSE: # %bb.0: 12074 ; HASWELL-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] 12075 ; HASWELL-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] 12076 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 12077 ; 12078 ; HASWELL-LABEL: test_psubb: 12079 ; HASWELL: # %bb.0: 12080 ; HASWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12081 ; HASWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12082 ; HASWELL-NEXT: retq # sched: [7:1.00] 12083 ; 12084 ; BROADWELL-SSE-LABEL: test_psubb: 12085 ; BROADWELL-SSE: # %bb.0: 12086 ; BROADWELL-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] 12087 ; BROADWELL-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [6:0.50] 12088 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 12089 ; 12090 ; BROADWELL-LABEL: 
test_psubb: 12091 ; BROADWELL: # %bb.0: 12092 ; BROADWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12093 ; BROADWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 12094 ; BROADWELL-NEXT: retq # sched: [7:1.00] 12095 ; 12096 ; SKYLAKE-SSE-LABEL: test_psubb: 12097 ; SKYLAKE-SSE: # %bb.0: 12098 ; SKYLAKE-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.33] 12099 ; SKYLAKE-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] 12100 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 12101 ; 12102 ; SKYLAKE-LABEL: test_psubb: 12103 ; SKYLAKE: # %bb.0: 12104 ; SKYLAKE-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 12105 ; SKYLAKE-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12106 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 12107 ; 12108 ; SKX-SSE-LABEL: test_psubb: 12109 ; SKX-SSE: # %bb.0: 12110 ; SKX-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.33] 12111 ; SKX-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] 12112 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 12113 ; 12114 ; SKX-LABEL: test_psubb: 12115 ; SKX: # %bb.0: 12116 ; SKX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 12117 ; SKX-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12118 ; SKX-NEXT: retq # sched: [7:1.00] 12119 ; 12120 ; BTVER2-SSE-LABEL: test_psubb: 12121 ; BTVER2-SSE: # %bb.0: 12122 ; BTVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] 12123 ; BTVER2-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [6:1.00] 12124 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 12125 ; 12126 ; BTVER2-LABEL: test_psubb: 12127 ; BTVER2: # %bb.0: 12128 ; BTVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12129 ; BTVER2-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 12130 ; BTVER2-NEXT: retq # sched: [4:1.00] 12131 ; 12132 ; ZNVER1-SSE-LABEL: test_psubb: 12133 ; ZNVER1-SSE: # %bb.0: 12134 ; ZNVER1-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.25] 12135 ; ZNVER1-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [8:0.50] 12136 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 12137 ; 12138 ; ZNVER1-LABEL: test_psubb: 12139 ; ZNVER1: # %bb.0: 
12140 ; ZNVER1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 12141 ; ZNVER1-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 12142 ; ZNVER1-NEXT: retq # sched: [1:0.50] 12143 %1 = sub <16 x i8> %a0, %a1 12144 %2 = load <16 x i8>, <16 x i8> *%a2, align 16 12145 %3 = sub <16 x i8> %1, %2 12146 ret <16 x i8> %3 12147 } 12148 12149 define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 12150 ; GENERIC-LABEL: test_psubd: 12151 ; GENERIC: # %bb.0: 12152 ; GENERIC-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] 12153 ; GENERIC-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] 12154 ; GENERIC-NEXT: retq # sched: [1:1.00] 12155 ; 12156 ; ATOM-LABEL: test_psubd: 12157 ; ATOM: # %bb.0: 12158 ; ATOM-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] 12159 ; ATOM-NEXT: psubd (%rdi), %xmm0 # sched: [1:1.00] 12160 ; ATOM-NEXT: nop # sched: [1:0.50] 12161 ; ATOM-NEXT: nop # sched: [1:0.50] 12162 ; ATOM-NEXT: nop # sched: [1:0.50] 12163 ; ATOM-NEXT: nop # sched: [1:0.50] 12164 ; ATOM-NEXT: retq # sched: [79:39.50] 12165 ; 12166 ; SLM-LABEL: test_psubd: 12167 ; SLM: # %bb.0: 12168 ; SLM-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] 12169 ; SLM-NEXT: psubd (%rdi), %xmm0 # sched: [4:1.00] 12170 ; SLM-NEXT: retq # sched: [4:1.00] 12171 ; 12172 ; SANDY-SSE-LABEL: test_psubd: 12173 ; SANDY-SSE: # %bb.0: 12174 ; SANDY-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] 12175 ; SANDY-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] 12176 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 12177 ; 12178 ; SANDY-LABEL: test_psubd: 12179 ; SANDY: # %bb.0: 12180 ; SANDY-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12181 ; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12182 ; SANDY-NEXT: retq # sched: [1:1.00] 12183 ; 12184 ; HASWELL-SSE-LABEL: test_psubd: 12185 ; HASWELL-SSE: # %bb.0: 12186 ; HASWELL-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] 12187 ; HASWELL-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] 12188 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 12189 ; 12190 ; 
HASWELL-LABEL: test_psubd: 12191 ; HASWELL: # %bb.0: 12192 ; HASWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12193 ; HASWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12194 ; HASWELL-NEXT: retq # sched: [7:1.00] 12195 ; 12196 ; BROADWELL-SSE-LABEL: test_psubd: 12197 ; BROADWELL-SSE: # %bb.0: 12198 ; BROADWELL-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] 12199 ; BROADWELL-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [6:0.50] 12200 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 12201 ; 12202 ; BROADWELL-LABEL: test_psubd: 12203 ; BROADWELL: # %bb.0: 12204 ; BROADWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12205 ; BROADWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 12206 ; BROADWELL-NEXT: retq # sched: [7:1.00] 12207 ; 12208 ; SKYLAKE-SSE-LABEL: test_psubd: 12209 ; SKYLAKE-SSE: # %bb.0: 12210 ; SKYLAKE-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.33] 12211 ; SKYLAKE-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] 12212 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 12213 ; 12214 ; SKYLAKE-LABEL: test_psubd: 12215 ; SKYLAKE: # %bb.0: 12216 ; SKYLAKE-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 12217 ; SKYLAKE-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12218 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 12219 ; 12220 ; SKX-SSE-LABEL: test_psubd: 12221 ; SKX-SSE: # %bb.0: 12222 ; SKX-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.33] 12223 ; SKX-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] 12224 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 12225 ; 12226 ; SKX-LABEL: test_psubd: 12227 ; SKX: # %bb.0: 12228 ; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 12229 ; SKX-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12230 ; SKX-NEXT: retq # sched: [7:1.00] 12231 ; 12232 ; BTVER2-SSE-LABEL: test_psubd: 12233 ; BTVER2-SSE: # %bb.0: 12234 ; BTVER2-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] 12235 ; BTVER2-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [6:1.00] 12236 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 12237 ; 12238 ; BTVER2-LABEL: 
test_psubd: 12239 ; BTVER2: # %bb.0: 12240 ; BTVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12241 ; BTVER2-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 12242 ; BTVER2-NEXT: retq # sched: [4:1.00] 12243 ; 12244 ; ZNVER1-SSE-LABEL: test_psubd: 12245 ; ZNVER1-SSE: # %bb.0: 12246 ; ZNVER1-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.25] 12247 ; ZNVER1-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [8:0.50] 12248 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 12249 ; 12250 ; ZNVER1-LABEL: test_psubd: 12251 ; ZNVER1: # %bb.0: 12252 ; ZNVER1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 12253 ; ZNVER1-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 12254 ; ZNVER1-NEXT: retq # sched: [1:0.50] 12255 %1 = sub <4 x i32> %a0, %a1 12256 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 12257 %3 = sub <4 x i32> %1, %2 12258 ret <4 x i32> %3 12259 } 12260 12261 define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 12262 ; GENERIC-LABEL: test_psubq: 12263 ; GENERIC: # %bb.0: 12264 ; GENERIC-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] 12265 ; GENERIC-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] 12266 ; GENERIC-NEXT: retq # sched: [1:1.00] 12267 ; 12268 ; ATOM-LABEL: test_psubq: 12269 ; ATOM: # %bb.0: 12270 ; ATOM-NEXT: psubq %xmm1, %xmm0 # sched: [2:1.00] 12271 ; ATOM-NEXT: psubq (%rdi), %xmm0 # sched: [3:1.50] 12272 ; ATOM-NEXT: retq # sched: [79:39.50] 12273 ; 12274 ; SLM-LABEL: test_psubq: 12275 ; SLM: # %bb.0: 12276 ; SLM-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] 12277 ; SLM-NEXT: psubq (%rdi), %xmm0 # sched: [4:1.00] 12278 ; SLM-NEXT: retq # sched: [4:1.00] 12279 ; 12280 ; SANDY-SSE-LABEL: test_psubq: 12281 ; SANDY-SSE: # %bb.0: 12282 ; SANDY-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] 12283 ; SANDY-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] 12284 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 12285 ; 12286 ; SANDY-LABEL: test_psubq: 12287 ; SANDY: # %bb.0: 12288 ; SANDY-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12289 ; SANDY-NEXT: 
vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12290 ; SANDY-NEXT: retq # sched: [1:1.00] 12291 ; 12292 ; HASWELL-SSE-LABEL: test_psubq: 12293 ; HASWELL-SSE: # %bb.0: 12294 ; HASWELL-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] 12295 ; HASWELL-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] 12296 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 12297 ; 12298 ; HASWELL-LABEL: test_psubq: 12299 ; HASWELL: # %bb.0: 12300 ; HASWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12301 ; HASWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12302 ; HASWELL-NEXT: retq # sched: [7:1.00] 12303 ; 12304 ; BROADWELL-SSE-LABEL: test_psubq: 12305 ; BROADWELL-SSE: # %bb.0: 12306 ; BROADWELL-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] 12307 ; BROADWELL-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [6:0.50] 12308 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 12309 ; 12310 ; BROADWELL-LABEL: test_psubq: 12311 ; BROADWELL: # %bb.0: 12312 ; BROADWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12313 ; BROADWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 12314 ; BROADWELL-NEXT: retq # sched: [7:1.00] 12315 ; 12316 ; SKYLAKE-SSE-LABEL: test_psubq: 12317 ; SKYLAKE-SSE: # %bb.0: 12318 ; SKYLAKE-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.33] 12319 ; SKYLAKE-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] 12320 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 12321 ; 12322 ; SKYLAKE-LABEL: test_psubq: 12323 ; SKYLAKE: # %bb.0: 12324 ; SKYLAKE-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 12325 ; SKYLAKE-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12326 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 12327 ; 12328 ; SKX-SSE-LABEL: test_psubq: 12329 ; SKX-SSE: # %bb.0: 12330 ; SKX-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.33] 12331 ; SKX-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] 12332 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 12333 ; 12334 ; SKX-LABEL: test_psubq: 12335 ; SKX: # %bb.0: 12336 ; SKX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 12337 ; SKX-NEXT: vpsubq 
(%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12338 ; SKX-NEXT: retq # sched: [7:1.00] 12339 ; 12340 ; BTVER2-SSE-LABEL: test_psubq: 12341 ; BTVER2-SSE: # %bb.0: 12342 ; BTVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] 12343 ; BTVER2-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [6:1.00] 12344 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 12345 ; 12346 ; BTVER2-LABEL: test_psubq: 12347 ; BTVER2: # %bb.0: 12348 ; BTVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12349 ; BTVER2-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 12350 ; BTVER2-NEXT: retq # sched: [4:1.00] 12351 ; 12352 ; ZNVER1-SSE-LABEL: test_psubq: 12353 ; ZNVER1-SSE: # %bb.0: 12354 ; ZNVER1-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.25] 12355 ; ZNVER1-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [8:0.50] 12356 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 12357 ; 12358 ; ZNVER1-LABEL: test_psubq: 12359 ; ZNVER1: # %bb.0: 12360 ; ZNVER1-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 12361 ; ZNVER1-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 12362 ; ZNVER1-NEXT: retq # sched: [1:0.50] 12363 %1 = sub <2 x i64> %a0, %a1 12364 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 12365 %3 = sub <2 x i64> %1, %2 12366 ret <2 x i64> %3 12367 } 12368 12369 define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 12370 ; GENERIC-LABEL: test_psubsb: 12371 ; GENERIC: # %bb.0: 12372 ; GENERIC-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] 12373 ; GENERIC-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] 12374 ; GENERIC-NEXT: retq # sched: [1:1.00] 12375 ; 12376 ; ATOM-LABEL: test_psubsb: 12377 ; ATOM: # %bb.0: 12378 ; ATOM-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] 12379 ; ATOM-NEXT: psubsb (%rdi), %xmm0 # sched: [1:1.00] 12380 ; ATOM-NEXT: nop # sched: [1:0.50] 12381 ; ATOM-NEXT: nop # sched: [1:0.50] 12382 ; ATOM-NEXT: nop # sched: [1:0.50] 12383 ; ATOM-NEXT: nop # sched: [1:0.50] 12384 ; ATOM-NEXT: retq # sched: [79:39.50] 12385 ; 12386 ; SLM-LABEL: test_psubsb: 12387 ; SLM: # %bb.0: 12388 ; SLM-NEXT: 
psubsb %xmm1, %xmm0 # sched: [1:0.50] 12389 ; SLM-NEXT: psubsb (%rdi), %xmm0 # sched: [4:1.00] 12390 ; SLM-NEXT: retq # sched: [4:1.00] 12391 ; 12392 ; SANDY-SSE-LABEL: test_psubsb: 12393 ; SANDY-SSE: # %bb.0: 12394 ; SANDY-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] 12395 ; SANDY-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] 12396 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 12397 ; 12398 ; SANDY-LABEL: test_psubsb: 12399 ; SANDY: # %bb.0: 12400 ; SANDY-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12401 ; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12402 ; SANDY-NEXT: retq # sched: [1:1.00] 12403 ; 12404 ; HASWELL-SSE-LABEL: test_psubsb: 12405 ; HASWELL-SSE: # %bb.0: 12406 ; HASWELL-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] 12407 ; HASWELL-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] 12408 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 12409 ; 12410 ; HASWELL-LABEL: test_psubsb: 12411 ; HASWELL: # %bb.0: 12412 ; HASWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12413 ; HASWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12414 ; HASWELL-NEXT: retq # sched: [7:1.00] 12415 ; 12416 ; BROADWELL-SSE-LABEL: test_psubsb: 12417 ; BROADWELL-SSE: # %bb.0: 12418 ; BROADWELL-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] 12419 ; BROADWELL-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [6:0.50] 12420 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 12421 ; 12422 ; BROADWELL-LABEL: test_psubsb: 12423 ; BROADWELL: # %bb.0: 12424 ; BROADWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12425 ; BROADWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 12426 ; BROADWELL-NEXT: retq # sched: [7:1.00] 12427 ; 12428 ; SKYLAKE-SSE-LABEL: test_psubsb: 12429 ; SKYLAKE-SSE: # %bb.0: 12430 ; SKYLAKE-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] 12431 ; SKYLAKE-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] 12432 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 12433 ; 12434 ; SKYLAKE-LABEL: test_psubsb: 12435 ; SKYLAKE: # %bb.0: 12436 ; 
SKYLAKE-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12437 ; SKYLAKE-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12438 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 12439 ; 12440 ; SKX-SSE-LABEL: test_psubsb: 12441 ; SKX-SSE: # %bb.0: 12442 ; SKX-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] 12443 ; SKX-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] 12444 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 12445 ; 12446 ; SKX-LABEL: test_psubsb: 12447 ; SKX: # %bb.0: 12448 ; SKX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12449 ; SKX-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12450 ; SKX-NEXT: retq # sched: [7:1.00] 12451 ; 12452 ; BTVER2-SSE-LABEL: test_psubsb: 12453 ; BTVER2-SSE: # %bb.0: 12454 ; BTVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] 12455 ; BTVER2-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [6:1.00] 12456 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 12457 ; 12458 ; BTVER2-LABEL: test_psubsb: 12459 ; BTVER2: # %bb.0: 12460 ; BTVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12461 ; BTVER2-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 12462 ; BTVER2-NEXT: retq # sched: [4:1.00] 12463 ; 12464 ; ZNVER1-SSE-LABEL: test_psubsb: 12465 ; ZNVER1-SSE: # %bb.0: 12466 ; ZNVER1-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.25] 12467 ; ZNVER1-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [8:0.50] 12468 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 12469 ; 12470 ; ZNVER1-LABEL: test_psubsb: 12471 ; ZNVER1: # %bb.0: 12472 ; ZNVER1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 12473 ; ZNVER1-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 12474 ; ZNVER1-NEXT: retq # sched: [1:0.50] 12475 %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) 12476 %2 = load <16 x i8>, <16 x i8> *%a2, align 16 12477 %3 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %1, <16 x i8> %2) 12478 ret <16 x i8> %3 12479 } 12480 declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone 12481 12482 define <8 x i16> 
@test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 12483 ; GENERIC-LABEL: test_psubsw: 12484 ; GENERIC: # %bb.0: 12485 ; GENERIC-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] 12486 ; GENERIC-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] 12487 ; GENERIC-NEXT: retq # sched: [1:1.00] 12488 ; 12489 ; ATOM-LABEL: test_psubsw: 12490 ; ATOM: # %bb.0: 12491 ; ATOM-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] 12492 ; ATOM-NEXT: psubsw (%rdi), %xmm0 # sched: [1:1.00] 12493 ; ATOM-NEXT: nop # sched: [1:0.50] 12494 ; ATOM-NEXT: nop # sched: [1:0.50] 12495 ; ATOM-NEXT: nop # sched: [1:0.50] 12496 ; ATOM-NEXT: nop # sched: [1:0.50] 12497 ; ATOM-NEXT: retq # sched: [79:39.50] 12498 ; 12499 ; SLM-LABEL: test_psubsw: 12500 ; SLM: # %bb.0: 12501 ; SLM-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] 12502 ; SLM-NEXT: psubsw (%rdi), %xmm0 # sched: [4:1.00] 12503 ; SLM-NEXT: retq # sched: [4:1.00] 12504 ; 12505 ; SANDY-SSE-LABEL: test_psubsw: 12506 ; SANDY-SSE: # %bb.0: 12507 ; SANDY-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] 12508 ; SANDY-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] 12509 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 12510 ; 12511 ; SANDY-LABEL: test_psubsw: 12512 ; SANDY: # %bb.0: 12513 ; SANDY-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12514 ; SANDY-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12515 ; SANDY-NEXT: retq # sched: [1:1.00] 12516 ; 12517 ; HASWELL-SSE-LABEL: test_psubsw: 12518 ; HASWELL-SSE: # %bb.0: 12519 ; HASWELL-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] 12520 ; HASWELL-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] 12521 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 12522 ; 12523 ; HASWELL-LABEL: test_psubsw: 12524 ; HASWELL: # %bb.0: 12525 ; HASWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12526 ; HASWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12527 ; HASWELL-NEXT: retq # sched: [7:1.00] 12528 ; 12529 ; BROADWELL-SSE-LABEL: test_psubsw: 12530 ; BROADWELL-SSE: # %bb.0: 12531 ; BROADWELL-SSE-NEXT: 
psubsw %xmm1, %xmm0 # sched: [1:0.50] 12532 ; BROADWELL-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [6:0.50] 12533 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 12534 ; 12535 ; BROADWELL-LABEL: test_psubsw: 12536 ; BROADWELL: # %bb.0: 12537 ; BROADWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12538 ; BROADWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 12539 ; BROADWELL-NEXT: retq # sched: [7:1.00] 12540 ; 12541 ; SKYLAKE-SSE-LABEL: test_psubsw: 12542 ; SKYLAKE-SSE: # %bb.0: 12543 ; SKYLAKE-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] 12544 ; SKYLAKE-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] 12545 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 12546 ; 12547 ; SKYLAKE-LABEL: test_psubsw: 12548 ; SKYLAKE: # %bb.0: 12549 ; SKYLAKE-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12550 ; SKYLAKE-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12551 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 12552 ; 12553 ; SKX-SSE-LABEL: test_psubsw: 12554 ; SKX-SSE: # %bb.0: 12555 ; SKX-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] 12556 ; SKX-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] 12557 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 12558 ; 12559 ; SKX-LABEL: test_psubsw: 12560 ; SKX: # %bb.0: 12561 ; SKX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12562 ; SKX-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12563 ; SKX-NEXT: retq # sched: [7:1.00] 12564 ; 12565 ; BTVER2-SSE-LABEL: test_psubsw: 12566 ; BTVER2-SSE: # %bb.0: 12567 ; BTVER2-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] 12568 ; BTVER2-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [6:1.00] 12569 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 12570 ; 12571 ; BTVER2-LABEL: test_psubsw: 12572 ; BTVER2: # %bb.0: 12573 ; BTVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12574 ; BTVER2-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 12575 ; BTVER2-NEXT: retq # sched: [4:1.00] 12576 ; 12577 ; ZNVER1-SSE-LABEL: test_psubsw: 12578 ; ZNVER1-SSE: # %bb.0: 12579 ; ZNVER1-SSE-NEXT: psubsw 
%xmm1, %xmm0 # sched: [1:0.25] 12580 ; ZNVER1-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [8:0.50] 12581 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 12582 ; 12583 ; ZNVER1-LABEL: test_psubsw: 12584 ; ZNVER1: # %bb.0: 12585 ; ZNVER1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 12586 ; ZNVER1-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 12587 ; ZNVER1-NEXT: retq # sched: [1:0.50] 12588 %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) 12589 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 12590 %3 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %1, <8 x i16> %2) 12591 ret <8 x i16> %3 12592 } 12593 declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone 12594 12595 define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 12596 ; GENERIC-LABEL: test_psubusb: 12597 ; GENERIC: # %bb.0: 12598 ; GENERIC-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] 12599 ; GENERIC-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] 12600 ; GENERIC-NEXT: retq # sched: [1:1.00] 12601 ; 12602 ; ATOM-LABEL: test_psubusb: 12603 ; ATOM: # %bb.0: 12604 ; ATOM-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] 12605 ; ATOM-NEXT: psubusb (%rdi), %xmm0 # sched: [1:1.00] 12606 ; ATOM-NEXT: nop # sched: [1:0.50] 12607 ; ATOM-NEXT: nop # sched: [1:0.50] 12608 ; ATOM-NEXT: nop # sched: [1:0.50] 12609 ; ATOM-NEXT: nop # sched: [1:0.50] 12610 ; ATOM-NEXT: retq # sched: [79:39.50] 12611 ; 12612 ; SLM-LABEL: test_psubusb: 12613 ; SLM: # %bb.0: 12614 ; SLM-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] 12615 ; SLM-NEXT: psubusb (%rdi), %xmm0 # sched: [4:1.00] 12616 ; SLM-NEXT: retq # sched: [4:1.00] 12617 ; 12618 ; SANDY-SSE-LABEL: test_psubusb: 12619 ; SANDY-SSE: # %bb.0: 12620 ; SANDY-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] 12621 ; SANDY-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] 12622 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 12623 ; 12624 ; SANDY-LABEL: test_psubusb: 12625 ; SANDY: # %bb.0: 12626 ; SANDY-NEXT: vpsubusb %xmm1, 
%xmm0, %xmm0 # sched: [1:0.50] 12627 ; SANDY-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12628 ; SANDY-NEXT: retq # sched: [1:1.00] 12629 ; 12630 ; HASWELL-SSE-LABEL: test_psubusb: 12631 ; HASWELL-SSE: # %bb.0: 12632 ; HASWELL-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] 12633 ; HASWELL-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] 12634 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 12635 ; 12636 ; HASWELL-LABEL: test_psubusb: 12637 ; HASWELL: # %bb.0: 12638 ; HASWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12639 ; HASWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12640 ; HASWELL-NEXT: retq # sched: [7:1.00] 12641 ; 12642 ; BROADWELL-SSE-LABEL: test_psubusb: 12643 ; BROADWELL-SSE: # %bb.0: 12644 ; BROADWELL-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] 12645 ; BROADWELL-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [6:0.50] 12646 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 12647 ; 12648 ; BROADWELL-LABEL: test_psubusb: 12649 ; BROADWELL: # %bb.0: 12650 ; BROADWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12651 ; BROADWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 12652 ; BROADWELL-NEXT: retq # sched: [7:1.00] 12653 ; 12654 ; SKYLAKE-SSE-LABEL: test_psubusb: 12655 ; SKYLAKE-SSE: # %bb.0: 12656 ; SKYLAKE-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] 12657 ; SKYLAKE-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] 12658 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 12659 ; 12660 ; SKYLAKE-LABEL: test_psubusb: 12661 ; SKYLAKE: # %bb.0: 12662 ; SKYLAKE-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12663 ; SKYLAKE-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12664 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 12665 ; 12666 ; SKX-SSE-LABEL: test_psubusb: 12667 ; SKX-SSE: # %bb.0: 12668 ; SKX-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] 12669 ; SKX-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] 12670 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 12671 ; 12672 ; SKX-LABEL: test_psubusb: 12673 ; SKX: # 
%bb.0: 12674 ; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12675 ; SKX-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12676 ; SKX-NEXT: retq # sched: [7:1.00] 12677 ; 12678 ; BTVER2-SSE-LABEL: test_psubusb: 12679 ; BTVER2-SSE: # %bb.0: 12680 ; BTVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] 12681 ; BTVER2-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [6:1.00] 12682 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 12683 ; 12684 ; BTVER2-LABEL: test_psubusb: 12685 ; BTVER2: # %bb.0: 12686 ; BTVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12687 ; BTVER2-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 12688 ; BTVER2-NEXT: retq # sched: [4:1.00] 12689 ; 12690 ; ZNVER1-SSE-LABEL: test_psubusb: 12691 ; ZNVER1-SSE: # %bb.0: 12692 ; ZNVER1-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.25] 12693 ; ZNVER1-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [8:0.50] 12694 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 12695 ; 12696 ; ZNVER1-LABEL: test_psubusb: 12697 ; ZNVER1: # %bb.0: 12698 ; ZNVER1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 12699 ; ZNVER1-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 12700 ; ZNVER1-NEXT: retq # sched: [1:0.50] 12701 %1 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) 12702 %2 = load <16 x i8>, <16 x i8> *%a2, align 16 12703 %3 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %1, <16 x i8> %2) 12704 ret <16 x i8> %3 12705 } 12706 declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone 12707 12708 define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 12709 ; GENERIC-LABEL: test_psubusw: 12710 ; GENERIC: # %bb.0: 12711 ; GENERIC-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] 12712 ; GENERIC-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] 12713 ; GENERIC-NEXT: retq # sched: [1:1.00] 12714 ; 12715 ; ATOM-LABEL: test_psubusw: 12716 ; ATOM: # %bb.0: 12717 ; ATOM-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] 12718 ; ATOM-NEXT: psubusw (%rdi), 
%xmm0 # sched: [1:1.00] 12719 ; ATOM-NEXT: nop # sched: [1:0.50] 12720 ; ATOM-NEXT: nop # sched: [1:0.50] 12721 ; ATOM-NEXT: nop # sched: [1:0.50] 12722 ; ATOM-NEXT: nop # sched: [1:0.50] 12723 ; ATOM-NEXT: retq # sched: [79:39.50] 12724 ; 12725 ; SLM-LABEL: test_psubusw: 12726 ; SLM: # %bb.0: 12727 ; SLM-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] 12728 ; SLM-NEXT: psubusw (%rdi), %xmm0 # sched: [4:1.00] 12729 ; SLM-NEXT: retq # sched: [4:1.00] 12730 ; 12731 ; SANDY-SSE-LABEL: test_psubusw: 12732 ; SANDY-SSE: # %bb.0: 12733 ; SANDY-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] 12734 ; SANDY-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] 12735 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 12736 ; 12737 ; SANDY-LABEL: test_psubusw: 12738 ; SANDY: # %bb.0: 12739 ; SANDY-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12740 ; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12741 ; SANDY-NEXT: retq # sched: [1:1.00] 12742 ; 12743 ; HASWELL-SSE-LABEL: test_psubusw: 12744 ; HASWELL-SSE: # %bb.0: 12745 ; HASWELL-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] 12746 ; HASWELL-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] 12747 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 12748 ; 12749 ; HASWELL-LABEL: test_psubusw: 12750 ; HASWELL: # %bb.0: 12751 ; HASWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12752 ; HASWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12753 ; HASWELL-NEXT: retq # sched: [7:1.00] 12754 ; 12755 ; BROADWELL-SSE-LABEL: test_psubusw: 12756 ; BROADWELL-SSE: # %bb.0: 12757 ; BROADWELL-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] 12758 ; BROADWELL-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [6:0.50] 12759 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 12760 ; 12761 ; BROADWELL-LABEL: test_psubusw: 12762 ; BROADWELL: # %bb.0: 12763 ; BROADWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12764 ; BROADWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 12765 ; BROADWELL-NEXT: retq # sched: [7:1.00] 12766 
; 12767 ; SKYLAKE-SSE-LABEL: test_psubusw: 12768 ; SKYLAKE-SSE: # %bb.0: 12769 ; SKYLAKE-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] 12770 ; SKYLAKE-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] 12771 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 12772 ; 12773 ; SKYLAKE-LABEL: test_psubusw: 12774 ; SKYLAKE: # %bb.0: 12775 ; SKYLAKE-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12776 ; SKYLAKE-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12777 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 12778 ; 12779 ; SKX-SSE-LABEL: test_psubusw: 12780 ; SKX-SSE: # %bb.0: 12781 ; SKX-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] 12782 ; SKX-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] 12783 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 12784 ; 12785 ; SKX-LABEL: test_psubusw: 12786 ; SKX: # %bb.0: 12787 ; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12788 ; SKX-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12789 ; SKX-NEXT: retq # sched: [7:1.00] 12790 ; 12791 ; BTVER2-SSE-LABEL: test_psubusw: 12792 ; BTVER2-SSE: # %bb.0: 12793 ; BTVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] 12794 ; BTVER2-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [6:1.00] 12795 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 12796 ; 12797 ; BTVER2-LABEL: test_psubusw: 12798 ; BTVER2: # %bb.0: 12799 ; BTVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12800 ; BTVER2-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 12801 ; BTVER2-NEXT: retq # sched: [4:1.00] 12802 ; 12803 ; ZNVER1-SSE-LABEL: test_psubusw: 12804 ; ZNVER1-SSE: # %bb.0: 12805 ; ZNVER1-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.25] 12806 ; ZNVER1-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [8:0.50] 12807 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 12808 ; 12809 ; ZNVER1-LABEL: test_psubusw: 12810 ; ZNVER1: # %bb.0: 12811 ; ZNVER1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 12812 ; ZNVER1-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 12813 ; ZNVER1-NEXT: retq # sched: [1:0.50] 12814 %1 
= call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) 12815 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 12816 %3 = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %1, <8 x i16> %2) 12817 ret <8 x i16> %3 12818 } 12819 declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone 12820 12821 define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 12822 ; GENERIC-LABEL: test_psubw: 12823 ; GENERIC: # %bb.0: 12824 ; GENERIC-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] 12825 ; GENERIC-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] 12826 ; GENERIC-NEXT: retq # sched: [1:1.00] 12827 ; 12828 ; ATOM-LABEL: test_psubw: 12829 ; ATOM: # %bb.0: 12830 ; ATOM-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] 12831 ; ATOM-NEXT: psubw (%rdi), %xmm0 # sched: [1:1.00] 12832 ; ATOM-NEXT: nop # sched: [1:0.50] 12833 ; ATOM-NEXT: nop # sched: [1:0.50] 12834 ; ATOM-NEXT: nop # sched: [1:0.50] 12835 ; ATOM-NEXT: nop # sched: [1:0.50] 12836 ; ATOM-NEXT: retq # sched: [79:39.50] 12837 ; 12838 ; SLM-LABEL: test_psubw: 12839 ; SLM: # %bb.0: 12840 ; SLM-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] 12841 ; SLM-NEXT: psubw (%rdi), %xmm0 # sched: [4:1.00] 12842 ; SLM-NEXT: retq # sched: [4:1.00] 12843 ; 12844 ; SANDY-SSE-LABEL: test_psubw: 12845 ; SANDY-SSE: # %bb.0: 12846 ; SANDY-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] 12847 ; SANDY-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] 12848 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 12849 ; 12850 ; SANDY-LABEL: test_psubw: 12851 ; SANDY: # %bb.0: 12852 ; SANDY-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12853 ; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12854 ; SANDY-NEXT: retq # sched: [1:1.00] 12855 ; 12856 ; HASWELL-SSE-LABEL: test_psubw: 12857 ; HASWELL-SSE: # %bb.0: 12858 ; HASWELL-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] 12859 ; HASWELL-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] 12860 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 12861 ; 12862 ; HASWELL-LABEL: 
test_psubw: 12863 ; HASWELL: # %bb.0: 12864 ; HASWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12865 ; HASWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12866 ; HASWELL-NEXT: retq # sched: [7:1.00] 12867 ; 12868 ; BROADWELL-SSE-LABEL: test_psubw: 12869 ; BROADWELL-SSE: # %bb.0: 12870 ; BROADWELL-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] 12871 ; BROADWELL-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [6:0.50] 12872 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 12873 ; 12874 ; BROADWELL-LABEL: test_psubw: 12875 ; BROADWELL: # %bb.0: 12876 ; BROADWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12877 ; BROADWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 12878 ; BROADWELL-NEXT: retq # sched: [7:1.00] 12879 ; 12880 ; SKYLAKE-SSE-LABEL: test_psubw: 12881 ; SKYLAKE-SSE: # %bb.0: 12882 ; SKYLAKE-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.33] 12883 ; SKYLAKE-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] 12884 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 12885 ; 12886 ; SKYLAKE-LABEL: test_psubw: 12887 ; SKYLAKE: # %bb.0: 12888 ; SKYLAKE-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 12889 ; SKYLAKE-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12890 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 12891 ; 12892 ; SKX-SSE-LABEL: test_psubw: 12893 ; SKX-SSE: # %bb.0: 12894 ; SKX-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.33] 12895 ; SKX-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] 12896 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 12897 ; 12898 ; SKX-LABEL: test_psubw: 12899 ; SKX: # %bb.0: 12900 ; SKX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 12901 ; SKX-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 12902 ; SKX-NEXT: retq # sched: [7:1.00] 12903 ; 12904 ; BTVER2-SSE-LABEL: test_psubw: 12905 ; BTVER2-SSE: # %bb.0: 12906 ; BTVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] 12907 ; BTVER2-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [6:1.00] 12908 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 12909 ; 12910 ; BTVER2-LABEL: test_psubw: 12911 
; BTVER2: # %bb.0: 12912 ; BTVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 12913 ; BTVER2-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 12914 ; BTVER2-NEXT: retq # sched: [4:1.00] 12915 ; 12916 ; ZNVER1-SSE-LABEL: test_psubw: 12917 ; ZNVER1-SSE: # %bb.0: 12918 ; ZNVER1-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.25] 12919 ; ZNVER1-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [8:0.50] 12920 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 12921 ; 12922 ; ZNVER1-LABEL: test_psubw: 12923 ; ZNVER1: # %bb.0: 12924 ; ZNVER1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 12925 ; ZNVER1-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 12926 ; ZNVER1-NEXT: retq # sched: [1:0.50] 12927 %1 = sub <8 x i16> %a0, %a1 12928 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 12929 %3 = sub <8 x i16> %1, %2 12930 ret <8 x i16> %3 12931 } 12932 12933 define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 12934 ; GENERIC-LABEL: test_punpckhbw: 12935 ; GENERIC: # %bb.0: 12936 ; GENERIC-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] 12937 ; GENERIC-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] 12938 ; GENERIC-NEXT: retq # sched: [1:1.00] 12939 ; 12940 ; ATOM-LABEL: test_punpckhbw: 12941 ; ATOM: # %bb.0: 12942 ; ATOM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] 12943 ; ATOM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00] 12944 ; ATOM-NEXT: nop # sched: [1:0.50] 12945 ; ATOM-NEXT: nop # sched: [1:0.50] 12946 ; 
ATOM-NEXT: nop # sched: [1:0.50] 12947 ; ATOM-NEXT: nop # sched: [1:0.50] 12948 ; ATOM-NEXT: retq # sched: [79:39.50] 12949 ; 12950 ; SLM-LABEL: test_punpckhbw: 12951 ; SLM: # %bb.0: 12952 ; SLM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] 12953 ; SLM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [4:1.00] 12954 ; SLM-NEXT: retq # sched: [4:1.00] 12955 ; 12956 ; SANDY-SSE-LABEL: test_punpckhbw: 12957 ; SANDY-SSE: # %bb.0: 12958 ; SANDY-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] 12959 ; SANDY-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] 12960 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 12961 ; 12962 ; SANDY-LABEL: test_punpckhbw: 12963 ; SANDY: # %bb.0: 12964 ; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] 12965 ; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] 12966 ; SANDY-NEXT: retq # sched: [1:1.00] 12967 ; 12968 ; HASWELL-SSE-LABEL: test_punpckhbw: 12969 ; HASWELL-SSE: # %bb.0: 12970 ; HASWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] 12971 ; HASWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = 
xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] 12972 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 12973 ; 12974 ; HASWELL-LABEL: test_punpckhbw: 12975 ; HASWELL: # %bb.0: 12976 ; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] 12977 ; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] 12978 ; HASWELL-NEXT: retq # sched: [7:1.00] 12979 ; 12980 ; BROADWELL-SSE-LABEL: test_punpckhbw: 12981 ; BROADWELL-SSE: # %bb.0: 12982 ; BROADWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] 12983 ; BROADWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] 12984 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 12985 ; 12986 ; BROADWELL-LABEL: test_punpckhbw: 12987 ; BROADWELL: # %bb.0: 12988 ; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] 12989 ; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] 12990 ; BROADWELL-NEXT: retq # sched: [7:1.00] 12991 ; 12992 ; SKYLAKE-SSE-LABEL: test_punpckhbw: 12993 ; SKYLAKE-SSE: # %bb.0: 12994 ; SKYLAKE-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = 
xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] 12995 ; SKYLAKE-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] 12996 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 12997 ; 12998 ; SKYLAKE-LABEL: test_punpckhbw: 12999 ; SKYLAKE: # %bb.0: 13000 ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] 13001 ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] 13002 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 13003 ; 13004 ; SKX-SSE-LABEL: test_punpckhbw: 13005 ; SKX-SSE: # %bb.0: 13006 ; SKX-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] 13007 ; SKX-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] 13008 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 13009 ; 13010 ; SKX-LABEL: test_punpckhbw: 13011 ; SKX: # %bb.0: 13012 ; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] 13013 ; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] 13014 ; SKX-NEXT: retq # sched: [7:1.00] 13015 ; 13016 ; BTVER2-SSE-LABEL: test_punpckhbw: 13017 
; BTVER2-SSE: # %bb.0: 13018 ; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] 13019 ; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] 13020 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 13021 ; 13022 ; BTVER2-LABEL: test_punpckhbw: 13023 ; BTVER2: # %bb.0: 13024 ; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] 13025 ; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] 13026 ; BTVER2-NEXT: retq # sched: [4:1.00] 13027 ; 13028 ; ZNVER1-SSE-LABEL: test_punpckhbw: 13029 ; ZNVER1-SSE: # %bb.0: 13030 ; ZNVER1-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25] 13031 ; ZNVER1-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50] 13032 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 13033 ; 13034 ; ZNVER1-LABEL: test_punpckhbw: 13035 ; ZNVER1: # %bb.0: 13036 ; ZNVER1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25] 13037 ; ZNVER1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50] 
13038 ; ZNVER1-NEXT: retq # sched: [1:0.50] 13039 %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 13040 %2 = load <16 x i8>, <16 x i8> *%a2, align 16 13041 %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 13042 ret <16 x i8> %3 13043 } 13044 13045 define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 13046 ; GENERIC-LABEL: test_punpckhdq: 13047 ; GENERIC: # %bb.0: 13048 ; GENERIC-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] 13049 ; GENERIC-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] 13050 ; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 13051 ; GENERIC-NEXT: retq # sched: [1:1.00] 13052 ; 13053 ; ATOM-LABEL: test_punpckhdq: 13054 ; ATOM: # %bb.0: 13055 ; ATOM-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13056 ; ATOM-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00] 13057 ; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 13058 ; ATOM-NEXT: nop # sched: [1:0.50] 13059 ; ATOM-NEXT: nop # sched: [1:0.50] 13060 ; ATOM-NEXT: retq # sched: [79:39.50] 13061 ; 13062 ; SLM-LABEL: test_punpckhdq: 13063 ; SLM: # %bb.0: 13064 ; SLM-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13065 ; SLM-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [4:1.00] 13066 ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 13067 ; SLM-NEXT: retq # sched: [4:1.00] 13068 ; 13069 ; SANDY-SSE-LABEL: test_punpckhdq: 13070 ; SANDY-SSE: # %bb.0: 13071 ; SANDY-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] 13072 ; SANDY-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] 
sched: [7:0.50] 13073 ; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 13074 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 13075 ; 13076 ; SANDY-LABEL: test_punpckhdq: 13077 ; SANDY: # %bb.0: 13078 ; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] 13079 ; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] 13080 ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13081 ; SANDY-NEXT: retq # sched: [1:1.00] 13082 ; 13083 ; HASWELL-SSE-LABEL: test_punpckhdq: 13084 ; HASWELL-SSE: # %bb.0: 13085 ; HASWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13086 ; HASWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] 13087 ; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 13088 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 13089 ; 13090 ; HASWELL-LABEL: test_punpckhdq: 13091 ; HASWELL: # %bb.0: 13092 ; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13093 ; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] 13094 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13095 ; HASWELL-NEXT: retq # sched: [7:1.00] 13096 ; 13097 ; BROADWELL-SSE-LABEL: test_punpckhdq: 13098 ; BROADWELL-SSE: # %bb.0: 13099 ; BROADWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13100 ; BROADWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] 13101 ; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 13102 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 13103 ; 13104 ; BROADWELL-LABEL: test_punpckhdq: 13105 ; BROADWELL: # %bb.0: 13106 ; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13107 ; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] 13108 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: 
[1:0.50] 13109 ; BROADWELL-NEXT: retq # sched: [7:1.00] 13110 ; 13111 ; SKYLAKE-SSE-LABEL: test_punpckhdq: 13112 ; SKYLAKE-SSE: # %bb.0: 13113 ; SKYLAKE-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13114 ; SKYLAKE-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] 13115 ; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 13116 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 13117 ; 13118 ; SKYLAKE-LABEL: test_punpckhdq: 13119 ; SKYLAKE: # %bb.0: 13120 ; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13121 ; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] 13122 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 13123 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 13124 ; 13125 ; SKX-SSE-LABEL: test_punpckhdq: 13126 ; SKX-SSE: # %bb.0: 13127 ; SKX-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13128 ; SKX-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] 13129 ; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 13130 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 13131 ; 13132 ; SKX-LABEL: test_punpckhdq: 13133 ; SKX: # %bb.0: 13134 ; SKX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13135 ; SKX-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] 13136 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 13137 ; SKX-NEXT: retq # sched: [7:1.00] 13138 ; 13139 ; BTVER2-SSE-LABEL: test_punpckhdq: 13140 ; BTVER2-SSE: # %bb.0: 13141 ; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] 13142 ; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] 13143 ; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 13144 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 13145 ; 13146 ; BTVER2-LABEL: test_punpckhdq: 13147 ; BTVER2: # %bb.0: 
13148 ; BTVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] 13149 ; BTVER2-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] 13150 ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13151 ; BTVER2-NEXT: retq # sched: [4:1.00] 13152 ; 13153 ; ZNVER1-SSE-LABEL: test_punpckhdq: 13154 ; ZNVER1-SSE: # %bb.0: 13155 ; ZNVER1-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] 13156 ; ZNVER1-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50] 13157 ; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] 13158 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 13159 ; 13160 ; ZNVER1-LABEL: test_punpckhdq: 13161 ; ZNVER1: # %bb.0: 13162 ; ZNVER1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] 13163 ; ZNVER1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50] 13164 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 13165 ; ZNVER1-NEXT: retq # sched: [1:0.50] 13166 %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13167 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 13168 %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 13169 %4 = add <4 x i32> %1, %3 13170 ret <4 x i32> %4 13171 } 13172 ; High-quadword interleave of %a0/%a1 and of %a1/loaded %a2, summed so both the register and memory operand forms are emitted. 13173 define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 13174 ; GENERIC-LABEL: test_punpckhqdq: 13175 ; GENERIC: # %bb.0: 13176 ; GENERIC-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] 13177 ; GENERIC-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] 13178 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13179 ; GENERIC-NEXT: retq # sched: [1:1.00] 13180 ; 13181 ; ATOM-LABEL: test_punpckhqdq: 13182 ; ATOM: # %bb.0: 13183 ; ATOM-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 13184 ; ATOM-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: 
[1:1.00] 13185 ; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] 13186 ; ATOM-NEXT: retq # sched: [79:39.50] 13187 ; 13188 ; SLM-LABEL: test_punpckhqdq: 13189 ; SLM: # %bb.0: 13190 ; SLM-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 13191 ; SLM-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [4:1.00] 13192 ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13193 ; SLM-NEXT: retq # sched: [4:1.00] 13194 ; 13195 ; SANDY-SSE-LABEL: test_punpckhqdq: 13196 ; SANDY-SSE: # %bb.0: 13197 ; SANDY-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] 13198 ; SANDY-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] 13199 ; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13200 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 13201 ; 13202 ; SANDY-LABEL: test_punpckhqdq: 13203 ; SANDY: # %bb.0: 13204 ; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] 13205 ; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] 13206 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13207 ; SANDY-NEXT: retq # sched: [1:1.00] 13208 ; 13209 ; HASWELL-SSE-LABEL: test_punpckhqdq: 13210 ; HASWELL-SSE: # %bb.0: 13211 ; HASWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 13212 ; HASWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] 13213 ; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13214 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 13215 ; 13216 ; HASWELL-LABEL: test_punpckhqdq: 13217 ; HASWELL: # %bb.0: 13218 ; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 13219 ; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] 13220 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13221 ; HASWELL-NEXT: retq # sched: [7:1.00] 13222 ; 13223 ; BROADWELL-SSE-LABEL: test_punpckhqdq: 13224 ; BROADWELL-SSE: # %bb.0: 13225 ; BROADWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 
13226 ; BROADWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] 13227 ; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13228 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 13229 ; 13230 ; BROADWELL-LABEL: test_punpckhqdq: 13231 ; BROADWELL: # %bb.0: 13232 ; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 13233 ; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] 13234 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13235 ; BROADWELL-NEXT: retq # sched: [7:1.00] 13236 ; 13237 ; SKYLAKE-SSE-LABEL: test_punpckhqdq: 13238 ; SKYLAKE-SSE: # %bb.0: 13239 ; SKYLAKE-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 13240 ; SKYLAKE-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] 13241 ; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 13242 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 13243 ; 13244 ; SKYLAKE-LABEL: test_punpckhqdq: 13245 ; SKYLAKE: # %bb.0: 13246 ; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 13247 ; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] 13248 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 13249 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 13250 ; 13251 ; SKX-SSE-LABEL: test_punpckhqdq: 13252 ; SKX-SSE: # %bb.0: 13253 ; SKX-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 13254 ; SKX-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] 13255 ; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 13256 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 13257 ; 13258 ; SKX-LABEL: test_punpckhqdq: 13259 ; SKX: # %bb.0: 13260 ; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 13261 ; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] 13262 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 13263 ; SKX-NEXT: retq # sched: [7:1.00] 13264 ; 13265 ; BTVER2-SSE-LABEL: test_punpckhqdq: 13266 ; 
BTVER2-SSE: # %bb.0: 13267 ; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] 13268 ; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] 13269 ; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13270 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 13271 ; 13272 ; BTVER2-LABEL: test_punpckhqdq: 13273 ; BTVER2: # %bb.0: 13274 ; BTVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] 13275 ; BTVER2-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] 13276 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13277 ; BTVER2-NEXT: retq # sched: [4:1.00] 13278 ; 13279 ; ZNVER1-SSE-LABEL: test_punpckhqdq: 13280 ; ZNVER1-SSE: # %bb.0: 13281 ; ZNVER1-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25] 13282 ; ZNVER1-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50] 13283 ; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] 13284 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 13285 ; 13286 ; ZNVER1-LABEL: test_punpckhqdq: 13287 ; ZNVER1: # %bb.0: 13288 ; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25] 13289 ; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50] 13290 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 13291 ; ZNVER1-NEXT: retq # sched: [1:0.50] 13292 %1 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3> 13293 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 13294 %3 = shufflevector <2 x i64> %a1, <2 x i64> %2, <2 x i32> <i32 1, i32 3> 13295 %4 = add <2 x i64> %1, %3 13296 ret <2 x i64> %4 13297 } 13298 13299 define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 13300 ; GENERIC-LABEL: test_punpckhwd: 13301 ; GENERIC: # %bb.0: 13302 ; GENERIC-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] 13303 ; GENERIC-NEXT: punpckhwd {{.*#+}} xmm0 = 
xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] 13304 ; GENERIC-NEXT: retq # sched: [1:1.00] 13305 ; 13306 ; ATOM-LABEL: test_punpckhwd: 13307 ; ATOM: # %bb.0: 13308 ; ATOM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13309 ; ATOM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] 13310 ; ATOM-NEXT: nop # sched: [1:0.50] 13311 ; ATOM-NEXT: nop # sched: [1:0.50] 13312 ; ATOM-NEXT: nop # sched: [1:0.50] 13313 ; ATOM-NEXT: nop # sched: [1:0.50] 13314 ; ATOM-NEXT: retq # sched: [79:39.50] 13315 ; 13316 ; SLM-LABEL: test_punpckhwd: 13317 ; SLM: # %bb.0: 13318 ; SLM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13319 ; SLM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00] 13320 ; SLM-NEXT: retq # sched: [4:1.00] 13321 ; 13322 ; SANDY-SSE-LABEL: test_punpckhwd: 13323 ; SANDY-SSE: # %bb.0: 13324 ; SANDY-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] 13325 ; SANDY-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] 13326 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 13327 ; 13328 ; SANDY-LABEL: test_punpckhwd: 13329 ; SANDY: # %bb.0: 13330 ; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] 13331 ; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] 13332 ; SANDY-NEXT: retq # sched: [1:1.00] 13333 ; 13334 ; HASWELL-SSE-LABEL: test_punpckhwd: 13335 ; HASWELL-SSE: # %bb.0: 13336 ; HASWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13337 ; HASWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = 
xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] 13338 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 13339 ; 13340 ; HASWELL-LABEL: test_punpckhwd: 13341 ; HASWELL: # %bb.0: 13342 ; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13343 ; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] 13344 ; HASWELL-NEXT: retq # sched: [7:1.00] 13345 ; 13346 ; BROADWELL-SSE-LABEL: test_punpckhwd: 13347 ; BROADWELL-SSE: # %bb.0: 13348 ; BROADWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13349 ; BROADWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] 13350 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 13351 ; 13352 ; BROADWELL-LABEL: test_punpckhwd: 13353 ; BROADWELL: # %bb.0: 13354 ; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13355 ; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] 13356 ; BROADWELL-NEXT: retq # sched: [7:1.00] 13357 ; 13358 ; SKYLAKE-SSE-LABEL: test_punpckhwd: 13359 ; SKYLAKE-SSE: # %bb.0: 13360 ; SKYLAKE-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13361 ; SKYLAKE-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] 13362 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 13363 ; 13364 ; SKYLAKE-LABEL: test_punpckhwd: 13365 ; SKYLAKE: # %bb.0: 13366 ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13367 ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] 
13368 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 13369 ; 13370 ; SKX-SSE-LABEL: test_punpckhwd: 13371 ; SKX-SSE: # %bb.0: 13372 ; SKX-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13373 ; SKX-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] 13374 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 13375 ; 13376 ; SKX-LABEL: test_punpckhwd: 13377 ; SKX: # %bb.0: 13378 ; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13379 ; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] 13380 ; SKX-NEXT: retq # sched: [7:1.00] 13381 ; 13382 ; BTVER2-SSE-LABEL: test_punpckhwd: 13383 ; BTVER2-SSE: # %bb.0: 13384 ; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] 13385 ; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] 13386 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 13387 ; 13388 ; BTVER2-LABEL: test_punpckhwd: 13389 ; BTVER2: # %bb.0: 13390 ; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] 13391 ; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] 13392 ; BTVER2-NEXT: retq # sched: [4:1.00] 13393 ; 13394 ; ZNVER1-SSE-LABEL: test_punpckhwd: 13395 ; ZNVER1-SSE: # %bb.0: 13396 ; ZNVER1-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] 13397 ; ZNVER1-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50] 13398 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 13399 ; 13400 ; ZNVER1-LABEL: test_punpckhwd: 13401 ; ZNVER1: # %bb.0: 13402 ; ZNVER1-NEXT: 
vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] 13403 ; ZNVER1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50] 13404 ; ZNVER1-NEXT: retq # sched: [1:0.50] 13405 %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 13406 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 13407 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 13408 ret <8 x i16> %3 13409 } 13410 13411 define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { 13412 ; GENERIC-LABEL: test_punpcklbw: 13413 ; GENERIC: # %bb.0: 13414 ; GENERIC-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] 13415 ; GENERIC-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] 13416 ; GENERIC-NEXT: retq # sched: [1:1.00] 13417 ; 13418 ; ATOM-LABEL: test_punpcklbw: 13419 ; ATOM: # %bb.0: 13420 ; ATOM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13421 ; ATOM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] 13422 ; ATOM-NEXT: nop # sched: [1:0.50] 13423 ; ATOM-NEXT: nop # sched: [1:0.50] 13424 ; ATOM-NEXT: nop # sched: [1:0.50] 13425 ; ATOM-NEXT: nop # sched: [1:0.50] 13426 ; ATOM-NEXT: retq # sched: [79:39.50] 13427 ; 13428 ; SLM-LABEL: test_punpcklbw: 13429 ; SLM: # %bb.0: 13430 ; SLM-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13431 ; SLM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00] 13432 ; SLM-NEXT: retq # sched: [4:1.00] 13433 ; 13434 ; SANDY-SSE-LABEL: test_punpcklbw: 13435 ; SANDY-SSE: # %bb.0: 13436 ; SANDY-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] 13437 ; SANDY-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] 13438 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 13439 ; 13440 ; SANDY-LABEL: test_punpcklbw: 13441 ; SANDY: # %bb.0: 13442 ; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] 13443 ; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] 13444 ; SANDY-NEXT: retq # sched: [1:1.00] 13445 ; 13446 ; HASWELL-SSE-LABEL: test_punpcklbw: 13447 ; HASWELL-SSE: # %bb.0: 13448 ; HASWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13449 ; HASWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] 13450 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 13451 ; 13452 ; HASWELL-LABEL: test_punpcklbw: 13453 ; HASWELL: # %bb.0: 13454 ; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13455 ; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] 13456 ; HASWELL-NEXT: retq # sched: [7:1.00] 13457 ; 13458 ; BROADWELL-SSE-LABEL: test_punpcklbw: 13459 ; BROADWELL-SSE: # %bb.0: 13460 ; BROADWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13461 ; BROADWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] 13462 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 13463 ; 13464 ; BROADWELL-LABEL: test_punpcklbw: 13465 ; BROADWELL: # %bb.0: 13466 ; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13467 ; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] 13468 ; BROADWELL-NEXT: retq # sched: [7:1.00] 13469 ; 13470 ; SKYLAKE-SSE-LABEL: test_punpcklbw: 13471 ; SKYLAKE-SSE: # %bb.0: 13472 ; SKYLAKE-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13473 ; SKYLAKE-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] 13474 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 13475 ; 13476 ; SKYLAKE-LABEL: test_punpcklbw: 13477 ; SKYLAKE: # %bb.0: 13478 ; 
SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13479 ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] 13480 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 13481 ; 13482 ; SKX-SSE-LABEL: test_punpcklbw: 13483 ; SKX-SSE: # %bb.0: 13484 ; SKX-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13485 ; SKX-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] 13486 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 13487 ; 13488 ; SKX-LABEL: test_punpcklbw: 13489 ; SKX: # %bb.0: 13490 ; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] 13491 ; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] 13492 ; SKX-NEXT: retq # sched: [7:1.00] 13493 ; 13494 ; BTVER2-SSE-LABEL: test_punpcklbw: 13495 ; BTVER2-SSE: # %bb.0: 13496 ; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] 13497 ; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] 13498 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 13499 ; 13500 ; BTVER2-LABEL: test_punpcklbw: 13501 ; BTVER2: # %bb.0: 13502 ; BTVER2-NEXT: vpunpcklbw 
{{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] 13503 ; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] 13504 ; BTVER2-NEXT: retq # sched: [4:1.00] 13505 ; 13506 ; ZNVER1-SSE-LABEL: test_punpcklbw: 13507 ; ZNVER1-SSE: # %bb.0: 13508 ; ZNVER1-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] 13509 ; ZNVER1-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50] 13510 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 13511 ; 13512 ; ZNVER1-LABEL: test_punpcklbw: 13513 ; ZNVER1: # %bb.0: 13514 ; ZNVER1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] 13515 ; ZNVER1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50] 13516 ; ZNVER1-NEXT: retq # sched: [1:0.50] 13517 %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 13518 %2 = load <16 x i8>, <16 x i8> *%a2, align 16 13519 %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 13520 ret <16 x i8> %3 13521 } 13522 13523 define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 13524 ; GENERIC-LABEL: test_punpckldq: 13525 ; GENERIC: # 
%bb.0: 13526 ; GENERIC-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] 13527 ; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] 13528 ; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 13529 ; GENERIC-NEXT: retq # sched: [1:1.00] 13530 ; 13531 ; ATOM-LABEL: test_punpckldq: 13532 ; ATOM: # %bb.0: 13533 ; ATOM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 13534 ; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00] 13535 ; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 13536 ; ATOM-NEXT: nop # sched: [1:0.50] 13537 ; ATOM-NEXT: nop # sched: [1:0.50] 13538 ; ATOM-NEXT: retq # sched: [79:39.50] 13539 ; 13540 ; SLM-LABEL: test_punpckldq: 13541 ; SLM: # %bb.0: 13542 ; SLM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 13543 ; SLM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [4:1.00] 13544 ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 13545 ; SLM-NEXT: retq # sched: [4:1.00] 13546 ; 13547 ; SANDY-SSE-LABEL: test_punpckldq: 13548 ; SANDY-SSE: # %bb.0: 13549 ; SANDY-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] 13550 ; SANDY-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] 13551 ; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 13552 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 13553 ; 13554 ; SANDY-LABEL: test_punpckldq: 13555 ; SANDY: # %bb.0: 13556 ; SANDY-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] 13557 ; SANDY-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] 13558 ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13559 ; SANDY-NEXT: retq # sched: [1:1.00] 13560 ; 13561 ; HASWELL-SSE-LABEL: test_punpckldq: 13562 ; HASWELL-SSE: # %bb.0: 13563 ; HASWELL-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: 
[1:1.00] 13564 ; HASWELL-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] 13565 ; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 13566 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 13567 ; 13568 ; HASWELL-LABEL: test_punpckldq: 13569 ; HASWELL: # %bb.0: 13570 ; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 13571 ; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] 13572 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13573 ; HASWELL-NEXT: retq # sched: [7:1.00] 13574 ; 13575 ; BROADWELL-SSE-LABEL: test_punpckldq: 13576 ; BROADWELL-SSE: # %bb.0: 13577 ; BROADWELL-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 13578 ; BROADWELL-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] 13579 ; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 13580 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 13581 ; 13582 ; BROADWELL-LABEL: test_punpckldq: 13583 ; BROADWELL: # %bb.0: 13584 ; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 13585 ; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] 13586 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13587 ; BROADWELL-NEXT: retq # sched: [7:1.00] 13588 ; 13589 ; SKYLAKE-SSE-LABEL: test_punpckldq: 13590 ; SKYLAKE-SSE: # %bb.0: 13591 ; SKYLAKE-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 13592 ; SKYLAKE-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] 13593 ; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 13594 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 13595 ; 13596 ; SKYLAKE-LABEL: test_punpckldq: 13597 ; SKYLAKE: # %bb.0: 13598 ; SKYLAKE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 13599 ; SKYLAKE-NEXT: vpunpckldq {{.*#+}} xmm1 = 
xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] 13600 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 13601 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 13602 ; 13603 ; SKX-SSE-LABEL: test_punpckldq: 13604 ; SKX-SSE: # %bb.0: 13605 ; SKX-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 13606 ; SKX-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] 13607 ; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] 13608 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 13609 ; 13610 ; SKX-LABEL: test_punpckldq: 13611 ; SKX: # %bb.0: 13612 ; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] 13613 ; SKX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] 13614 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 13615 ; SKX-NEXT: retq # sched: [7:1.00] 13616 ; 13617 ; BTVER2-SSE-LABEL: test_punpckldq: 13618 ; BTVER2-SSE: # %bb.0: 13619 ; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] 13620 ; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] 13621 ; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] 13622 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 13623 ; 13624 ; BTVER2-LABEL: test_punpckldq: 13625 ; BTVER2: # %bb.0: 13626 ; BTVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] 13627 ; BTVER2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] 13628 ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13629 ; BTVER2-NEXT: retq # sched: [4:1.00] 13630 ; 13631 ; ZNVER1-SSE-LABEL: test_punpckldq: 13632 ; ZNVER1-SSE: # %bb.0: 13633 ; ZNVER1-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25] 13634 ; ZNVER1-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50] 13635 ; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] 13636 ; ZNVER1-SSE-NEXT: 
retq # sched: [1:0.50] 13637 ; 13638 ; ZNVER1-LABEL: test_punpckldq: 13639 ; ZNVER1: # %bb.0: 13640 ; ZNVER1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25] 13641 ; ZNVER1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50] 13642 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 13643 ; ZNVER1-NEXT: retq # sched: [1:0.50] 13644 %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 13645 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 13646 %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 13647 %4 = add <4 x i32> %1, %3 13648 ret <4 x i32> %4 13649 } 13650 13651 define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 13652 ; GENERIC-LABEL: test_punpcklqdq: 13653 ; GENERIC: # %bb.0: 13654 ; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] 13655 ; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] 13656 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13657 ; GENERIC-NEXT: retq # sched: [1:1.00] 13658 ; 13659 ; ATOM-LABEL: test_punpcklqdq: 13660 ; ATOM: # %bb.0: 13661 ; ATOM-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 13662 ; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] 13663 ; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] 13664 ; ATOM-NEXT: retq # sched: [79:39.50] 13665 ; 13666 ; SLM-LABEL: test_punpcklqdq: 13667 ; SLM: # %bb.0: 13668 ; SLM-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 13669 ; SLM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00] 13670 ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13671 ; SLM-NEXT: retq # sched: [4:1.00] 13672 ; 13673 ; SANDY-SSE-LABEL: test_punpcklqdq: 13674 ; SANDY-SSE: # %bb.0: 13675 ; SANDY-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] 13676 ; SANDY-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: 
[7:0.50] 13677 ; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13678 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 13679 ; 13680 ; SANDY-LABEL: test_punpcklqdq: 13681 ; SANDY: # %bb.0: 13682 ; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] 13683 ; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] 13684 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13685 ; SANDY-NEXT: retq # sched: [1:1.00] 13686 ; 13687 ; HASWELL-SSE-LABEL: test_punpcklqdq: 13688 ; HASWELL-SSE: # %bb.0: 13689 ; HASWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 13690 ; HASWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 13691 ; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13692 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 13693 ; 13694 ; HASWELL-LABEL: test_punpcklqdq: 13695 ; HASWELL: # %bb.0: 13696 ; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 13697 ; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 13698 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13699 ; HASWELL-NEXT: retq # sched: [7:1.00] 13700 ; 13701 ; BROADWELL-SSE-LABEL: test_punpcklqdq: 13702 ; BROADWELL-SSE: # %bb.0: 13703 ; BROADWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 13704 ; BROADWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 13705 ; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13706 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 13707 ; 13708 ; BROADWELL-LABEL: test_punpcklqdq: 13709 ; BROADWELL: # %bb.0: 13710 ; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 13711 ; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 13712 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13713 ; BROADWELL-NEXT: retq # sched: [7:1.00] 13714 ; 13715 ; SKYLAKE-SSE-LABEL: test_punpcklqdq: 13716 ; SKYLAKE-SSE: # %bb.0: 13717 ; 
SKYLAKE-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 13718 ; SKYLAKE-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 13719 ; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 13720 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 13721 ; 13722 ; SKYLAKE-LABEL: test_punpcklqdq: 13723 ; SKYLAKE: # %bb.0: 13724 ; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 13725 ; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 13726 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 13727 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 13728 ; 13729 ; SKX-SSE-LABEL: test_punpcklqdq: 13730 ; SKX-SSE: # %bb.0: 13731 ; SKX-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 13732 ; SKX-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 13733 ; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 13734 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 13735 ; 13736 ; SKX-LABEL: test_punpcklqdq: 13737 ; SKX: # %bb.0: 13738 ; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 13739 ; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 13740 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 13741 ; SKX-NEXT: retq # sched: [7:1.00] 13742 ; 13743 ; BTVER2-SSE-LABEL: test_punpcklqdq: 13744 ; BTVER2-SSE: # %bb.0: 13745 ; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] 13746 ; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 13747 ; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13748 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 13749 ; 13750 ; BTVER2-LABEL: test_punpcklqdq: 13751 ; BTVER2: # %bb.0: 13752 ; BTVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] 13753 ; BTVER2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 13754 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13755 ; BTVER2-NEXT: retq # sched: [4:1.00] 13756 ; 13757 
; ZNVER1-SSE-LABEL: test_punpcklqdq: 13758 ; ZNVER1-SSE: # %bb.0: 13759 ; ZNVER1-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25] 13760 ; ZNVER1-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] 13761 ; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] 13762 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 13763 ; 13764 ; ZNVER1-LABEL: test_punpcklqdq: 13765 ; ZNVER1: # %bb.0: 13766 ; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25] 13767 ; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] 13768 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 13769 ; ZNVER1-NEXT: retq # sched: [1:0.50] 13770 %1 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2> 13771 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 13772 %3 = shufflevector <2 x i64> %a1, <2 x i64> %2, <2 x i32> <i32 0, i32 2> 13773 %4 = add <2 x i64> %1, %3 13774 ret <2 x i64> %4 13775 } 13776 13777 define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 13778 ; GENERIC-LABEL: test_punpcklwd: 13779 ; GENERIC: # %bb.0: 13780 ; GENERIC-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] 13781 ; GENERIC-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] 13782 ; GENERIC-NEXT: retq # sched: [1:1.00] 13783 ; 13784 ; ATOM-LABEL: test_punpcklwd: 13785 ; ATOM: # %bb.0: 13786 ; ATOM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13787 ; ATOM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] 13788 ; ATOM-NEXT: nop # sched: [1:0.50] 13789 ; ATOM-NEXT: nop # sched: [1:0.50] 13790 ; ATOM-NEXT: nop # sched: [1:0.50] 13791 ; ATOM-NEXT: nop # sched: [1:0.50] 13792 ; ATOM-NEXT: retq # sched: [79:39.50] 13793 ; 13794 ; SLM-LABEL: test_punpcklwd: 13795 ; SLM: # %bb.0: 
13796 ; SLM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13797 ; SLM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00] 13798 ; SLM-NEXT: retq # sched: [4:1.00] 13799 ; 13800 ; SANDY-SSE-LABEL: test_punpcklwd: 13801 ; SANDY-SSE: # %bb.0: 13802 ; SANDY-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] 13803 ; SANDY-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] 13804 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 13805 ; 13806 ; SANDY-LABEL: test_punpcklwd: 13807 ; SANDY: # %bb.0: 13808 ; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] 13809 ; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] 13810 ; SANDY-NEXT: retq # sched: [1:1.00] 13811 ; 13812 ; HASWELL-SSE-LABEL: test_punpcklwd: 13813 ; HASWELL-SSE: # %bb.0: 13814 ; HASWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13815 ; HASWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13816 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 13817 ; 13818 ; HASWELL-LABEL: test_punpcklwd: 13819 ; HASWELL: # %bb.0: 13820 ; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13821 ; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13822 ; HASWELL-NEXT: retq # sched: [7:1.00] 13823 ; 13824 ; BROADWELL-SSE-LABEL: test_punpcklwd: 13825 ; BROADWELL-SSE: # %bb.0: 13826 ; BROADWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: 
[1:1.00] 13827 ; BROADWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] 13828 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 13829 ; 13830 ; BROADWELL-LABEL: test_punpcklwd: 13831 ; BROADWELL: # %bb.0: 13832 ; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13833 ; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] 13834 ; BROADWELL-NEXT: retq # sched: [7:1.00] 13835 ; 13836 ; SKYLAKE-SSE-LABEL: test_punpcklwd: 13837 ; SKYLAKE-SSE: # %bb.0: 13838 ; SKYLAKE-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13839 ; SKYLAKE-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13840 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 13841 ; 13842 ; SKYLAKE-LABEL: test_punpcklwd: 13843 ; SKYLAKE: # %bb.0: 13844 ; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13845 ; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13846 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 13847 ; 13848 ; SKX-SSE-LABEL: test_punpcklwd: 13849 ; SKX-SSE: # %bb.0: 13850 ; SKX-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13851 ; SKX-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] 13852 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 13853 ; 13854 ; SKX-LABEL: test_punpcklwd: 13855 ; SKX: # %bb.0: 13856 ; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] 13857 ; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] 
sched: [7:1.00] 13858 ; SKX-NEXT: retq # sched: [7:1.00] 13859 ; 13860 ; BTVER2-SSE-LABEL: test_punpcklwd: 13861 ; BTVER2-SSE: # %bb.0: 13862 ; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] 13863 ; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] 13864 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 13865 ; 13866 ; BTVER2-LABEL: test_punpcklwd: 13867 ; BTVER2: # %bb.0: 13868 ; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] 13869 ; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] 13870 ; BTVER2-NEXT: retq # sched: [4:1.00] 13871 ; 13872 ; ZNVER1-SSE-LABEL: test_punpcklwd: 13873 ; ZNVER1-SSE: # %bb.0: 13874 ; ZNVER1-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] 13875 ; ZNVER1-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50] 13876 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 13877 ; 13878 ; ZNVER1-LABEL: test_punpcklwd: 13879 ; ZNVER1: # %bb.0: 13880 ; ZNVER1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] 13881 ; ZNVER1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50] 13882 ; ZNVER1-NEXT: retq # sched: [1:0.50] 13883 %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 13884 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 13885 %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 13886 ret <8 x i16> %3 13887 } 13888 13889 define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 13890 ; 
GENERIC-LABEL: test_pxor: 13891 ; GENERIC: # %bb.0: 13892 ; GENERIC-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] 13893 ; GENERIC-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] 13894 ; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13895 ; GENERIC-NEXT: retq # sched: [1:1.00] 13896 ; 13897 ; ATOM-LABEL: test_pxor: 13898 ; ATOM: # %bb.0: 13899 ; ATOM-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50] 13900 ; ATOM-NEXT: pxor (%rdi), %xmm0 # sched: [1:1.00] 13901 ; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] 13902 ; ATOM-NEXT: retq # sched: [79:39.50] 13903 ; 13904 ; SLM-LABEL: test_pxor: 13905 ; SLM: # %bb.0: 13906 ; SLM-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50] 13907 ; SLM-NEXT: pxor (%rdi), %xmm0 # sched: [4:1.00] 13908 ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13909 ; SLM-NEXT: retq # sched: [4:1.00] 13910 ; 13911 ; SANDY-SSE-LABEL: test_pxor: 13912 ; SANDY-SSE: # %bb.0: 13913 ; SANDY-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] 13914 ; SANDY-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] 13915 ; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13916 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 13917 ; 13918 ; SANDY-LABEL: test_pxor: 13919 ; SANDY: # %bb.0: 13920 ; SANDY-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 13921 ; SANDY-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 13922 ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13923 ; SANDY-NEXT: retq # sched: [1:1.00] 13924 ; 13925 ; HASWELL-SSE-LABEL: test_pxor: 13926 ; HASWELL-SSE: # %bb.0: 13927 ; HASWELL-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] 13928 ; HASWELL-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] 13929 ; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13930 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 13931 ; 13932 ; HASWELL-LABEL: test_pxor: 13933 ; HASWELL: # %bb.0: 13934 ; HASWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 13935 ; HASWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 13936 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13937 
; HASWELL-NEXT: retq # sched: [7:1.00] 13938 ; 13939 ; BROADWELL-SSE-LABEL: test_pxor: 13940 ; BROADWELL-SSE: # %bb.0: 13941 ; BROADWELL-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] 13942 ; BROADWELL-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [6:0.50] 13943 ; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13944 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 13945 ; 13946 ; BROADWELL-LABEL: test_pxor: 13947 ; BROADWELL: # %bb.0: 13948 ; BROADWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 13949 ; BROADWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 13950 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13951 ; BROADWELL-NEXT: retq # sched: [7:1.00] 13952 ; 13953 ; SKYLAKE-SSE-LABEL: test_pxor: 13954 ; SKYLAKE-SSE: # %bb.0: 13955 ; SKYLAKE-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] 13956 ; SKYLAKE-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] 13957 ; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 13958 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 13959 ; 13960 ; SKYLAKE-LABEL: test_pxor: 13961 ; SKYLAKE: # %bb.0: 13962 ; SKYLAKE-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 13963 ; SKYLAKE-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 13964 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 13965 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 13966 ; 13967 ; SKX-SSE-LABEL: test_pxor: 13968 ; SKX-SSE: # %bb.0: 13969 ; SKX-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] 13970 ; SKX-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] 13971 ; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] 13972 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 13973 ; 13974 ; SKX-LABEL: test_pxor: 13975 ; SKX: # %bb.0: 13976 ; SKX-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 13977 ; SKX-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 13978 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 13979 ; SKX-NEXT: retq # sched: [7:1.00] 13980 ; 13981 ; BTVER2-SSE-LABEL: test_pxor: 13982 ; BTVER2-SSE: # %bb.0: 13983 ; BTVER2-SSE-NEXT: pxor 
%xmm1, %xmm0 # sched: [1:0.50] 13984 ; BTVER2-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [6:1.00] 13985 ; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] 13986 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 13987 ; 13988 ; BTVER2-LABEL: test_pxor: 13989 ; BTVER2: # %bb.0: 13990 ; BTVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13991 ; BTVER2-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 13992 ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 13993 ; BTVER2-NEXT: retq # sched: [4:1.00] 13994 ; 13995 ; ZNVER1-SSE-LABEL: test_pxor: 13996 ; ZNVER1-SSE: # %bb.0: 13997 ; ZNVER1-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.25] 13998 ; ZNVER1-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [8:0.50] 13999 ; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] 14000 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 14001 ; 14002 ; ZNVER1-LABEL: test_pxor: 14003 ; ZNVER1: # %bb.0: 14004 ; ZNVER1-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 14005 ; ZNVER1-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 14006 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 14007 ; ZNVER1-NEXT: retq # sched: [1:0.50] 14008 %1 = xor <2 x i64> %a0, %a1 14009 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 14010 %3 = xor <2 x i64> %1, %2 14011 %4 = add <2 x i64> %3, %a1 14012 ret <2 x i64> %4 14013 } 14014 14015 define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { 14016 ; GENERIC-LABEL: test_shufpd: 14017 ; GENERIC: # %bb.0: 14018 ; GENERIC-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] 14019 ; GENERIC-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] 14020 ; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14021 ; GENERIC-NEXT: retq # sched: [1:1.00] 14022 ; 14023 ; ATOM-LABEL: test_shufpd: 14024 ; ATOM: # %bb.0: 14025 ; ATOM-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] 14026 ; ATOM-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [1:1.00] 14027 ; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] 14028 
; ATOM-NEXT: retq # sched: [79:39.50] 14029 ; 14030 ; SLM-LABEL: test_shufpd: 14031 ; SLM: # %bb.0: 14032 ; SLM-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] 14033 ; SLM-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [4:1.00] 14034 ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14035 ; SLM-NEXT: retq # sched: [4:1.00] 14036 ; 14037 ; SANDY-SSE-LABEL: test_shufpd: 14038 ; SANDY-SSE: # %bb.0: 14039 ; SANDY-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] 14040 ; SANDY-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] 14041 ; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14042 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 14043 ; 14044 ; SANDY-LABEL: test_shufpd: 14045 ; SANDY: # %bb.0: 14046 ; SANDY-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] 14047 ; SANDY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] 14048 ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14049 ; SANDY-NEXT: retq # sched: [1:1.00] 14050 ; 14051 ; HASWELL-SSE-LABEL: test_shufpd: 14052 ; HASWELL-SSE: # %bb.0: 14053 ; HASWELL-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] 14054 ; HASWELL-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] 14055 ; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14056 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 14057 ; 14058 ; HASWELL-LABEL: test_shufpd: 14059 ; HASWELL: # %bb.0: 14060 ; HASWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] 14061 ; HASWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] 14062 ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14063 ; HASWELL-NEXT: retq # sched: [7:1.00] 14064 ; 14065 ; BROADWELL-SSE-LABEL: test_shufpd: 14066 ; BROADWELL-SSE: # %bb.0: 14067 ; BROADWELL-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] 14068 ; BROADWELL-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] 14069 ; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: 
[3:1.00] 14070 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 14071 ; 14072 ; BROADWELL-LABEL: test_shufpd: 14073 ; BROADWELL: # %bb.0: 14074 ; BROADWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] 14075 ; BROADWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] 14076 ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14077 ; BROADWELL-NEXT: retq # sched: [7:1.00] 14078 ; 14079 ; SKYLAKE-SSE-LABEL: test_shufpd: 14080 ; SKYLAKE-SSE: # %bb.0: 14081 ; SKYLAKE-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] 14082 ; SKYLAKE-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] 14083 ; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] 14084 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 14085 ; 14086 ; SKYLAKE-LABEL: test_shufpd: 14087 ; SKYLAKE: # %bb.0: 14088 ; SKYLAKE-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] 14089 ; SKYLAKE-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] 14090 ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 14091 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 14092 ; 14093 ; SKX-SSE-LABEL: test_shufpd: 14094 ; SKX-SSE: # %bb.0: 14095 ; SKX-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] 14096 ; SKX-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] 14097 ; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] 14098 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 14099 ; 14100 ; SKX-LABEL: test_shufpd: 14101 ; SKX: # %bb.0: 14102 ; SKX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] 14103 ; SKX-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] 14104 ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 14105 ; SKX-NEXT: retq # sched: [7:1.00] 14106 ; 14107 ; BTVER2-SSE-LABEL: test_shufpd: 14108 ; BTVER2-SSE: # %bb.0: 14109 ; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] 14110 ; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] 14111 ; BTVER2-SSE-NEXT: 
addpd %xmm1, %xmm0 # sched: [3:1.00] 14112 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 14113 ; 14114 ; BTVER2-LABEL: test_shufpd: 14115 ; BTVER2: # %bb.0: 14116 ; BTVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] 14117 ; BTVER2-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] 14118 ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14119 ; BTVER2-NEXT: retq # sched: [4:1.00] 14120 ; 14121 ; ZNVER1-SSE-LABEL: test_shufpd: 14122 ; ZNVER1-SSE: # %bb.0: 14123 ; ZNVER1-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] 14124 ; ZNVER1-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50] 14125 ; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14126 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 14127 ; 14128 ; ZNVER1-LABEL: test_shufpd: 14129 ; ZNVER1: # %bb.0: 14130 ; ZNVER1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] 14131 ; ZNVER1-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50] 14132 ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14133 ; ZNVER1-NEXT: retq # sched: [1:0.50] 14134 %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2> 14135 %2 = load <2 x double>, <2 x double> *%a2, align 16 14136 %3 = shufflevector <2 x double> %a1, <2 x double> %2, <2 x i32> <i32 1, i32 2> 14137 %4 = fadd <2 x double> %1, %3 14138 ret <2 x double> %4 14139 } 14140 14141 define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { 14142 ; GENERIC-LABEL: test_sqrtpd: 14143 ; GENERIC: # %bb.0: 14144 ; GENERIC-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:21.00] 14145 ; GENERIC-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:21.00] 14146 ; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14147 ; GENERIC-NEXT: retq # sched: [1:1.00] 14148 ; 14149 ; ATOM-LABEL: test_sqrtpd: 14150 ; ATOM: # %bb.0: 14151 ; ATOM-NEXT: sqrtpd %xmm0, %xmm1 # sched: [125:62.50] 14152 ; ATOM-NEXT: sqrtpd (%rdi), %xmm0 # sched: [125:62.50] 14153 ; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: 
[6:3.00] 14154 ; ATOM-NEXT: retq # sched: [79:39.50] 14155 ; 14156 ; SLM-LABEL: test_sqrtpd: 14157 ; SLM: # %bb.0: 14158 ; SLM-NEXT: sqrtpd (%rdi), %xmm1 # sched: [74:70.00] 14159 ; SLM-NEXT: sqrtpd %xmm0, %xmm0 # sched: [71:70.00] 14160 ; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 14161 ; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] 14162 ; SLM-NEXT: retq # sched: [4:1.00] 14163 ; 14164 ; SANDY-SSE-LABEL: test_sqrtpd: 14165 ; SANDY-SSE: # %bb.0: 14166 ; SANDY-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:21.00] 14167 ; SANDY-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:21.00] 14168 ; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14169 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 14170 ; 14171 ; SANDY-LABEL: test_sqrtpd: 14172 ; SANDY: # %bb.0: 14173 ; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:21.00] 14174 ; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:21.00] 14175 ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14176 ; SANDY-NEXT: retq # sched: [1:1.00] 14177 ; 14178 ; HASWELL-SSE-LABEL: test_sqrtpd: 14179 ; HASWELL-SSE: # %bb.0: 14180 ; HASWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [16:14.00] 14181 ; HASWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [22:14.00] 14182 ; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14183 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 14184 ; 14185 ; HASWELL-LABEL: test_sqrtpd: 14186 ; HASWELL: # %bb.0: 14187 ; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [16:14.00] 14188 ; HASWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [22:14.00] 14189 ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14190 ; HASWELL-NEXT: retq # sched: [7:1.00] 14191 ; 14192 ; BROADWELL-SSE-LABEL: test_sqrtpd: 14193 ; BROADWELL-SSE: # %bb.0: 14194 ; BROADWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [16:14.00] 14195 ; BROADWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [21:14.00] 14196 ; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14197 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 14198 ; 14199 ; BROADWELL-LABEL: 
test_sqrtpd: 14200 ; BROADWELL: # %bb.0: 14201 ; BROADWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [16:14.00] 14202 ; BROADWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [21:14.00] 14203 ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14204 ; BROADWELL-NEXT: retq # sched: [7:1.00] 14205 ; 14206 ; SKYLAKE-SSE-LABEL: test_sqrtpd: 14207 ; SKYLAKE-SSE: # %bb.0: 14208 ; SKYLAKE-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:6.00] 14209 ; SKYLAKE-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:6.00] 14210 ; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] 14211 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 14212 ; 14213 ; SKYLAKE-LABEL: test_sqrtpd: 14214 ; SKYLAKE: # %bb.0: 14215 ; SKYLAKE-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:6.00] 14216 ; SKYLAKE-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [24:6.00] 14217 ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 14218 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 14219 ; 14220 ; SKX-SSE-LABEL: test_sqrtpd: 14221 ; SKX-SSE: # %bb.0: 14222 ; SKX-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:6.00] 14223 ; SKX-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:6.00] 14224 ; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] 14225 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 14226 ; 14227 ; SKX-LABEL: test_sqrtpd: 14228 ; SKX: # %bb.0: 14229 ; SKX-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:6.00] 14230 ; SKX-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [24:6.00] 14231 ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 14232 ; SKX-NEXT: retq # sched: [7:1.00] 14233 ; 14234 ; BTVER2-SSE-LABEL: test_sqrtpd: 14235 ; BTVER2-SSE: # %bb.0: 14236 ; BTVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [27:27.00] 14237 ; BTVER2-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [32:27.00] 14238 ; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14239 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 14240 ; 14241 ; BTVER2-LABEL: test_sqrtpd: 14242 ; BTVER2: # %bb.0: 14243 ; BTVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [32:27.00] 14244 ; BTVER2-NEXT: vsqrtpd %xmm0, %xmm0 # sched: 
[27:27.00] 14245 ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14246 ; BTVER2-NEXT: retq # sched: [4:1.00] 14247 ; 14248 ; ZNVER1-SSE-LABEL: test_sqrtpd: 14249 ; ZNVER1-SSE: # %bb.0: 14250 ; ZNVER1-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:20.00] 14251 ; ZNVER1-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:20.00] 14252 ; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14253 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 14254 ; 14255 ; ZNVER1-LABEL: test_sqrtpd: 14256 ; ZNVER1: # %bb.0: 14257 ; ZNVER1-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:20.00] 14258 ; ZNVER1-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [20:20.00] 14259 ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14260 ; ZNVER1-NEXT: retq # sched: [1:0.50] 14261 %1 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) 14262 %2 = load <2 x double>, <2 x double> *%a1, align 16 14263 %3 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %2) 14264 %4 = fadd <2 x double> %1, %3 14265 ret <2 x double> %4 14266 } 14267 declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone 14268 14269 ; TODO - sqrtsd_m 14270 14271 define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { 14272 ; GENERIC-LABEL: test_sqrtsd: 14273 ; GENERIC: # %bb.0: 14274 ; GENERIC-NEXT: sqrtsd %xmm0, %xmm0 # sched: [21:21.00] 14275 ; GENERIC-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] 14276 ; GENERIC-NEXT: sqrtsd %xmm1, %xmm1 # sched: [21:21.00] 14277 ; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14278 ; GENERIC-NEXT: retq # sched: [1:1.00] 14279 ; 14280 ; ATOM-LABEL: test_sqrtsd: 14281 ; ATOM: # %bb.0: 14282 ; ATOM-NEXT: movapd (%rdi), %xmm1 # sched: [1:1.00] 14283 ; ATOM-NEXT: sqrtsd %xmm0, %xmm0 # sched: [62:31.00] 14284 ; ATOM-NEXT: sqrtsd %xmm1, %xmm1 # sched: [62:31.00] 14285 ; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] 14286 ; ATOM-NEXT: retq # sched: [79:39.50] 14287 ; 14288 ; SLM-LABEL: test_sqrtsd: 14289 ; SLM: # %bb.0: 14290 ; SLM-NEXT: movapd (%rdi), %xmm1 # 
sched: [3:1.00] 14291 ; SLM-NEXT: sqrtsd %xmm0, %xmm0 # sched: [35:35.00] 14292 ; SLM-NEXT: sqrtsd %xmm1, %xmm1 # sched: [35:35.00] 14293 ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14294 ; SLM-NEXT: retq # sched: [4:1.00] 14295 ; 14296 ; SANDY-SSE-LABEL: test_sqrtsd: 14297 ; SANDY-SSE: # %bb.0: 14298 ; SANDY-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [21:21.00] 14299 ; SANDY-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] 14300 ; SANDY-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [21:21.00] 14301 ; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14302 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 14303 ; 14304 ; SANDY-LABEL: test_sqrtsd: 14305 ; SANDY: # %bb.0: 14306 ; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:21.00] 14307 ; SANDY-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] 14308 ; SANDY-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:21.00] 14309 ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14310 ; SANDY-NEXT: retq # sched: [1:1.00] 14311 ; 14312 ; HASWELL-SSE-LABEL: test_sqrtsd: 14313 ; HASWELL-SSE: # %bb.0: 14314 ; HASWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [16:14.00] 14315 ; HASWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] 14316 ; HASWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [16:14.00] 14317 ; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14318 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 14319 ; 14320 ; HASWELL-LABEL: test_sqrtsd: 14321 ; HASWELL: # %bb.0: 14322 ; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [16:14.00] 14323 ; HASWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] 14324 ; HASWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [16:14.00] 14325 ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14326 ; HASWELL-NEXT: retq # sched: [7:1.00] 14327 ; 14328 ; BROADWELL-SSE-LABEL: test_sqrtsd: 14329 ; BROADWELL-SSE: # %bb.0: 14330 ; BROADWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [16:8.00] 14331 ; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:0.50] 14332 ; BROADWELL-SSE-NEXT: sqrtsd %xmm1, 
%xmm1 # sched: [16:8.00] 14333 ; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14334 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 14335 ; 14336 ; BROADWELL-LABEL: test_sqrtsd: 14337 ; BROADWELL: # %bb.0: 14338 ; BROADWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [16:8.00] 14339 ; BROADWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:0.50] 14340 ; BROADWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [16:8.00] 14341 ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14342 ; BROADWELL-NEXT: retq # sched: [7:1.00] 14343 ; 14344 ; SKYLAKE-SSE-LABEL: test_sqrtsd: 14345 ; SKYLAKE-SSE: # %bb.0: 14346 ; SKYLAKE-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:6.00] 14347 ; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] 14348 ; SKYLAKE-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:6.00] 14349 ; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] 14350 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 14351 ; 14352 ; SKYLAKE-LABEL: test_sqrtsd: 14353 ; SKYLAKE: # %bb.0: 14354 ; SKYLAKE-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00] 14355 ; SKYLAKE-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] 14356 ; SKYLAKE-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:6.00] 14357 ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 14358 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 14359 ; 14360 ; SKX-SSE-LABEL: test_sqrtsd: 14361 ; SKX-SSE: # %bb.0: 14362 ; SKX-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:6.00] 14363 ; SKX-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] 14364 ; SKX-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:6.00] 14365 ; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] 14366 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 14367 ; 14368 ; SKX-LABEL: test_sqrtsd: 14369 ; SKX: # %bb.0: 14370 ; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00] 14371 ; SKX-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] 14372 ; SKX-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:6.00] 14373 ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 14374 ; SKX-NEXT: retq # 
sched: [7:1.00] 14375 ; 14376 ; BTVER2-SSE-LABEL: test_sqrtsd: 14377 ; BTVER2-SSE: # %bb.0: 14378 ; BTVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:1.00] 14379 ; BTVER2-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [27:27.00] 14380 ; BTVER2-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [27:27.00] 14381 ; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14382 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 14383 ; 14384 ; BTVER2-LABEL: test_sqrtsd: 14385 ; BTVER2: # %bb.0: 14386 ; BTVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:1.00] 14387 ; BTVER2-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [27:27.00] 14388 ; BTVER2-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [27:27.00] 14389 ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14390 ; BTVER2-NEXT: retq # sched: [4:1.00] 14391 ; 14392 ; ZNVER1-SSE-LABEL: test_sqrtsd: 14393 ; ZNVER1-SSE: # %bb.0: 14394 ; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [8:0.50] 14395 ; ZNVER1-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:20.00] 14396 ; ZNVER1-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:20.00] 14397 ; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14398 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 14399 ; 14400 ; ZNVER1-LABEL: test_sqrtsd: 14401 ; ZNVER1: # %bb.0: 14402 ; ZNVER1-NEXT: vmovapd (%rdi), %xmm1 # sched: [8:0.50] 14403 ; ZNVER1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [20:20.00] 14404 ; ZNVER1-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [20:20.00] 14405 ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14406 ; ZNVER1-NEXT: retq # sched: [1:0.50] 14407 %1 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) 14408 %2 = load <2 x double>, <2 x double> *%a1, align 16 14409 %3 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2) 14410 %4 = fadd <2 x double> %1, %3 14411 ret <2 x double> %4 14412 } 14413 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone 14414 14415 define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { 14416 ; 
GENERIC-LABEL: test_subpd: 14417 ; GENERIC: # %bb.0: 14418 ; GENERIC-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] 14419 ; GENERIC-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00] 14420 ; GENERIC-NEXT: retq # sched: [1:1.00] 14421 ; 14422 ; ATOM-LABEL: test_subpd: 14423 ; ATOM: # %bb.0: 14424 ; ATOM-NEXT: subpd %xmm1, %xmm0 # sched: [6:3.00] 14425 ; ATOM-NEXT: subpd (%rdi), %xmm0 # sched: [7:3.50] 14426 ; ATOM-NEXT: retq # sched: [79:39.50] 14427 ; 14428 ; SLM-LABEL: test_subpd: 14429 ; SLM: # %bb.0: 14430 ; SLM-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] 14431 ; SLM-NEXT: subpd (%rdi), %xmm0 # sched: [6:1.00] 14432 ; SLM-NEXT: retq # sched: [4:1.00] 14433 ; 14434 ; SANDY-SSE-LABEL: test_subpd: 14435 ; SANDY-SSE: # %bb.0: 14436 ; SANDY-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] 14437 ; SANDY-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00] 14438 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 14439 ; 14440 ; SANDY-LABEL: test_subpd: 14441 ; SANDY: # %bb.0: 14442 ; SANDY-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14443 ; SANDY-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 14444 ; SANDY-NEXT: retq # sched: [1:1.00] 14445 ; 14446 ; HASWELL-SSE-LABEL: test_subpd: 14447 ; HASWELL-SSE: # %bb.0: 14448 ; HASWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] 14449 ; HASWELL-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00] 14450 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 14451 ; 14452 ; HASWELL-LABEL: test_subpd: 14453 ; HASWELL: # %bb.0: 14454 ; HASWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14455 ; HASWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 14456 ; HASWELL-NEXT: retq # sched: [7:1.00] 14457 ; 14458 ; BROADWELL-SSE-LABEL: test_subpd: 14459 ; BROADWELL-SSE: # %bb.0: 14460 ; BROADWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] 14461 ; BROADWELL-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [8:1.00] 14462 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 14463 ; 14464 ; BROADWELL-LABEL: test_subpd: 14465 ; BROADWELL: # %bb.0: 14466 ; BROADWELL-NEXT: 
vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14467 ; BROADWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 14468 ; BROADWELL-NEXT: retq # sched: [7:1.00] 14469 ; 14470 ; SKYLAKE-SSE-LABEL: test_subpd: 14471 ; SKYLAKE-SSE: # %bb.0: 14472 ; SKYLAKE-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] 14473 ; SKYLAKE-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50] 14474 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 14475 ; 14476 ; SKYLAKE-LABEL: test_subpd: 14477 ; SKYLAKE: # %bb.0: 14478 ; SKYLAKE-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 14479 ; SKYLAKE-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 14480 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 14481 ; 14482 ; SKX-SSE-LABEL: test_subpd: 14483 ; SKX-SSE: # %bb.0: 14484 ; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] 14485 ; SKX-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50] 14486 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 14487 ; 14488 ; SKX-LABEL: test_subpd: 14489 ; SKX: # %bb.0: 14490 ; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 14491 ; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] 14492 ; SKX-NEXT: retq # sched: [7:1.00] 14493 ; 14494 ; BTVER2-SSE-LABEL: test_subpd: 14495 ; BTVER2-SSE: # %bb.0: 14496 ; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] 14497 ; BTVER2-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [8:1.00] 14498 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 14499 ; 14500 ; BTVER2-LABEL: test_subpd: 14501 ; BTVER2: # %bb.0: 14502 ; BTVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14503 ; BTVER2-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 14504 ; BTVER2-NEXT: retq # sched: [4:1.00] 14505 ; 14506 ; ZNVER1-SSE-LABEL: test_subpd: 14507 ; ZNVER1-SSE: # %bb.0: 14508 ; ZNVER1-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] 14509 ; ZNVER1-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:1.00] 14510 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 14511 ; 14512 ; ZNVER1-LABEL: test_subpd: 14513 ; ZNVER1: # %bb.0: 14514 ; ZNVER1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: 
[3:1.00] 14515 ; ZNVER1-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 14516 ; ZNVER1-NEXT: retq # sched: [1:0.50] 14517 %1 = fsub <2 x double> %a0, %a1 14518 %2 = load <2 x double>, <2 x double> *%a2, align 16 14519 %3 = fsub <2 x double> %1, %2 14520 ret <2 x double> %3 14521 } 14522 14523 define double @test_subsd(double %a0, double %a1, double *%a2) { 14524 ; GENERIC-LABEL: test_subsd: 14525 ; GENERIC: # %bb.0: 14526 ; GENERIC-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] 14527 ; GENERIC-NEXT: subsd (%rdi), %xmm0 # sched: [9:1.00] 14528 ; GENERIC-NEXT: retq # sched: [1:1.00] 14529 ; 14530 ; ATOM-LABEL: test_subsd: 14531 ; ATOM: # %bb.0: 14532 ; ATOM-NEXT: subsd %xmm1, %xmm0 # sched: [5:5.00] 14533 ; ATOM-NEXT: subsd (%rdi), %xmm0 # sched: [5:5.00] 14534 ; ATOM-NEXT: retq # sched: [79:39.50] 14535 ; 14536 ; SLM-LABEL: test_subsd: 14537 ; SLM: # %bb.0: 14538 ; SLM-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] 14539 ; SLM-NEXT: subsd (%rdi), %xmm0 # sched: [6:1.00] 14540 ; SLM-NEXT: retq # sched: [4:1.00] 14541 ; 14542 ; SANDY-SSE-LABEL: test_subsd: 14543 ; SANDY-SSE: # %bb.0: 14544 ; SANDY-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] 14545 ; SANDY-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:1.00] 14546 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 14547 ; 14548 ; SANDY-LABEL: test_subsd: 14549 ; SANDY: # %bb.0: 14550 ; SANDY-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14551 ; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] 14552 ; SANDY-NEXT: retq # sched: [1:1.00] 14553 ; 14554 ; HASWELL-SSE-LABEL: test_subsd: 14555 ; HASWELL-SSE: # %bb.0: 14556 ; HASWELL-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] 14557 ; HASWELL-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00] 14558 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 14559 ; 14560 ; HASWELL-LABEL: test_subsd: 14561 ; HASWELL: # %bb.0: 14562 ; HASWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14563 ; HASWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 14564 ; HASWELL-NEXT: retq # sched: 
[7:1.00] 14565 ; 14566 ; BROADWELL-SSE-LABEL: test_subsd: 14567 ; BROADWELL-SSE: # %bb.0: 14568 ; BROADWELL-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] 14569 ; BROADWELL-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00] 14570 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 14571 ; 14572 ; BROADWELL-LABEL: test_subsd: 14573 ; BROADWELL: # %bb.0: 14574 ; BROADWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14575 ; BROADWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 14576 ; BROADWELL-NEXT: retq # sched: [7:1.00] 14577 ; 14578 ; SKYLAKE-SSE-LABEL: test_subsd: 14579 ; SKYLAKE-SSE: # %bb.0: 14580 ; SKYLAKE-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.50] 14581 ; SKYLAKE-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50] 14582 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 14583 ; 14584 ; SKYLAKE-LABEL: test_subsd: 14585 ; SKYLAKE: # %bb.0: 14586 ; SKYLAKE-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 14587 ; SKYLAKE-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 14588 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 14589 ; 14590 ; SKX-SSE-LABEL: test_subsd: 14591 ; SKX-SSE: # %bb.0: 14592 ; SKX-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.50] 14593 ; SKX-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50] 14594 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 14595 ; 14596 ; SKX-LABEL: test_subsd: 14597 ; SKX: # %bb.0: 14598 ; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 14599 ; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] 14600 ; SKX-NEXT: retq # sched: [7:1.00] 14601 ; 14602 ; BTVER2-SSE-LABEL: test_subsd: 14603 ; BTVER2-SSE: # %bb.0: 14604 ; BTVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] 14605 ; BTVER2-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00] 14606 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 14607 ; 14608 ; BTVER2-LABEL: test_subsd: 14609 ; BTVER2: # %bb.0: 14610 ; BTVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14611 ; BTVER2-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 14612 ; BTVER2-NEXT: retq # sched: [4:1.00] 14613 ; 
14614 ; ZNVER1-SSE-LABEL: test_subsd: 14615 ; ZNVER1-SSE: # %bb.0: 14616 ; ZNVER1-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] 14617 ; ZNVER1-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [10:1.00] 14618 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 14619 ; 14620 ; ZNVER1-LABEL: test_subsd: 14621 ; ZNVER1: # %bb.0: 14622 ; ZNVER1-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14623 ; ZNVER1-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] 14624 ; ZNVER1-NEXT: retq # sched: [1:0.50] 14625 %1 = fsub double %a0, %a1 14626 %2 = load double, double *%a2, align 8 14627 %3 = fsub double %1, %2 14628 ret double %3 14629 } 14630 14631 define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { 14632 ; GENERIC-LABEL: test_ucomisd: 14633 ; GENERIC: # %bb.0: 14634 ; GENERIC-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] 14635 ; GENERIC-NEXT: setnp %al # sched: [1:0.50] 14636 ; GENERIC-NEXT: sete %cl # sched: [1:0.50] 14637 ; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33] 14638 ; GENERIC-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] 14639 ; GENERIC-NEXT: setnp %al # sched: [1:0.50] 14640 ; GENERIC-NEXT: sete %dl # sched: [1:0.50] 14641 ; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33] 14642 ; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33] 14643 ; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33] 14644 ; GENERIC-NEXT: retq # sched: [1:1.00] 14645 ; 14646 ; ATOM-LABEL: test_ucomisd: 14647 ; ATOM: # %bb.0: 14648 ; ATOM-NEXT: ucomisd %xmm1, %xmm0 # sched: [9:4.50] 14649 ; ATOM-NEXT: setnp %al # sched: [1:0.50] 14650 ; ATOM-NEXT: sete %cl # sched: [1:0.50] 14651 ; ATOM-NEXT: andb %al, %cl # sched: [1:0.50] 14652 ; ATOM-NEXT: ucomisd (%rdi), %xmm0 # sched: [10:5.00] 14653 ; ATOM-NEXT: setnp %al # sched: [1:0.50] 14654 ; ATOM-NEXT: sete %dl # sched: [1:0.50] 14655 ; ATOM-NEXT: andb %al, %dl # sched: [1:0.50] 14656 ; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50] 14657 ; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00] 14658 ; ATOM-NEXT: retq # sched: [79:39.50] 14659 ; 14660 ; 
SLM-LABEL: test_ucomisd: 14661 ; SLM: # %bb.0: 14662 ; SLM-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] 14663 ; SLM-NEXT: setnp %al # sched: [1:0.50] 14664 ; SLM-NEXT: sete %cl # sched: [1:0.50] 14665 ; SLM-NEXT: andb %al, %cl # sched: [1:0.50] 14666 ; SLM-NEXT: ucomisd (%rdi), %xmm0 # sched: [6:1.00] 14667 ; SLM-NEXT: setnp %al # sched: [1:0.50] 14668 ; SLM-NEXT: sete %dl # sched: [1:0.50] 14669 ; SLM-NEXT: andb %al, %dl # sched: [1:0.50] 14670 ; SLM-NEXT: orb %cl, %dl # sched: [1:0.50] 14671 ; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50] 14672 ; SLM-NEXT: retq # sched: [4:1.00] 14673 ; 14674 ; SANDY-SSE-LABEL: test_ucomisd: 14675 ; SANDY-SSE: # %bb.0: 14676 ; SANDY-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] 14677 ; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] 14678 ; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50] 14679 ; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] 14680 ; SANDY-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] 14681 ; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] 14682 ; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50] 14683 ; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33] 14684 ; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] 14685 ; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] 14686 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 14687 ; 14688 ; SANDY-LABEL: test_ucomisd: 14689 ; SANDY: # %bb.0: 14690 ; SANDY-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] 14691 ; SANDY-NEXT: setnp %al # sched: [1:0.50] 14692 ; SANDY-NEXT: sete %cl # sched: [1:0.50] 14693 ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] 14694 ; SANDY-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] 14695 ; SANDY-NEXT: setnp %al # sched: [1:0.50] 14696 ; SANDY-NEXT: sete %dl # sched: [1:0.50] 14697 ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] 14698 ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] 14699 ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] 14700 ; SANDY-NEXT: retq # sched: [1:1.00] 14701 ; 14702 ; HASWELL-SSE-LABEL: test_ucomisd: 14703 ; HASWELL-SSE: # %bb.0: 14704 ; 
HASWELL-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] 14705 ; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] 14706 ; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50] 14707 ; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 14708 ; HASWELL-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] 14709 ; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] 14710 ; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50] 14711 ; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] 14712 ; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] 14713 ; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 14714 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 14715 ; 14716 ; HASWELL-LABEL: test_ucomisd: 14717 ; HASWELL: # %bb.0: 14718 ; HASWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] 14719 ; HASWELL-NEXT: setnp %al # sched: [1:0.50] 14720 ; HASWELL-NEXT: sete %cl # sched: [1:0.50] 14721 ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] 14722 ; HASWELL-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] 14723 ; HASWELL-NEXT: setnp %al # sched: [1:0.50] 14724 ; HASWELL-NEXT: sete %dl # sched: [1:0.50] 14725 ; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] 14726 ; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] 14727 ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] 14728 ; HASWELL-NEXT: retq # sched: [7:1.00] 14729 ; 14730 ; BROADWELL-SSE-LABEL: test_ucomisd: 14731 ; BROADWELL-SSE: # %bb.0: 14732 ; BROADWELL-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] 14733 ; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] 14734 ; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50] 14735 ; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 14736 ; BROADWELL-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] 14737 ; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] 14738 ; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50] 14739 ; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] 14740 ; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] 14741 ; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 14742 ; BROADWELL-SSE-NEXT: retq # 
sched: [7:1.00] 14743 ; 14744 ; BROADWELL-LABEL: test_ucomisd: 14745 ; BROADWELL: # %bb.0: 14746 ; BROADWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] 14747 ; BROADWELL-NEXT: setnp %al # sched: [1:0.50] 14748 ; BROADWELL-NEXT: sete %cl # sched: [1:0.50] 14749 ; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] 14750 ; BROADWELL-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] 14751 ; BROADWELL-NEXT: setnp %al # sched: [1:0.50] 14752 ; BROADWELL-NEXT: sete %dl # sched: [1:0.50] 14753 ; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] 14754 ; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] 14755 ; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] 14756 ; BROADWELL-NEXT: retq # sched: [7:1.00] 14757 ; 14758 ; SKYLAKE-SSE-LABEL: test_ucomisd: 14759 ; SKYLAKE-SSE: # %bb.0: 14760 ; SKYLAKE-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] 14761 ; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] 14762 ; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50] 14763 ; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 14764 ; SKYLAKE-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00] 14765 ; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] 14766 ; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50] 14767 ; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25] 14768 ; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] 14769 ; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 14770 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 14771 ; 14772 ; SKYLAKE-LABEL: test_ucomisd: 14773 ; SKYLAKE: # %bb.0: 14774 ; SKYLAKE-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] 14775 ; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] 14776 ; SKYLAKE-NEXT: sete %cl # sched: [1:0.50] 14777 ; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25] 14778 ; SKYLAKE-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00] 14779 ; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] 14780 ; SKYLAKE-NEXT: sete %dl # sched: [1:0.50] 14781 ; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25] 14782 ; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25] 14783 ; SKYLAKE-NEXT: movzbl %dl, %eax 
# sched: [1:0.25] 14784 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 14785 ; 14786 ; SKX-SSE-LABEL: test_ucomisd: 14787 ; SKX-SSE: # %bb.0: 14788 ; SKX-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] 14789 ; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] 14790 ; SKX-SSE-NEXT: sete %cl # sched: [1:0.50] 14791 ; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 14792 ; SKX-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00] 14793 ; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] 14794 ; SKX-SSE-NEXT: sete %dl # sched: [1:0.50] 14795 ; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25] 14796 ; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] 14797 ; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 14798 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 14799 ; 14800 ; SKX-LABEL: test_ucomisd: 14801 ; SKX: # %bb.0: 14802 ; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] 14803 ; SKX-NEXT: setnp %al # sched: [1:0.50] 14804 ; SKX-NEXT: sete %cl # sched: [1:0.50] 14805 ; SKX-NEXT: andb %al, %cl # sched: [1:0.25] 14806 ; SKX-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00] 14807 ; SKX-NEXT: setnp %al # sched: [1:0.50] 14808 ; SKX-NEXT: sete %dl # sched: [1:0.50] 14809 ; SKX-NEXT: andb %al, %dl # sched: [1:0.25] 14810 ; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] 14811 ; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] 14812 ; SKX-NEXT: retq # sched: [7:1.00] 14813 ; 14814 ; BTVER2-SSE-LABEL: test_ucomisd: 14815 ; BTVER2-SSE: # %bb.0: 14816 ; BTVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] 14817 ; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] 14818 ; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50] 14819 ; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] 14820 ; BTVER2-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] 14821 ; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] 14822 ; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50] 14823 ; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] 14824 ; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] 14825 ; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] 14826 ; BTVER2-SSE-NEXT: retq 
# sched: [4:1.00] 14827 ; 14828 ; BTVER2-LABEL: test_ucomisd: 14829 ; BTVER2: # %bb.0: 14830 ; BTVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] 14831 ; BTVER2-NEXT: setnp %al # sched: [1:0.50] 14832 ; BTVER2-NEXT: sete %cl # sched: [1:0.50] 14833 ; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50] 14834 ; BTVER2-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] 14835 ; BTVER2-NEXT: setnp %al # sched: [1:0.50] 14836 ; BTVER2-NEXT: sete %dl # sched: [1:0.50] 14837 ; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50] 14838 ; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50] 14839 ; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] 14840 ; BTVER2-NEXT: retq # sched: [4:1.00] 14841 ; 14842 ; ZNVER1-SSE-LABEL: test_ucomisd: 14843 ; ZNVER1-SSE: # %bb.0: 14844 ; ZNVER1-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] 14845 ; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] 14846 ; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25] 14847 ; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] 14848 ; ZNVER1-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [10:1.00] 14849 ; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] 14850 ; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25] 14851 ; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25] 14852 ; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] 14853 ; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] 14854 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 14855 ; 14856 ; ZNVER1-LABEL: test_ucomisd: 14857 ; ZNVER1: # %bb.0: 14858 ; ZNVER1-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] 14859 ; ZNVER1-NEXT: setnp %al # sched: [1:0.25] 14860 ; ZNVER1-NEXT: sete %cl # sched: [1:0.25] 14861 ; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25] 14862 ; ZNVER1-NEXT: vucomisd (%rdi), %xmm0 # sched: [10:1.00] 14863 ; ZNVER1-NEXT: setnp %al # sched: [1:0.25] 14864 ; ZNVER1-NEXT: sete %dl # sched: [1:0.25] 14865 ; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25] 14866 ; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25] 14867 ; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25] 14868 ; ZNVER1-NEXT: retq # sched: 
[1:0.50] 14869 %1 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) 14870 %2 = load <2 x double>, <2 x double> *%a2, align 8 14871 %3 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %2) 14872 %4 = or i32 %1, %3 14873 ret i32 %4 14874 } 14875 declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone 14876 14877 define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { 14878 ; GENERIC-LABEL: test_unpckhpd: 14879 ; GENERIC: # %bb.0: 14880 ; GENERIC-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 14881 ; GENERIC-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] 14882 ; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14883 ; GENERIC-NEXT: retq # sched: [1:1.00] 14884 ; 14885 ; ATOM-LABEL: test_unpckhpd: 14886 ; ATOM: # %bb.0: 14887 ; ATOM-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 14888 ; ATOM-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] 14889 ; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] 14890 ; ATOM-NEXT: retq # sched: [79:39.50] 14891 ; 14892 ; SLM-LABEL: test_unpckhpd: 14893 ; SLM: # %bb.0: 14894 ; SLM-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 14895 ; SLM-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [4:1.00] 14896 ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14897 ; SLM-NEXT: retq # sched: [4:1.00] 14898 ; 14899 ; SANDY-SSE-LABEL: test_unpckhpd: 14900 ; SANDY-SSE: # %bb.0: 14901 ; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 14902 ; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] 14903 ; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14904 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 14905 ; 14906 ; SANDY-LABEL: test_unpckhpd: 14907 ; SANDY: # %bb.0: 14908 ; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 14909 ; SANDY-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] 14910 ; 
SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14911 ; SANDY-NEXT: retq # sched: [1:1.00] 14912 ; 14913 ; HASWELL-SSE-LABEL: test_unpckhpd: 14914 ; HASWELL-SSE: # %bb.0: 14915 ; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 14916 ; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] 14917 ; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14918 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 14919 ; 14920 ; HASWELL-LABEL: test_unpckhpd: 14921 ; HASWELL: # %bb.0: 14922 ; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 14923 ; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] 14924 ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14925 ; HASWELL-NEXT: retq # sched: [7:1.00] 14926 ; 14927 ; BROADWELL-SSE-LABEL: test_unpckhpd: 14928 ; BROADWELL-SSE: # %bb.0: 14929 ; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 14930 ; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] 14931 ; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14932 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 14933 ; 14934 ; BROADWELL-LABEL: test_unpckhpd: 14935 ; BROADWELL: # %bb.0: 14936 ; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 14937 ; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] 14938 ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14939 ; BROADWELL-NEXT: retq # sched: [7:1.00] 14940 ; 14941 ; SKYLAKE-SSE-LABEL: test_unpckhpd: 14942 ; SKYLAKE-SSE: # %bb.0: 14943 ; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 14944 ; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] 14945 ; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] 14946 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 14947 ; 14948 ; SKYLAKE-LABEL: test_unpckhpd: 14949 ; SKYLAKE: # %bb.0: 14950 ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 
= xmm0[1],xmm1[1] sched: [1:1.00] 14951 ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] 14952 ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 14953 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 14954 ; 14955 ; SKX-SSE-LABEL: test_unpckhpd: 14956 ; SKX-SSE: # %bb.0: 14957 ; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 14958 ; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] 14959 ; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] 14960 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 14961 ; 14962 ; SKX-LABEL: test_unpckhpd: 14963 ; SKX: # %bb.0: 14964 ; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] 14965 ; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] 14966 ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 14967 ; SKX-NEXT: retq # sched: [7:1.00] 14968 ; 14969 ; BTVER2-SSE-LABEL: test_unpckhpd: 14970 ; BTVER2-SSE: # %bb.0: 14971 ; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] 14972 ; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] 14973 ; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14974 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 14975 ; 14976 ; BTVER2-LABEL: test_unpckhpd: 14977 ; BTVER2: # %bb.0: 14978 ; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] 14979 ; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] 14980 ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14981 ; BTVER2-NEXT: retq # sched: [4:1.00] 14982 ; 14983 ; ZNVER1-SSE-LABEL: test_unpckhpd: 14984 ; ZNVER1-SSE: # %bb.0: 14985 ; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] 14986 ; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50] 14987 ; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 14988 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 14989 ; 14990 ; ZNVER1-LABEL: test_unpckhpd: 14991 ; ZNVER1: # %bb.0: 14992 ; 
ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] 14993 ; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50] 14994 ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 14995 ; ZNVER1-NEXT: retq # sched: [1:0.50] 14996 %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3> 14997 %2 = load <2 x double>, <2 x double> *%a2, align 16 14998 %3 = shufflevector <2 x double> %a1, <2 x double> %2, <2 x i32> <i32 1, i32 3> 14999 %4 = fadd <2 x double> %1, %3 15000 ret <2 x double> %4 15001 } 15002 15003 define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { 15004 ; GENERIC-LABEL: test_unpcklpd: 15005 ; GENERIC: # %bb.0: 15006 ; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 15007 ; GENERIC-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00] 15008 ; GENERIC-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 15009 ; GENERIC-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 15010 ; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] 15011 ; GENERIC-NEXT: retq # sched: [1:1.00] 15012 ; 15013 ; ATOM-LABEL: test_unpcklpd: 15014 ; ATOM: # %bb.0: 15015 ; ATOM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 15016 ; ATOM-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.50] 15017 ; ATOM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] 15018 ; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00] 15019 ; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] 15020 ; ATOM-NEXT: retq # sched: [79:39.50] 15021 ; 15022 ; SLM-LABEL: test_unpcklpd: 15023 ; SLM: # %bb.0: 15024 ; SLM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 15025 ; SLM-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.50] 15026 ; SLM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00] 15027 ; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 15028 ; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] 15029 ; SLM-NEXT: retq # sched: [4:1.00] 15030 ; 15031 ; SANDY-SSE-LABEL: 
test_unpcklpd: 15032 ; SANDY-SSE: # %bb.0: 15033 ; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 15034 ; SANDY-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00] 15035 ; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 15036 ; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 15037 ; SANDY-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] 15038 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 15039 ; 15040 ; SANDY-LABEL: test_unpcklpd: 15041 ; SANDY: # %bb.0: 15042 ; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 15043 ; SANDY-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00] 15044 ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 15045 ; SANDY-NEXT: retq # sched: [1:1.00] 15046 ; 15047 ; HASWELL-SSE-LABEL: test_unpcklpd: 15048 ; HASWELL-SSE: # %bb.0: 15049 ; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 15050 ; HASWELL-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00] 15051 ; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 15052 ; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 15053 ; HASWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] 15054 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 15055 ; 15056 ; HASWELL-LABEL: test_unpcklpd: 15057 ; HASWELL: # %bb.0: 15058 ; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 15059 ; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00] 15060 ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 15061 ; HASWELL-NEXT: retq # sched: [7:1.00] 15062 ; 15063 ; BROADWELL-SSE-LABEL: test_unpcklpd: 15064 ; BROADWELL-SSE: # %bb.0: 15065 ; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 15066 ; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00] 15067 ; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 15068 ; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 15069 ; 
BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] 15070 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 15071 ; 15072 ; BROADWELL-LABEL: test_unpcklpd: 15073 ; BROADWELL: # %bb.0: 15074 ; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 15075 ; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [6:1.00] 15076 ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 15077 ; BROADWELL-NEXT: retq # sched: [7:1.00] 15078 ; 15079 ; SKYLAKE-SSE-LABEL: test_unpcklpd: 15080 ; SKYLAKE-SSE: # %bb.0: 15081 ; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 15082 ; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.33] 15083 ; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 15084 ; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] 15085 ; SKYLAKE-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] 15086 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 15087 ; 15088 ; SKYLAKE-LABEL: test_unpcklpd: 15089 ; SKYLAKE: # %bb.0: 15090 ; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 15091 ; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00] 15092 ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 15093 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 15094 ; 15095 ; SKX-SSE-LABEL: test_unpcklpd: 15096 ; SKX-SSE: # %bb.0: 15097 ; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 15098 ; SKX-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.33] 15099 ; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] 15100 ; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] 15101 ; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] 15102 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 15103 ; 15104 ; SKX-LABEL: test_unpcklpd: 15105 ; SKX: # %bb.0: 15106 ; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] 15107 ; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00] 15108 ; SKX-NEXT: vaddpd %xmm1, %xmm0, 
%xmm0 # sched: [4:0.50] 15109 ; SKX-NEXT: retq # sched: [7:1.00] 15110 ; 15111 ; BTVER2-SSE-LABEL: test_unpcklpd: 15112 ; BTVER2-SSE: # %bb.0: 15113 ; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] 15114 ; BTVER2-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.50] 15115 ; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] 15116 ; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 15117 ; BTVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] 15118 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 15119 ; 15120 ; BTVER2-LABEL: test_unpcklpd: 15121 ; BTVER2: # %bb.0: 15122 ; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] 15123 ; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [6:1.00] 15124 ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 15125 ; BTVER2-NEXT: retq # sched: [4:1.00] 15126 ; 15127 ; ZNVER1-SSE-LABEL: test_unpcklpd: 15128 ; ZNVER1-SSE: # %bb.0: 15129 ; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] 15130 ; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.25] 15131 ; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] 15132 ; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] 15133 ; ZNVER1-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.25] 15134 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 15135 ; 15136 ; ZNVER1-LABEL: test_unpcklpd: 15137 ; ZNVER1: # %bb.0: 15138 ; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] 15139 ; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [8:0.50] 15140 ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 15141 ; ZNVER1-NEXT: retq # sched: [1:0.50] 15142 %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2> 15143 %2 = load <2 x double>, <2 x double> *%a2, align 16 15144 %3 = shufflevector <2 x double> %1, <2 x double> %2, <2 x i32> <i32 0, i32 2> 15145 %4 = fadd <2 x double> %1, %3 15146 ret <2 x double> %4 15147 } 15148 
; Scheduling-annotation test for the packed-double XOR instruction.
; The IR xors %a0 with %a1 as <4 x i32> bit patterns, xors that result with a
; 16-byte-aligned vector loaded from %a2, converts back to <2 x double> and
; adds %a1. The assertions below expect a register-form xor, a memory-form xor
; (load folded) and a packed add for every tested CPU model.
; NOTE(review): the trailing fadd presumably keeps the xor in the
; double-precision domain (xorpd rather than an integer xor) - confirm.
define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_xorpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00]
; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_xorpd:
; ATOM: # %bb.0:
; ATOM-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT: xorpd (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_xorpd:
; SLM: # %bb.0:
; SLM-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: xorpd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_xorpd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00]
; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_xorpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_xorpd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00]
; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_xorpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_xorpd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [6:1.00]
; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_xorpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_xorpd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50]
; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_xorpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_xorpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_xorpd:
; SKX: # %bb.0:
; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_xorpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_xorpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_xorpd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_xorpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %lhs.bits = bitcast <2 x double> %a0 to <4 x i32>
  %rhs.bits = bitcast <2 x double> %a1 to <4 x i32>
  %reg.xor = xor <4 x i32> %lhs.bits, %rhs.bits
  %mem.vec = load <2 x double>, <2 x double> *%a2, align 16
  %mem.bits = bitcast <2 x double> %mem.vec to <4 x i32>
  %mem.xor = xor <4 x i32> %reg.xor, %mem.bits
  %xor.fp = bitcast <4 x i32> %mem.xor to <2 x double>
  %sum = fadd <2 x double> %a1, %xor.fp
  ret <2 x double> %sum
}

; Module-level metadata node holding the constant i32 1. It is not referenced
; by the function above; presumably other tests in this file use it - verify
; before removing.
!0 = !{i32 1}