1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM 4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM 5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY 6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY 7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL 8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL 9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE 10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX 11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 13 14 define i64 @test_cvtpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { 15 ; GENERIC-LABEL: test_cvtpd2pi: 16 ; GENERIC: # %bb.0: 17 ; GENERIC-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00] 18 ; GENERIC-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] 19 ; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:0.33] 20 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 21 ; GENERIC-NEXT: retq # sched: 
[1:1.00] 22 ; 23 ; ATOM-LABEL: test_cvtpd2pi: 24 ; ATOM: # %bb.0: 25 ; ATOM-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [8:4.00] 26 ; ATOM-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [7:3.50] 27 ; ATOM-NEXT: por %mm1, %mm0 # sched: [1:0.50] 28 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 29 ; ATOM-NEXT: retq # sched: [79:39.50] 30 ; 31 ; SLM-LABEL: test_cvtpd2pi: 32 ; SLM: # %bb.0: 33 ; SLM-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [7:1.00] 34 ; SLM-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [4:0.50] 35 ; SLM-NEXT: por %mm0, %mm1 # sched: [1:0.50] 36 ; SLM-NEXT: movq %mm1, %rax # sched: [1:0.50] 37 ; SLM-NEXT: retq # sched: [4:1.00] 38 ; 39 ; SANDY-LABEL: test_cvtpd2pi: 40 ; SANDY: # %bb.0: 41 ; SANDY-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00] 42 ; SANDY-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] 43 ; SANDY-NEXT: por %mm1, %mm0 # sched: [1:0.33] 44 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 45 ; SANDY-NEXT: retq # sched: [1:1.00] 46 ; 47 ; HASWELL-LABEL: test_cvtpd2pi: 48 ; HASWELL: # %bb.0: 49 ; HASWELL-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00] 50 ; HASWELL-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] 51 ; HASWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] 52 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 53 ; HASWELL-NEXT: retq # sched: [7:1.00] 54 ; 55 ; BROADWELL-LABEL: test_cvtpd2pi: 56 ; BROADWELL: # %bb.0: 57 ; BROADWELL-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [4:1.00] 58 ; BROADWELL-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [9:1.00] 59 ; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] 60 ; BROADWELL-NEXT: movq %mm1, %rax # sched: [1:1.00] 61 ; BROADWELL-NEXT: retq # sched: [7:1.00] 62 ; 63 ; SKYLAKE-LABEL: test_cvtpd2pi: 64 ; SKYLAKE: # %bb.0: 65 ; SKYLAKE-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [5:1.00] 66 ; SKYLAKE-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [11:1.00] 67 ; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50] 68 ; SKYLAKE-NEXT: movq %mm1, %rax # sched: [2:1.00] 69 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 70 ; 71 ; SKX-LABEL: test_cvtpd2pi: 72 ; SKX: # %bb.0: 73 ; SKX-NEXT: 
cvtpd2pi %xmm0, %mm0 # sched: [5:1.00] 74 ; SKX-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [11:1.00] 75 ; SKX-NEXT: por %mm0, %mm1 # sched: [1:0.50] 76 ; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00] 77 ; SKX-NEXT: retq # sched: [7:1.00] 78 ; 79 ; BTVER2-LABEL: test_cvtpd2pi: 80 ; BTVER2: # %bb.0: 81 ; BTVER2-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [8:1.00] 82 ; BTVER2-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [3:1.00] 83 ; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50] 84 ; BTVER2-NEXT: movq %mm1, %rax # sched: [4:1.00] 85 ; BTVER2-NEXT: retq # sched: [4:1.00] 86 ; 87 ; ZNVER1-LABEL: test_cvtpd2pi: 88 ; ZNVER1: # %bb.0: 89 ; ZNVER1-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [12:1.00] 90 ; ZNVER1-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [4:1.00] 91 ; ZNVER1-NEXT: por %mm0, %mm1 # sched: [1:0.25] 92 ; ZNVER1-NEXT: movq %mm1, %rax # sched: [2:1.00] 93 ; ZNVER1-NEXT: retq # sched: [1:0.50] 94 %1 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0) 95 %2 = load <2 x double>, <2 x double> *%a1, align 16 96 %3 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %2) 97 %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3) 98 %5 = bitcast x86_mmx %4 to i64 99 ret i64 %5 100 } 101 declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone 102 103 define <2 x double> @test_cvtpi2pd(x86_mmx %a0, x86_mmx* %a1) optsize { 104 ; GENERIC-LABEL: test_cvtpi2pd: 105 ; GENERIC: # %bb.0: 106 ; GENERIC-NEXT: cvtpi2pd %mm0, %xmm1 # sched: [4:1.00] 107 ; GENERIC-NEXT: cvtpi2pd (%rdi), %xmm0 # sched: [10:1.00] 108 ; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 109 ; GENERIC-NEXT: retq # sched: [1:1.00] 110 ; 111 ; ATOM-LABEL: test_cvtpi2pd: 112 ; ATOM: # %bb.0: 113 ; ATOM-NEXT: cvtpi2pd (%rdi), %xmm0 # sched: [8:4.00] 114 ; ATOM-NEXT: cvtpi2pd %mm0, %xmm1 # sched: [7:3.50] 115 ; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] 116 ; ATOM-NEXT: retq # sched: [79:39.50] 117 ; 118 ; SLM-LABEL: test_cvtpi2pd: 119 ; SLM: # %bb.0: 120 ; SLM-NEXT: cvtpi2pd (%rdi), %xmm0 # sched: [7:1.00] 121 ; 
SLM-NEXT: cvtpi2pd %mm0, %xmm1 # sched: [4:0.50] 122 ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] 123 ; SLM-NEXT: retq # sched: [4:1.00] 124 ; 125 ; SANDY-LABEL: test_cvtpi2pd: 126 ; SANDY: # %bb.0: 127 ; SANDY-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:1.00] 128 ; SANDY-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00] 129 ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 130 ; SANDY-NEXT: retq # sched: [1:1.00] 131 ; 132 ; HASWELL-LABEL: test_cvtpi2pd: 133 ; HASWELL: # %bb.0: 134 ; HASWELL-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:1.00] 135 ; HASWELL-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [9:1.00] 136 ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 137 ; HASWELL-NEXT: retq # sched: [7:1.00] 138 ; 139 ; BROADWELL-LABEL: test_cvtpi2pd: 140 ; BROADWELL: # %bb.0: 141 ; BROADWELL-NEXT: cvtpi2pd (%rdi), %xmm0 # sched: [9:1.00] 142 ; BROADWELL-NEXT: cvtpi2pd %mm0, %xmm1 # sched: [4:1.00] 143 ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] 144 ; BROADWELL-NEXT: retq # sched: [7:1.00] 145 ; 146 ; SKYLAKE-LABEL: test_cvtpi2pd: 147 ; SKYLAKE: # %bb.0: 148 ; SKYLAKE-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [5:1.00] 149 ; SKYLAKE-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00] 150 ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 151 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 152 ; 153 ; SKX-LABEL: test_cvtpi2pd: 154 ; SKX: # %bb.0: 155 ; SKX-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:0.50] 156 ; SKX-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [9:0.50] 157 ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 158 ; SKX-NEXT: retq # sched: [7:1.00] 159 ; 160 ; BTVER2-LABEL: test_cvtpi2pd: 161 ; BTVER2: # %bb.0: 162 ; BTVER2-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [8:1.00] 163 ; BTVER2-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [3:1.00] 164 ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 165 ; BTVER2-NEXT: retq # sched: [4:1.00] 166 ; 167 ; ZNVER1-LABEL: test_cvtpi2pd: 168 ; ZNVER1: # %bb.0: 169 ; ZNVER1-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: 
[12:1.00] 170 ; ZNVER1-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [3:1.00] 171 ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 172 ; ZNVER1-NEXT: retq # sched: [1:0.50] 173 %1 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %a0) 174 %2 = load x86_mmx, x86_mmx *%a1, align 8 175 %3 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %2) 176 %4 = fadd <2 x double> %1, %3 177 ret <2 x double> %4 178 } 179 declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone 180 181 define <4 x float> @test_cvtpi2ps(x86_mmx %a0, x86_mmx* %a1, <4 x float> %a2, <4 x float> %a3) optsize { 182 ; GENERIC-LABEL: test_cvtpi2ps: 183 ; GENERIC: # %bb.0: 184 ; GENERIC-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] 185 ; GENERIC-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] 186 ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 187 ; GENERIC-NEXT: retq # sched: [1:1.00] 188 ; 189 ; ATOM-LABEL: test_cvtpi2ps: 190 ; ATOM: # %bb.0: 191 ; ATOM-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [5:5.00] 192 ; ATOM-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [5:5.00] 193 ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] 194 ; ATOM-NEXT: retq # sched: [79:39.50] 195 ; 196 ; SLM-LABEL: test_cvtpi2ps: 197 ; SLM: # %bb.0: 198 ; SLM-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [7:1.00] 199 ; SLM-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [4:0.50] 200 ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] 201 ; SLM-NEXT: retq # sched: [4:1.00] 202 ; 203 ; SANDY-LABEL: test_cvtpi2ps: 204 ; SANDY: # %bb.0: 205 ; SANDY-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] 206 ; SANDY-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] 207 ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 208 ; SANDY-NEXT: retq # sched: [1:1.00] 209 ; 210 ; HASWELL-LABEL: test_cvtpi2ps: 211 ; HASWELL: # %bb.0: 212 ; HASWELL-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] 213 ; HASWELL-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00] 214 ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 215 ; HASWELL-NEXT: retq # sched: [7:1.00] 216 ; 217 ; 
BROADWELL-LABEL: test_cvtpi2ps: 218 ; BROADWELL: # %bb.0: 219 ; BROADWELL-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] 220 ; BROADWELL-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00] 221 ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 222 ; BROADWELL-NEXT: retq # sched: [7:1.00] 223 ; 224 ; SKYLAKE-LABEL: test_cvtpi2ps: 225 ; SKYLAKE: # %bb.0: 226 ; SKYLAKE-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [6:2.00] 227 ; SKYLAKE-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] 228 ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 229 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 230 ; 231 ; SKX-LABEL: test_cvtpi2ps: 232 ; SKX: # %bb.0: 233 ; SKX-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [6:2.00] 234 ; SKX-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] 235 ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] 236 ; SKX-NEXT: retq # sched: [7:1.00] 237 ; 238 ; BTVER2-LABEL: test_cvtpi2ps: 239 ; BTVER2: # %bb.0: 240 ; BTVER2-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00] 241 ; BTVER2-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00] 242 ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 243 ; BTVER2-NEXT: retq # sched: [4:1.00] 244 ; 245 ; ZNVER1-LABEL: test_cvtpi2ps: 246 ; ZNVER1: # %bb.0: 247 ; ZNVER1-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [12:1.00] 248 ; ZNVER1-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [5:1.00] 249 ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] 250 ; ZNVER1-NEXT: retq # sched: [1:0.50] 251 %1 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a2, x86_mmx %a0) 252 %2 = load x86_mmx, x86_mmx *%a1, align 8 253 %3 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a3, x86_mmx %2) 254 %4 = fadd <4 x float> %1, %3 255 ret <4 x float> %4 256 } 257 declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone 258 259 define i64 @test_cvtps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { 260 ; GENERIC-LABEL: test_cvtps2pi: 261 ; GENERIC: # %bb.0: 262 ; GENERIC-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00] 263 ; 
GENERIC-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00] 264 ; GENERIC-NEXT: por %mm0, %mm1 # sched: [1:0.33] 265 ; GENERIC-NEXT: movq %mm1, %rax # sched: [2:1.00] 266 ; GENERIC-NEXT: retq # sched: [1:1.00] 267 ; 268 ; ATOM-LABEL: test_cvtps2pi: 269 ; ATOM: # %bb.0: 270 ; ATOM-NEXT: cvtps2pi %xmm0, %mm0 # sched: [5:5.00] 271 ; ATOM-NEXT: cvtps2pi (%rdi), %mm1 # sched: [5:5.00] 272 ; ATOM-NEXT: por %mm0, %mm1 # sched: [1:0.50] 273 ; ATOM-NEXT: movq %mm1, %rax # sched: [3:3.00] 274 ; ATOM-NEXT: retq # sched: [79:39.50] 275 ; 276 ; SLM-LABEL: test_cvtps2pi: 277 ; SLM: # %bb.0: 278 ; SLM-NEXT: cvtps2pi (%rdi), %mm1 # sched: [7:1.00] 279 ; SLM-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:0.50] 280 ; SLM-NEXT: por %mm0, %mm1 # sched: [1:0.50] 281 ; SLM-NEXT: movq %mm1, %rax # sched: [1:0.50] 282 ; SLM-NEXT: retq # sched: [4:1.00] 283 ; 284 ; SANDY-LABEL: test_cvtps2pi: 285 ; SANDY: # %bb.0: 286 ; SANDY-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00] 287 ; SANDY-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00] 288 ; SANDY-NEXT: por %mm0, %mm1 # sched: [1:0.33] 289 ; SANDY-NEXT: movq %mm1, %rax # sched: [2:1.00] 290 ; SANDY-NEXT: retq # sched: [1:1.00] 291 ; 292 ; HASWELL-LABEL: test_cvtps2pi: 293 ; HASWELL: # %bb.0: 294 ; HASWELL-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00] 295 ; HASWELL-NEXT: cvtps2pi (%rdi), %mm1 # sched: [8:1.00] 296 ; HASWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] 297 ; HASWELL-NEXT: movq %mm1, %rax # sched: [1:1.00] 298 ; HASWELL-NEXT: retq # sched: [7:1.00] 299 ; 300 ; BROADWELL-LABEL: test_cvtps2pi: 301 ; BROADWELL: # %bb.0: 302 ; BROADWELL-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00] 303 ; BROADWELL-NEXT: cvtps2pi (%rdi), %mm1 # sched: [8:1.00] 304 ; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] 305 ; BROADWELL-NEXT: movq %mm1, %rax # sched: [1:1.00] 306 ; BROADWELL-NEXT: retq # sched: [7:1.00] 307 ; 308 ; SKYLAKE-LABEL: test_cvtps2pi: 309 ; SKYLAKE: # %bb.0: 310 ; SKYLAKE-NEXT: cvtps2pi %xmm0, %mm0 # sched: [5:1.00] 311 ; SKYLAKE-NEXT: cvtps2pi (%rdi), %mm1 
# sched: [9:0.50] 312 ; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50] 313 ; SKYLAKE-NEXT: movq %mm1, %rax # sched: [2:1.00] 314 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 315 ; 316 ; SKX-LABEL: test_cvtps2pi: 317 ; SKX: # %bb.0: 318 ; SKX-NEXT: cvtps2pi %xmm0, %mm0 # sched: [5:1.00] 319 ; SKX-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:0.50] 320 ; SKX-NEXT: por %mm0, %mm1 # sched: [1:0.50] 321 ; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00] 322 ; SKX-NEXT: retq # sched: [7:1.00] 323 ; 324 ; BTVER2-LABEL: test_cvtps2pi: 325 ; BTVER2: # %bb.0: 326 ; BTVER2-NEXT: cvtps2pi (%rdi), %mm1 # sched: [8:1.00] 327 ; BTVER2-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00] 328 ; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50] 329 ; BTVER2-NEXT: movq %mm1, %rax # sched: [4:1.00] 330 ; BTVER2-NEXT: retq # sched: [4:1.00] 331 ; 332 ; ZNVER1-LABEL: test_cvtps2pi: 333 ; ZNVER1: # %bb.0: 334 ; ZNVER1-NEXT: cvtps2pi (%rdi), %mm1 # sched: [12:1.00] 335 ; ZNVER1-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00] 336 ; ZNVER1-NEXT: por %mm0, %mm1 # sched: [1:0.25] 337 ; ZNVER1-NEXT: movq %mm1, %rax # sched: [2:1.00] 338 ; ZNVER1-NEXT: retq # sched: [1:0.50] 339 %1 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0) 340 %2 = load <4 x float>, <4 x float> *%a1, align 16 341 %3 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %2) 342 %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3) 343 %5 = bitcast x86_mmx %4 to i64 344 ret i64 %5 345 } 346 declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone 347 348 define i64 @test_cvttpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { 349 ; GENERIC-LABEL: test_cvttpd2pi: 350 ; GENERIC: # %bb.0: 351 ; GENERIC-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00] 352 ; GENERIC-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] 353 ; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:0.33] 354 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 355 ; GENERIC-NEXT: retq # sched: [1:1.00] 356 ; 357 ; ATOM-LABEL: test_cvttpd2pi: 358 ; ATOM: # %bb.0: 359 ; ATOM-NEXT: 
cvttpd2pi (%rdi), %mm0 # sched: [8:4.00] 360 ; ATOM-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [7:3.50] 361 ; ATOM-NEXT: por %mm1, %mm0 # sched: [1:0.50] 362 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 363 ; ATOM-NEXT: retq # sched: [79:39.50] 364 ; 365 ; SLM-LABEL: test_cvttpd2pi: 366 ; SLM: # %bb.0: 367 ; SLM-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [7:1.00] 368 ; SLM-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [4:0.50] 369 ; SLM-NEXT: por %mm0, %mm1 # sched: [1:0.50] 370 ; SLM-NEXT: movq %mm1, %rax # sched: [1:0.50] 371 ; SLM-NEXT: retq # sched: [4:1.00] 372 ; 373 ; SANDY-LABEL: test_cvttpd2pi: 374 ; SANDY: # %bb.0: 375 ; SANDY-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00] 376 ; SANDY-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] 377 ; SANDY-NEXT: por %mm1, %mm0 # sched: [1:0.33] 378 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 379 ; SANDY-NEXT: retq # sched: [1:1.00] 380 ; 381 ; HASWELL-LABEL: test_cvttpd2pi: 382 ; HASWELL: # %bb.0: 383 ; HASWELL-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00] 384 ; HASWELL-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] 385 ; HASWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] 386 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 387 ; HASWELL-NEXT: retq # sched: [7:1.00] 388 ; 389 ; BROADWELL-LABEL: test_cvttpd2pi: 390 ; BROADWELL: # %bb.0: 391 ; BROADWELL-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [4:1.00] 392 ; BROADWELL-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [9:1.00] 393 ; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] 394 ; BROADWELL-NEXT: movq %mm1, %rax # sched: [1:1.00] 395 ; BROADWELL-NEXT: retq # sched: [7:1.00] 396 ; 397 ; SKYLAKE-LABEL: test_cvttpd2pi: 398 ; SKYLAKE: # %bb.0: 399 ; SKYLAKE-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [5:1.00] 400 ; SKYLAKE-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [11:1.00] 401 ; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50] 402 ; SKYLAKE-NEXT: movq %mm1, %rax # sched: [2:1.00] 403 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 404 ; 405 ; SKX-LABEL: test_cvttpd2pi: 406 ; SKX: # %bb.0: 407 ; SKX-NEXT: cvttpd2pi %xmm0, 
%mm0 # sched: [5:1.00] 408 ; SKX-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [11:1.00] 409 ; SKX-NEXT: por %mm0, %mm1 # sched: [1:0.50] 410 ; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00] 411 ; SKX-NEXT: retq # sched: [7:1.00] 412 ; 413 ; BTVER2-LABEL: test_cvttpd2pi: 414 ; BTVER2: # %bb.0: 415 ; BTVER2-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [8:1.00] 416 ; BTVER2-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [3:1.00] 417 ; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50] 418 ; BTVER2-NEXT: movq %mm1, %rax # sched: [4:1.00] 419 ; BTVER2-NEXT: retq # sched: [4:1.00] 420 ; 421 ; ZNVER1-LABEL: test_cvttpd2pi: 422 ; ZNVER1: # %bb.0: 423 ; ZNVER1-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [12:1.00] 424 ; ZNVER1-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [4:1.00] 425 ; ZNVER1-NEXT: por %mm0, %mm1 # sched: [1:0.25] 426 ; ZNVER1-NEXT: movq %mm1, %rax # sched: [2:1.00] 427 ; ZNVER1-NEXT: retq # sched: [1:0.50] 428 %1 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0) 429 %2 = load <2 x double>, <2 x double> *%a1, align 16 430 %3 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %2) 431 %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3) 432 %5 = bitcast x86_mmx %4 to i64 433 ret i64 %5 434 } 435 declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone 436 437 define i64 @test_cvttps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { 438 ; GENERIC-LABEL: test_cvttps2pi: 439 ; GENERIC: # %bb.0: 440 ; GENERIC-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00] 441 ; GENERIC-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00] 442 ; GENERIC-NEXT: por %mm0, %mm1 # sched: [1:0.33] 443 ; GENERIC-NEXT: movq %mm1, %rax # sched: [2:1.00] 444 ; GENERIC-NEXT: retq # sched: [1:1.00] 445 ; 446 ; ATOM-LABEL: test_cvttps2pi: 447 ; ATOM: # %bb.0: 448 ; ATOM-NEXT: cvttps2pi %xmm0, %mm0 # sched: [5:5.00] 449 ; ATOM-NEXT: cvttps2pi (%rdi), %mm1 # sched: [5:5.00] 450 ; ATOM-NEXT: por %mm0, %mm1 # sched: [1:0.50] 451 ; ATOM-NEXT: movq %mm1, %rax # sched: [3:3.00] 452 ; ATOM-NEXT: retq # sched: [79:39.50] 453 ; 
454 ; SLM-LABEL: test_cvttps2pi: 455 ; SLM: # %bb.0: 456 ; SLM-NEXT: cvttps2pi (%rdi), %mm1 # sched: [7:1.00] 457 ; SLM-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:0.50] 458 ; SLM-NEXT: por %mm0, %mm1 # sched: [1:0.50] 459 ; SLM-NEXT: movq %mm1, %rax # sched: [1:0.50] 460 ; SLM-NEXT: retq # sched: [4:1.00] 461 ; 462 ; SANDY-LABEL: test_cvttps2pi: 463 ; SANDY: # %bb.0: 464 ; SANDY-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00] 465 ; SANDY-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00] 466 ; SANDY-NEXT: por %mm0, %mm1 # sched: [1:0.33] 467 ; SANDY-NEXT: movq %mm1, %rax # sched: [2:1.00] 468 ; SANDY-NEXT: retq # sched: [1:1.00] 469 ; 470 ; HASWELL-LABEL: test_cvttps2pi: 471 ; HASWELL: # %bb.0: 472 ; HASWELL-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00] 473 ; HASWELL-NEXT: cvttps2pi (%rdi), %mm1 # sched: [8:1.00] 474 ; HASWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] 475 ; HASWELL-NEXT: movq %mm1, %rax # sched: [1:1.00] 476 ; HASWELL-NEXT: retq # sched: [7:1.00] 477 ; 478 ; BROADWELL-LABEL: test_cvttps2pi: 479 ; BROADWELL: # %bb.0: 480 ; BROADWELL-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00] 481 ; BROADWELL-NEXT: cvttps2pi (%rdi), %mm1 # sched: [8:1.00] 482 ; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33] 483 ; BROADWELL-NEXT: movq %mm1, %rax # sched: [1:1.00] 484 ; BROADWELL-NEXT: retq # sched: [7:1.00] 485 ; 486 ; SKYLAKE-LABEL: test_cvttps2pi: 487 ; SKYLAKE: # %bb.0: 488 ; SKYLAKE-NEXT: cvttps2pi %xmm0, %mm0 # sched: [5:1.00] 489 ; SKYLAKE-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:0.50] 490 ; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50] 491 ; SKYLAKE-NEXT: movq %mm1, %rax # sched: [2:1.00] 492 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 493 ; 494 ; SKX-LABEL: test_cvttps2pi: 495 ; SKX: # %bb.0: 496 ; SKX-NEXT: cvttps2pi %xmm0, %mm0 # sched: [5:1.00] 497 ; SKX-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:0.50] 498 ; SKX-NEXT: por %mm0, %mm1 # sched: [1:0.50] 499 ; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00] 500 ; SKX-NEXT: retq # sched: [7:1.00] 501 ; 502 ; BTVER2-LABEL: 
test_cvttps2pi: 503 ; BTVER2: # %bb.0: 504 ; BTVER2-NEXT: cvttps2pi (%rdi), %mm1 # sched: [8:1.00] 505 ; BTVER2-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00] 506 ; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50] 507 ; BTVER2-NEXT: movq %mm1, %rax # sched: [4:1.00] 508 ; BTVER2-NEXT: retq # sched: [4:1.00] 509 ; 510 ; ZNVER1-LABEL: test_cvttps2pi: 511 ; ZNVER1: # %bb.0: 512 ; ZNVER1-NEXT: cvttps2pi (%rdi), %mm1 # sched: [12:1.00] 513 ; ZNVER1-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00] 514 ; ZNVER1-NEXT: por %mm0, %mm1 # sched: [1:0.25] 515 ; ZNVER1-NEXT: movq %mm1, %rax # sched: [2:1.00] 516 ; ZNVER1-NEXT: retq # sched: [1:0.50] 517 %1 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0) 518 %2 = load <4 x float>, <4 x float> *%a1, align 16 519 %3 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %2) 520 %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3) 521 %5 = bitcast x86_mmx %4 to i64 522 ret i64 %5 523 } 524 declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone 525 526 define void @test_emms() optsize { 527 ; GENERIC-LABEL: test_emms: 528 ; GENERIC: # %bb.0: 529 ; GENERIC-NEXT: emms # sched: [31:10.33] 530 ; GENERIC-NEXT: retq # sched: [1:1.00] 531 ; 532 ; ATOM-LABEL: test_emms: 533 ; ATOM: # %bb.0: 534 ; ATOM-NEXT: emms # sched: [5:2.50] 535 ; ATOM-NEXT: retq # sched: [79:39.50] 536 ; 537 ; SLM-LABEL: test_emms: 538 ; SLM: # %bb.0: 539 ; SLM-NEXT: emms # sched: [10:5.00] 540 ; SLM-NEXT: retq # sched: [4:1.00] 541 ; 542 ; SANDY-LABEL: test_emms: 543 ; SANDY: # %bb.0: 544 ; SANDY-NEXT: emms # sched: [31:10.33] 545 ; SANDY-NEXT: retq # sched: [1:1.00] 546 ; 547 ; HASWELL-LABEL: test_emms: 548 ; HASWELL: # %bb.0: 549 ; HASWELL-NEXT: emms # sched: [31:10.00] 550 ; HASWELL-NEXT: retq # sched: [7:1.00] 551 ; 552 ; BROADWELL-LABEL: test_emms: 553 ; BROADWELL: # %bb.0: 554 ; BROADWELL-NEXT: emms # sched: [31:10.00] 555 ; BROADWELL-NEXT: retq # sched: [7:1.00] 556 ; 557 ; SKYLAKE-LABEL: test_emms: 558 ; SKYLAKE: # %bb.0: 559 ; 
SKYLAKE-NEXT: emms # sched: [10:4.50] 560 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 561 ; 562 ; SKX-LABEL: test_emms: 563 ; SKX: # %bb.0: 564 ; SKX-NEXT: emms # sched: [10:4.50] 565 ; SKX-NEXT: retq # sched: [7:1.00] 566 ; 567 ; BTVER2-LABEL: test_emms: 568 ; BTVER2: # %bb.0: 569 ; BTVER2-NEXT: emms # sched: [2:0.50] 570 ; BTVER2-NEXT: retq # sched: [4:1.00] 571 ; 572 ; ZNVER1-LABEL: test_emms: 573 ; ZNVER1: # %bb.0: 574 ; ZNVER1-NEXT: emms # sched: [2:0.25] 575 ; ZNVER1-NEXT: retq # sched: [1:0.50] 576 call void @llvm.x86.mmx.emms() 577 ret void 578 } 579 declare void @llvm.x86.mmx.emms() 580 581 define void @test_maskmovq(x86_mmx %a0, x86_mmx %a1, i8* %a2) optsize { 582 ; GENERIC-LABEL: test_maskmovq: 583 ; GENERIC: # %bb.0: 584 ; GENERIC-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] 585 ; GENERIC-NEXT: retq # sched: [1:1.00] 586 ; 587 ; ATOM-LABEL: test_maskmovq: 588 ; ATOM: # %bb.0: 589 ; ATOM-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] 590 ; ATOM-NEXT: retq # sched: [79:39.50] 591 ; 592 ; SLM-LABEL: test_maskmovq: 593 ; SLM: # %bb.0: 594 ; SLM-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] 595 ; SLM-NEXT: retq # sched: [4:1.00] 596 ; 597 ; SANDY-LABEL: test_maskmovq: 598 ; SANDY: # %bb.0: 599 ; SANDY-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] 600 ; SANDY-NEXT: retq # sched: [1:1.00] 601 ; 602 ; HASWELL-LABEL: test_maskmovq: 603 ; HASWELL: # %bb.0: 604 ; HASWELL-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] 605 ; HASWELL-NEXT: retq # sched: [7:1.00] 606 ; 607 ; BROADWELL-LABEL: test_maskmovq: 608 ; BROADWELL: # %bb.0: 609 ; BROADWELL-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] 610 ; BROADWELL-NEXT: retq # sched: [7:1.00] 611 ; 612 ; SKYLAKE-LABEL: test_maskmovq: 613 ; SKYLAKE: # %bb.0: 614 ; SKYLAKE-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] 615 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 616 ; 617 ; SKX-LABEL: test_maskmovq: 618 ; SKX: # %bb.0: 619 ; SKX-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00] 620 ; SKX-NEXT: retq # sched: [7:1.00] 621 ; 622 ; BTVER2-LABEL: 
test_maskmovq: 623 ; BTVER2: # %bb.0: 624 ; BTVER2-NEXT: maskmovq %mm1, %mm0 # sched: [1:0.50] 625 ; BTVER2-NEXT: retq # sched: [4:1.00] 626 ; 627 ; ZNVER1-LABEL: test_maskmovq: 628 ; ZNVER1: # %bb.0: 629 ; ZNVER1-NEXT: maskmovq %mm1, %mm0 # sched: [100:0.25] 630 ; ZNVER1-NEXT: retq # sched: [1:0.50] 631 call void @llvm.x86.mmx.maskmovq(x86_mmx %a0, x86_mmx %a1, i8* %a2) 632 ret void 633 } 634 declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind 635 636 define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) { 637 ; GENERIC-LABEL: test_movd: 638 ; GENERIC: # %bb.0: 639 ; GENERIC-NEXT: movd %edi, %mm1 # sched: [1:1.00] 640 ; GENERIC-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] 641 ; GENERIC-NEXT: paddd %mm1, %mm2 # sched: [3:1.00] 642 ; GENERIC-NEXT: paddd %mm2, %mm0 # sched: [3:1.00] 643 ; GENERIC-NEXT: movd %mm2, %ecx # sched: [2:1.00] 644 ; GENERIC-NEXT: movd %mm0, %eax # sched: [2:1.00] 645 ; GENERIC-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] 646 ; GENERIC-NEXT: retq # sched: [1:1.00] 647 ; 648 ; ATOM-LABEL: test_movd: 649 ; ATOM: # %bb.0: 650 ; ATOM-NEXT: movd %edi, %mm1 # sched: [1:1.00] 651 ; ATOM-NEXT: movd (%rsi), %mm2 # sched: [1:1.00] 652 ; ATOM-NEXT: paddd %mm1, %mm2 # sched: [1:0.50] 653 ; ATOM-NEXT: paddd %mm2, %mm0 # sched: [1:0.50] 654 ; ATOM-NEXT: movd %mm2, %ecx # sched: [3:3.00] 655 ; ATOM-NEXT: movd %mm0, %eax # sched: [3:3.00] 656 ; ATOM-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] 657 ; ATOM-NEXT: retq # sched: [79:39.50] 658 ; 659 ; SLM-LABEL: test_movd: 660 ; SLM: # %bb.0: 661 ; SLM-NEXT: movd (%rsi), %mm2 # sched: [3:1.00] 662 ; SLM-NEXT: movd %edi, %mm1 # sched: [1:0.50] 663 ; SLM-NEXT: paddd %mm1, %mm2 # sched: [1:0.50] 664 ; SLM-NEXT: paddd %mm2, %mm0 # sched: [1:0.50] 665 ; SLM-NEXT: movd %mm2, %ecx # sched: [1:0.50] 666 ; SLM-NEXT: movd %mm0, %eax # sched: [1:0.50] 667 ; SLM-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] 668 ; SLM-NEXT: retq # sched: [4:1.00] 669 ; 670 ; SANDY-LABEL: test_movd: 671 ; SANDY: # %bb.0: 672 ; SANDY-NEXT: 
movd %edi, %mm1 # sched: [1:1.00] 673 ; SANDY-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] 674 ; SANDY-NEXT: paddd %mm1, %mm2 # sched: [3:1.00] 675 ; SANDY-NEXT: paddd %mm2, %mm0 # sched: [3:1.00] 676 ; SANDY-NEXT: movd %mm2, %ecx # sched: [2:1.00] 677 ; SANDY-NEXT: movd %mm0, %eax # sched: [2:1.00] 678 ; SANDY-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] 679 ; SANDY-NEXT: retq # sched: [1:1.00] 680 ; 681 ; HASWELL-LABEL: test_movd: 682 ; HASWELL: # %bb.0: 683 ; HASWELL-NEXT: movd %edi, %mm1 # sched: [1:1.00] 684 ; HASWELL-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] 685 ; HASWELL-NEXT: paddd %mm1, %mm2 # sched: [1:0.50] 686 ; HASWELL-NEXT: paddd %mm2, %mm0 # sched: [1:0.50] 687 ; HASWELL-NEXT: movd %mm2, %ecx # sched: [1:1.00] 688 ; HASWELL-NEXT: movd %mm0, %eax # sched: [1:1.00] 689 ; HASWELL-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] 690 ; HASWELL-NEXT: retq # sched: [7:1.00] 691 ; 692 ; BROADWELL-LABEL: test_movd: 693 ; BROADWELL: # %bb.0: 694 ; BROADWELL-NEXT: movd %edi, %mm1 # sched: [1:1.00] 695 ; BROADWELL-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] 696 ; BROADWELL-NEXT: paddd %mm1, %mm2 # sched: [1:0.50] 697 ; BROADWELL-NEXT: paddd %mm2, %mm0 # sched: [1:0.50] 698 ; BROADWELL-NEXT: movd %mm2, %ecx # sched: [1:1.00] 699 ; BROADWELL-NEXT: movd %mm0, %eax # sched: [1:1.00] 700 ; BROADWELL-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] 701 ; BROADWELL-NEXT: retq # sched: [7:1.00] 702 ; 703 ; SKYLAKE-LABEL: test_movd: 704 ; SKYLAKE: # %bb.0: 705 ; SKYLAKE-NEXT: movd %edi, %mm1 # sched: [1:1.00] 706 ; SKYLAKE-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] 707 ; SKYLAKE-NEXT: paddd %mm1, %mm2 # sched: [1:0.50] 708 ; SKYLAKE-NEXT: paddd %mm2, %mm0 # sched: [1:0.50] 709 ; SKYLAKE-NEXT: movd %mm2, %ecx # sched: [2:1.00] 710 ; SKYLAKE-NEXT: movd %mm0, %eax # sched: [2:1.00] 711 ; SKYLAKE-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] 712 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 713 ; 714 ; SKX-LABEL: test_movd: 715 ; SKX: # %bb.0: 716 ; SKX-NEXT: movd %edi, %mm1 # sched: [1:1.00] 717 ; SKX-NEXT: 
movd (%rsi), %mm2 # sched: [5:0.50] 718 ; SKX-NEXT: paddd %mm1, %mm2 # sched: [1:0.50] 719 ; SKX-NEXT: paddd %mm2, %mm0 # sched: [1:0.50] 720 ; SKX-NEXT: movd %mm2, %ecx # sched: [2:1.00] 721 ; SKX-NEXT: movd %mm0, %eax # sched: [2:1.00] 722 ; SKX-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] 723 ; SKX-NEXT: retq # sched: [7:1.00] 724 ; 725 ; BTVER2-LABEL: test_movd: 726 ; BTVER2: # %bb.0: 727 ; BTVER2-NEXT: movd %edi, %mm1 # sched: [8:0.50] 728 ; BTVER2-NEXT: movd (%rsi), %mm2 # sched: [5:1.00] 729 ; BTVER2-NEXT: paddd %mm1, %mm2 # sched: [1:0.50] 730 ; BTVER2-NEXT: paddd %mm2, %mm0 # sched: [1:0.50] 731 ; BTVER2-NEXT: movd %mm2, %ecx # sched: [4:1.00] 732 ; BTVER2-NEXT: movd %mm0, %eax # sched: [4:1.00] 733 ; BTVER2-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] 734 ; BTVER2-NEXT: retq # sched: [4:1.00] 735 ; 736 ; ZNVER1-LABEL: test_movd: 737 ; ZNVER1: # %bb.0: 738 ; ZNVER1-NEXT: movd (%rsi), %mm2 # sched: [8:0.50] 739 ; ZNVER1-NEXT: movd %edi, %mm1 # sched: [3:1.00] 740 ; ZNVER1-NEXT: paddd %mm1, %mm2 # sched: [1:0.25] 741 ; ZNVER1-NEXT: paddd %mm2, %mm0 # sched: [1:0.25] 742 ; ZNVER1-NEXT: movd %mm2, %ecx # sched: [2:1.00] 743 ; ZNVER1-NEXT: movd %mm0, %eax # sched: [2:1.00] 744 ; ZNVER1-NEXT: movl %ecx, (%rsi) # sched: [1:0.50] 745 ; ZNVER1-NEXT: retq # sched: [1:0.50] 746 %1 = insertelement <2 x i32> undef, i32 %a1, i32 0 747 %2 = bitcast <2 x i32> %1 to x86_mmx 748 %3 = load i32, i32 *%a2 749 %4 = insertelement <2 x i32> undef, i32 %3, i32 0 750 %5 = bitcast <2 x i32> %4 to x86_mmx 751 %6 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %5) 752 %7 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a0, x86_mmx %6) 753 %8 = bitcast x86_mmx %6 to <2 x i32> 754 %9 = bitcast x86_mmx %7 to <2 x i32> 755 %10 = extractelement <2 x i32> %8, i32 0 756 %11 = extractelement <2 x i32> %9, i32 0 757 store i32 %10, i32* %a2 758 ret i32 %11 759 } 760 761 define i64 @test_movdq2q(<2 x i64> %a0) optsize { 762 ; GENERIC-LABEL: test_movdq2q: 763 ; GENERIC: # %bb.0: 764 ; GENERIC-NEXT: 
movdq2q %xmm0, %mm0 # sched: [2:1.00] 765 ; GENERIC-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] 766 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 767 ; GENERIC-NEXT: retq # sched: [1:1.00] 768 ; 769 ; ATOM-LABEL: test_movdq2q: 770 ; ATOM: # %bb.0: 771 ; ATOM-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.50] 772 ; ATOM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 773 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 774 ; ATOM-NEXT: retq # sched: [79:39.50] 775 ; 776 ; SLM-LABEL: test_movdq2q: 777 ; SLM: # %bb.0: 778 ; SLM-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.50] 779 ; SLM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 780 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 781 ; SLM-NEXT: retq # sched: [4:1.00] 782 ; 783 ; SANDY-LABEL: test_movdq2q: 784 ; SANDY: # %bb.0: 785 ; SANDY-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00] 786 ; SANDY-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] 787 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 788 ; SANDY-NEXT: retq # sched: [1:1.00] 789 ; 790 ; HASWELL-LABEL: test_movdq2q: 791 ; HASWELL: # %bb.0: 792 ; HASWELL-NEXT: movdq2q %xmm0, %mm0 # sched: [2:0.67] 793 ; HASWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 794 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 795 ; HASWELL-NEXT: retq # sched: [7:1.00] 796 ; 797 ; BROADWELL-LABEL: test_movdq2q: 798 ; BROADWELL: # %bb.0: 799 ; BROADWELL-NEXT: movdq2q %xmm0, %mm0 # sched: [2:0.67] 800 ; BROADWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 801 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 802 ; BROADWELL-NEXT: retq # sched: [7:1.00] 803 ; 804 ; SKYLAKE-LABEL: test_movdq2q: 805 ; SKYLAKE: # %bb.0: 806 ; SKYLAKE-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00] 807 ; SKYLAKE-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 808 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 809 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 810 ; 811 ; SKX-LABEL: test_movdq2q: 812 ; SKX: # %bb.0: 813 ; SKX-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00] 814 ; SKX-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 815 ; SKX-NEXT: movq %mm0, %rax # 
sched: [2:1.00] 816 ; SKX-NEXT: retq # sched: [7:1.00] 817 ; 818 ; BTVER2-LABEL: test_movdq2q: 819 ; BTVER2: # %bb.0: 820 ; BTVER2-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.50] 821 ; BTVER2-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 822 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 823 ; BTVER2-NEXT: retq # sched: [4:1.00] 824 ; 825 ; ZNVER1-LABEL: test_movdq2q: 826 ; ZNVER1: # %bb.0: 827 ; ZNVER1-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.25] 828 ; ZNVER1-NEXT: paddd %mm0, %mm0 # sched: [1:0.25] 829 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 830 ; ZNVER1-NEXT: retq # sched: [1:0.50] 831 %1 = extractelement <2 x i64> %a0, i32 0 832 %2 = bitcast i64 %1 to x86_mmx 833 %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %2) 834 %4 = bitcast x86_mmx %3 to i64 835 ret i64 %4 836 } 837 838 define void @test_movntq(x86_mmx* %a0, x86_mmx %a1) optsize { 839 ; GENERIC-LABEL: test_movntq: 840 ; GENERIC: # %bb.0: 841 ; GENERIC-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] 842 ; GENERIC-NEXT: retq # sched: [1:1.00] 843 ; 844 ; ATOM-LABEL: test_movntq: 845 ; ATOM: # %bb.0: 846 ; ATOM-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] 847 ; ATOM-NEXT: retq # sched: [79:39.50] 848 ; 849 ; SLM-LABEL: test_movntq: 850 ; SLM: # %bb.0: 851 ; SLM-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] 852 ; SLM-NEXT: retq # sched: [4:1.00] 853 ; 854 ; SANDY-LABEL: test_movntq: 855 ; SANDY: # %bb.0: 856 ; SANDY-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] 857 ; SANDY-NEXT: retq # sched: [1:1.00] 858 ; 859 ; HASWELL-LABEL: test_movntq: 860 ; HASWELL: # %bb.0: 861 ; HASWELL-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] 862 ; HASWELL-NEXT: retq # sched: [7:1.00] 863 ; 864 ; BROADWELL-LABEL: test_movntq: 865 ; BROADWELL: # %bb.0: 866 ; BROADWELL-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] 867 ; BROADWELL-NEXT: retq # sched: [7:1.00] 868 ; 869 ; SKYLAKE-LABEL: test_movntq: 870 ; SKYLAKE: # %bb.0: 871 ; SKYLAKE-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] 872 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 873 ; 874 
; SKX-LABEL: test_movntq: 875 ; SKX: # %bb.0: 876 ; SKX-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00] 877 ; SKX-NEXT: retq # sched: [7:1.00] 878 ; 879 ; BTVER2-LABEL: test_movntq: 880 ; BTVER2: # %bb.0: 881 ; BTVER2-NEXT: movntq %mm0, (%rdi) # sched: [2:1.00] 882 ; BTVER2-NEXT: retq # sched: [4:1.00] 883 ; 884 ; ZNVER1-LABEL: test_movntq: 885 ; ZNVER1: # %bb.0: 886 ; ZNVER1-NEXT: movntq %mm0, (%rdi) # sched: [1:0.50] 887 ; ZNVER1-NEXT: retq # sched: [1:0.50] 888 call void @llvm.x86.mmx.movnt.dq(x86_mmx* %a0, x86_mmx %a1) 889 ret void 890 } 891 declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind 892 893 define void @test_movq(i64 *%a0) { 894 ; GENERIC-LABEL: test_movq: 895 ; GENERIC: # %bb.0: 896 ; GENERIC-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] 897 ; GENERIC-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] 898 ; GENERIC-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] 899 ; GENERIC-NEXT: retq # sched: [1:1.00] 900 ; 901 ; ATOM-LABEL: test_movq: 902 ; ATOM: # %bb.0: 903 ; ATOM-NEXT: movq (%rdi), %mm0 # sched: [1:1.00] 904 ; ATOM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 905 ; ATOM-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] 906 ; ATOM-NEXT: nop # sched: [1:0.50] 907 ; ATOM-NEXT: nop # sched: [1:0.50] 908 ; ATOM-NEXT: retq # sched: [79:39.50] 909 ; 910 ; SLM-LABEL: test_movq: 911 ; SLM: # %bb.0: 912 ; SLM-NEXT: movq (%rdi), %mm0 # sched: [3:1.00] 913 ; SLM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 914 ; SLM-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] 915 ; SLM-NEXT: retq # sched: [4:1.00] 916 ; 917 ; SANDY-LABEL: test_movq: 918 ; SANDY: # %bb.0: 919 ; SANDY-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] 920 ; SANDY-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] 921 ; SANDY-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] 922 ; SANDY-NEXT: retq # sched: [1:1.00] 923 ; 924 ; HASWELL-LABEL: test_movq: 925 ; HASWELL: # %bb.0: 926 ; HASWELL-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] 927 ; HASWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 928 ; HASWELL-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] 929 ; 
HASWELL-NEXT: retq # sched: [7:1.00] 930 ; 931 ; BROADWELL-LABEL: test_movq: 932 ; BROADWELL: # %bb.0: 933 ; BROADWELL-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] 934 ; BROADWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 935 ; BROADWELL-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] 936 ; BROADWELL-NEXT: retq # sched: [7:1.00] 937 ; 938 ; SKYLAKE-LABEL: test_movq: 939 ; SKYLAKE: # %bb.0: 940 ; SKYLAKE-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] 941 ; SKYLAKE-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 942 ; SKYLAKE-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] 943 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 944 ; 945 ; SKX-LABEL: test_movq: 946 ; SKX: # %bb.0: 947 ; SKX-NEXT: movq (%rdi), %mm0 # sched: [5:0.50] 948 ; SKX-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 949 ; SKX-NEXT: movq %mm0, (%rdi) # sched: [1:1.00] 950 ; SKX-NEXT: retq # sched: [7:1.00] 951 ; 952 ; BTVER2-LABEL: test_movq: 953 ; BTVER2: # %bb.0: 954 ; BTVER2-NEXT: movq (%rdi), %mm0 # sched: [5:1.00] 955 ; BTVER2-NEXT: paddd %mm0, %mm0 # sched: [1:0.50] 956 ; BTVER2-NEXT: movq %mm0, (%rdi) # sched: [2:1.00] 957 ; BTVER2-NEXT: retq # sched: [4:1.00] 958 ; 959 ; ZNVER1-LABEL: test_movq: 960 ; ZNVER1: # %bb.0: 961 ; ZNVER1-NEXT: movq (%rdi), %mm0 # sched: [8:0.50] 962 ; ZNVER1-NEXT: paddd %mm0, %mm0 # sched: [1:0.25] 963 ; ZNVER1-NEXT: movq %mm0, (%rdi) # sched: [1:0.50] 964 ; ZNVER1-NEXT: retq # sched: [1:0.50] 965 %1 = load i64, i64* %a0, align 8 966 %2 = bitcast i64 %1 to x86_mmx 967 %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %2) 968 %4 = bitcast x86_mmx %3 to i64 969 store i64 %4, i64* %a0, align 8 970 ret void 971 } 972 973 define <2 x i64> @test_movq2dq(x86_mmx %a0) optsize { 974 ; GENERIC-LABEL: test_movq2dq: 975 ; GENERIC: # %bb.0: 976 ; GENERIC-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.33] 977 ; GENERIC-NEXT: retq # sched: [1:1.00] 978 ; 979 ; ATOM-LABEL: test_movq2dq: 980 ; ATOM: # %bb.0: 981 ; ATOM-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.50] 982 ; ATOM-NEXT: retq # sched: [79:39.50] 983 ; 984 ; 
SLM-LABEL: test_movq2dq: 985 ; SLM: # %bb.0: 986 ; SLM-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.50] 987 ; SLM-NEXT: retq # sched: [4:1.00] 988 ; 989 ; SANDY-LABEL: test_movq2dq: 990 ; SANDY: # %bb.0: 991 ; SANDY-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.33] 992 ; SANDY-NEXT: retq # sched: [1:1.00] 993 ; 994 ; HASWELL-LABEL: test_movq2dq: 995 ; HASWELL: # %bb.0: 996 ; HASWELL-NEXT: movq2dq %mm0, %xmm0 # sched: [1:1.00] 997 ; HASWELL-NEXT: retq # sched: [7:1.00] 998 ; 999 ; BROADWELL-LABEL: test_movq2dq: 1000 ; BROADWELL: # %bb.0: 1001 ; BROADWELL-NEXT: movq2dq %mm0, %xmm0 # sched: [1:1.00] 1002 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1003 ; 1004 ; SKYLAKE-LABEL: test_movq2dq: 1005 ; SKYLAKE: # %bb.0: 1006 ; SKYLAKE-NEXT: movq2dq %mm0, %xmm0 # sched: [2:2.00] 1007 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1008 ; 1009 ; SKX-LABEL: test_movq2dq: 1010 ; SKX: # %bb.0: 1011 ; SKX-NEXT: movq2dq %mm0, %xmm0 # sched: [2:2.00] 1012 ; SKX-NEXT: retq # sched: [7:1.00] 1013 ; 1014 ; BTVER2-LABEL: test_movq2dq: 1015 ; BTVER2: # %bb.0: 1016 ; BTVER2-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.50] 1017 ; BTVER2-NEXT: retq # sched: [4:1.00] 1018 ; 1019 ; ZNVER1-LABEL: test_movq2dq: 1020 ; ZNVER1: # %bb.0: 1021 ; ZNVER1-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.25] 1022 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1023 %1 = bitcast x86_mmx %a0 to i64 1024 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 1025 ret <2 x i64> %2 1026 } 1027 1028 define i64 @test_pabsb(x86_mmx *%a0) optsize { 1029 ; GENERIC-LABEL: test_pabsb: 1030 ; GENERIC: # %bb.0: 1031 ; GENERIC-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] 1032 ; GENERIC-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1033 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1034 ; GENERIC-NEXT: retq # sched: [1:1.00] 1035 ; 1036 ; ATOM-LABEL: test_pabsb: 1037 ; ATOM: # %bb.0: 1038 ; ATOM-NEXT: pabsb (%rdi), %mm0 # sched: [1:1.00] 1039 ; ATOM-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1040 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1041 ; ATOM-NEXT: retq # 
sched: [79:39.50] 1042 ; 1043 ; SLM-LABEL: test_pabsb: 1044 ; SLM: # %bb.0: 1045 ; SLM-NEXT: pabsb (%rdi), %mm0 # sched: [4:1.00] 1046 ; SLM-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1047 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1048 ; SLM-NEXT: retq # sched: [4:1.00] 1049 ; 1050 ; SANDY-LABEL: test_pabsb: 1051 ; SANDY: # %bb.0: 1052 ; SANDY-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] 1053 ; SANDY-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1054 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1055 ; SANDY-NEXT: retq # sched: [1:1.00] 1056 ; 1057 ; HASWELL-LABEL: test_pabsb: 1058 ; HASWELL: # %bb.0: 1059 ; HASWELL-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] 1060 ; HASWELL-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1061 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1062 ; HASWELL-NEXT: retq # sched: [7:1.00] 1063 ; 1064 ; BROADWELL-LABEL: test_pabsb: 1065 ; BROADWELL: # %bb.0: 1066 ; BROADWELL-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] 1067 ; BROADWELL-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1068 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1069 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1070 ; 1071 ; SKYLAKE-LABEL: test_pabsb: 1072 ; SKYLAKE: # %bb.0: 1073 ; SKYLAKE-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] 1074 ; SKYLAKE-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1075 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1076 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1077 ; 1078 ; SKX-LABEL: test_pabsb: 1079 ; SKX: # %bb.0: 1080 ; SKX-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] 1081 ; SKX-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1082 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1083 ; SKX-NEXT: retq # sched: [7:1.00] 1084 ; 1085 ; BTVER2-LABEL: test_pabsb: 1086 ; BTVER2: # %bb.0: 1087 ; BTVER2-NEXT: pabsb (%rdi), %mm0 # sched: [6:1.00] 1088 ; BTVER2-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] 1089 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1090 ; BTVER2-NEXT: retq # sched: [4:1.00] 1091 ; 1092 ; ZNVER1-LABEL: test_pabsb: 1093 ; ZNVER1: # %bb.0: 1094 ; 
ZNVER1-NEXT: pabsb (%rdi), %mm0 # sched: [8:0.50] 1095 ; ZNVER1-NEXT: pabsb %mm0, %mm0 # sched: [1:0.25] 1096 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1097 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1098 %1 = load x86_mmx, x86_mmx *%a0, align 8 1099 %2 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1) 1100 %3 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %2) 1101 %4 = bitcast x86_mmx %3 to i64 1102 ret i64 %4 1103 } 1104 declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone 1105 1106 define i64 @test_pabsd(x86_mmx *%a0) optsize { 1107 ; GENERIC-LABEL: test_pabsd: 1108 ; GENERIC: # %bb.0: 1109 ; GENERIC-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] 1110 ; GENERIC-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1111 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1112 ; GENERIC-NEXT: retq # sched: [1:1.00] 1113 ; 1114 ; ATOM-LABEL: test_pabsd: 1115 ; ATOM: # %bb.0: 1116 ; ATOM-NEXT: pabsd (%rdi), %mm0 # sched: [1:1.00] 1117 ; ATOM-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1118 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1119 ; ATOM-NEXT: retq # sched: [79:39.50] 1120 ; 1121 ; SLM-LABEL: test_pabsd: 1122 ; SLM: # %bb.0: 1123 ; SLM-NEXT: pabsd (%rdi), %mm0 # sched: [4:1.00] 1124 ; SLM-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1125 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1126 ; SLM-NEXT: retq # sched: [4:1.00] 1127 ; 1128 ; SANDY-LABEL: test_pabsd: 1129 ; SANDY: # %bb.0: 1130 ; SANDY-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] 1131 ; SANDY-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1132 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1133 ; SANDY-NEXT: retq # sched: [1:1.00] 1134 ; 1135 ; HASWELL-LABEL: test_pabsd: 1136 ; HASWELL: # %bb.0: 1137 ; HASWELL-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] 1138 ; HASWELL-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1139 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1140 ; HASWELL-NEXT: retq # sched: [7:1.00] 1141 ; 1142 ; BROADWELL-LABEL: test_pabsd: 1143 ; BROADWELL: # %bb.0: 1144 ; BROADWELL-NEXT: pabsd (%rdi), %mm0 
# sched: [6:0.50] 1145 ; BROADWELL-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1146 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1147 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1148 ; 1149 ; SKYLAKE-LABEL: test_pabsd: 1150 ; SKYLAKE: # %bb.0: 1151 ; SKYLAKE-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] 1152 ; SKYLAKE-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1153 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1154 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1155 ; 1156 ; SKX-LABEL: test_pabsd: 1157 ; SKX: # %bb.0: 1158 ; SKX-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] 1159 ; SKX-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1160 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1161 ; SKX-NEXT: retq # sched: [7:1.00] 1162 ; 1163 ; BTVER2-LABEL: test_pabsd: 1164 ; BTVER2: # %bb.0: 1165 ; BTVER2-NEXT: pabsd (%rdi), %mm0 # sched: [6:1.00] 1166 ; BTVER2-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] 1167 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1168 ; BTVER2-NEXT: retq # sched: [4:1.00] 1169 ; 1170 ; ZNVER1-LABEL: test_pabsd: 1171 ; ZNVER1: # %bb.0: 1172 ; ZNVER1-NEXT: pabsd (%rdi), %mm0 # sched: [8:0.50] 1173 ; ZNVER1-NEXT: pabsd %mm0, %mm0 # sched: [1:0.25] 1174 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1175 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1176 %1 = load x86_mmx, x86_mmx *%a0, align 8 1177 %2 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1) 1178 %3 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %2) 1179 %4 = bitcast x86_mmx %3 to i64 1180 ret i64 %4 1181 } 1182 declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone 1183 1184 define i64 @test_pabsw(x86_mmx *%a0) optsize { 1185 ; GENERIC-LABEL: test_pabsw: 1186 ; GENERIC: # %bb.0: 1187 ; GENERIC-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] 1188 ; GENERIC-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1189 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1190 ; GENERIC-NEXT: retq # sched: [1:1.00] 1191 ; 1192 ; ATOM-LABEL: test_pabsw: 1193 ; ATOM: # %bb.0: 1194 ; ATOM-NEXT: pabsw (%rdi), %mm0 # sched: [1:1.00] 1195 
; ATOM-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1196 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1197 ; ATOM-NEXT: retq # sched: [79:39.50] 1198 ; 1199 ; SLM-LABEL: test_pabsw: 1200 ; SLM: # %bb.0: 1201 ; SLM-NEXT: pabsw (%rdi), %mm0 # sched: [4:1.00] 1202 ; SLM-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1203 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1204 ; SLM-NEXT: retq # sched: [4:1.00] 1205 ; 1206 ; SANDY-LABEL: test_pabsw: 1207 ; SANDY: # %bb.0: 1208 ; SANDY-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] 1209 ; SANDY-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1210 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1211 ; SANDY-NEXT: retq # sched: [1:1.00] 1212 ; 1213 ; HASWELL-LABEL: test_pabsw: 1214 ; HASWELL: # %bb.0: 1215 ; HASWELL-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] 1216 ; HASWELL-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1217 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1218 ; HASWELL-NEXT: retq # sched: [7:1.00] 1219 ; 1220 ; BROADWELL-LABEL: test_pabsw: 1221 ; BROADWELL: # %bb.0: 1222 ; BROADWELL-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] 1223 ; BROADWELL-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1224 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1225 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1226 ; 1227 ; SKYLAKE-LABEL: test_pabsw: 1228 ; SKYLAKE: # %bb.0: 1229 ; SKYLAKE-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] 1230 ; SKYLAKE-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1231 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1232 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1233 ; 1234 ; SKX-LABEL: test_pabsw: 1235 ; SKX: # %bb.0: 1236 ; SKX-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] 1237 ; SKX-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1238 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1239 ; SKX-NEXT: retq # sched: [7:1.00] 1240 ; 1241 ; BTVER2-LABEL: test_pabsw: 1242 ; BTVER2: # %bb.0: 1243 ; BTVER2-NEXT: pabsw (%rdi), %mm0 # sched: [6:1.00] 1244 ; BTVER2-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] 1245 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 
1246 ; BTVER2-NEXT: retq # sched: [4:1.00] 1247 ; 1248 ; ZNVER1-LABEL: test_pabsw: 1249 ; ZNVER1: # %bb.0: 1250 ; ZNVER1-NEXT: pabsw (%rdi), %mm0 # sched: [8:0.50] 1251 ; ZNVER1-NEXT: pabsw %mm0, %mm0 # sched: [1:0.25] 1252 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1253 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1254 %1 = load x86_mmx, x86_mmx *%a0, align 8 1255 %2 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1) 1256 %3 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %2) 1257 %4 = bitcast x86_mmx %3 to i64 1258 ret i64 %4 1259 } 1260 declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone 1261 1262 define i64 @test_packssdw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1263 ; GENERIC-LABEL: test_packssdw: 1264 ; GENERIC: # %bb.0: 1265 ; GENERIC-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] 1266 ; GENERIC-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00] 1267 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1268 ; GENERIC-NEXT: retq # sched: [1:1.00] 1269 ; 1270 ; ATOM-LABEL: test_packssdw: 1271 ; ATOM: # %bb.0: 1272 ; ATOM-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50] 1273 ; ATOM-NEXT: packssdw (%rdi), %mm0 # sched: [1:1.00] 1274 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1275 ; ATOM-NEXT: retq # sched: [79:39.50] 1276 ; 1277 ; SLM-LABEL: test_packssdw: 1278 ; SLM: # %bb.0: 1279 ; SLM-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] 1280 ; SLM-NEXT: packssdw (%rdi), %mm0 # sched: [4:1.00] 1281 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1282 ; SLM-NEXT: retq # sched: [4:1.00] 1283 ; 1284 ; SANDY-LABEL: test_packssdw: 1285 ; SANDY: # %bb.0: 1286 ; SANDY-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] 1287 ; SANDY-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00] 1288 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1289 ; SANDY-NEXT: retq # sched: [1:1.00] 1290 ; 1291 ; HASWELL-LABEL: test_packssdw: 1292 ; HASWELL: # %bb.0: 1293 ; HASWELL-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] 1294 ; HASWELL-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00] 1295 ; HASWELL-NEXT: 
movq %mm0, %rax # sched: [1:1.00] 1296 ; HASWELL-NEXT: retq # sched: [7:1.00] 1297 ; 1298 ; BROADWELL-LABEL: test_packssdw: 1299 ; BROADWELL: # %bb.0: 1300 ; BROADWELL-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] 1301 ; BROADWELL-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00] 1302 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1303 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1304 ; 1305 ; SKYLAKE-LABEL: test_packssdw: 1306 ; SKYLAKE: # %bb.0: 1307 ; SKYLAKE-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] 1308 ; SKYLAKE-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00] 1309 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1310 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1311 ; 1312 ; SKX-LABEL: test_packssdw: 1313 ; SKX: # %bb.0: 1314 ; SKX-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00] 1315 ; SKX-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00] 1316 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1317 ; SKX-NEXT: retq # sched: [7:1.00] 1318 ; 1319 ; BTVER2-LABEL: test_packssdw: 1320 ; BTVER2: # %bb.0: 1321 ; BTVER2-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50] 1322 ; BTVER2-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00] 1323 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1324 ; BTVER2-NEXT: retq # sched: [4:1.00] 1325 ; 1326 ; ZNVER1-LABEL: test_packssdw: 1327 ; ZNVER1: # %bb.0: 1328 ; ZNVER1-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50] 1329 ; ZNVER1-NEXT: packssdw (%rdi), %mm0 # sched: [1:0.50] 1330 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1331 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1332 %1 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a0, x86_mmx %a1) 1333 %2 = load x86_mmx, x86_mmx *%a2, align 8 1334 %3 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %1, x86_mmx %2) 1335 %4 = bitcast x86_mmx %3 to i64 1336 ret i64 %4 1337 } 1338 declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone 1339 1340 define i64 @test_packsswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1341 ; GENERIC-LABEL: test_packsswb: 1342 ; GENERIC: # %bb.0: 1343 ; GENERIC-NEXT: packsswb 
%mm1, %mm0 # sched: [1:1.00] 1344 ; GENERIC-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00] 1345 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1346 ; GENERIC-NEXT: retq # sched: [1:1.00] 1347 ; 1348 ; ATOM-LABEL: test_packsswb: 1349 ; ATOM: # %bb.0: 1350 ; ATOM-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50] 1351 ; ATOM-NEXT: packsswb (%rdi), %mm0 # sched: [1:1.00] 1352 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1353 ; ATOM-NEXT: retq # sched: [79:39.50] 1354 ; 1355 ; SLM-LABEL: test_packsswb: 1356 ; SLM: # %bb.0: 1357 ; SLM-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00] 1358 ; SLM-NEXT: packsswb (%rdi), %mm0 # sched: [4:1.00] 1359 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1360 ; SLM-NEXT: retq # sched: [4:1.00] 1361 ; 1362 ; SANDY-LABEL: test_packsswb: 1363 ; SANDY: # %bb.0: 1364 ; SANDY-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00] 1365 ; SANDY-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00] 1366 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1367 ; SANDY-NEXT: retq # sched: [1:1.00] 1368 ; 1369 ; HASWELL-LABEL: test_packsswb: 1370 ; HASWELL: # %bb.0: 1371 ; HASWELL-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] 1372 ; HASWELL-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00] 1373 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1374 ; HASWELL-NEXT: retq # sched: [7:1.00] 1375 ; 1376 ; BROADWELL-LABEL: test_packsswb: 1377 ; BROADWELL: # %bb.0: 1378 ; BROADWELL-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] 1379 ; BROADWELL-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00] 1380 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1381 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1382 ; 1383 ; SKYLAKE-LABEL: test_packsswb: 1384 ; SKYLAKE: # %bb.0: 1385 ; SKYLAKE-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] 1386 ; SKYLAKE-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00] 1387 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1388 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1389 ; 1390 ; SKX-LABEL: test_packsswb: 1391 ; SKX: # %bb.0: 1392 ; SKX-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00] 1393 ; 
SKX-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00] 1394 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1395 ; SKX-NEXT: retq # sched: [7:1.00] 1396 ; 1397 ; BTVER2-LABEL: test_packsswb: 1398 ; BTVER2: # %bb.0: 1399 ; BTVER2-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50] 1400 ; BTVER2-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00] 1401 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1402 ; BTVER2-NEXT: retq # sched: [4:1.00] 1403 ; 1404 ; ZNVER1-LABEL: test_packsswb: 1405 ; ZNVER1: # %bb.0: 1406 ; ZNVER1-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50] 1407 ; ZNVER1-NEXT: packsswb (%rdi), %mm0 # sched: [1:0.50] 1408 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1409 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1410 %1 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a0, x86_mmx %a1) 1411 %2 = load x86_mmx, x86_mmx *%a2, align 8 1412 %3 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %1, x86_mmx %2) 1413 %4 = bitcast x86_mmx %3 to i64 1414 ret i64 %4 1415 } 1416 declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone 1417 1418 define i64 @test_packuswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1419 ; GENERIC-LABEL: test_packuswb: 1420 ; GENERIC: # %bb.0: 1421 ; GENERIC-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] 1422 ; GENERIC-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00] 1423 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1424 ; GENERIC-NEXT: retq # sched: [1:1.00] 1425 ; 1426 ; ATOM-LABEL: test_packuswb: 1427 ; ATOM: # %bb.0: 1428 ; ATOM-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50] 1429 ; ATOM-NEXT: packuswb (%rdi), %mm0 # sched: [1:1.00] 1430 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1431 ; ATOM-NEXT: retq # sched: [79:39.50] 1432 ; 1433 ; SLM-LABEL: test_packuswb: 1434 ; SLM: # %bb.0: 1435 ; SLM-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] 1436 ; SLM-NEXT: packuswb (%rdi), %mm0 # sched: [4:1.00] 1437 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1438 ; SLM-NEXT: retq # sched: [4:1.00] 1439 ; 1440 ; SANDY-LABEL: test_packuswb: 1441 ; SANDY: # %bb.0: 
1442 ; SANDY-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] 1443 ; SANDY-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00] 1444 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1445 ; SANDY-NEXT: retq # sched: [1:1.00] 1446 ; 1447 ; HASWELL-LABEL: test_packuswb: 1448 ; HASWELL: # %bb.0: 1449 ; HASWELL-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] 1450 ; HASWELL-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00] 1451 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1452 ; HASWELL-NEXT: retq # sched: [7:1.00] 1453 ; 1454 ; BROADWELL-LABEL: test_packuswb: 1455 ; BROADWELL: # %bb.0: 1456 ; BROADWELL-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] 1457 ; BROADWELL-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00] 1458 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1459 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1460 ; 1461 ; SKYLAKE-LABEL: test_packuswb: 1462 ; SKYLAKE: # %bb.0: 1463 ; SKYLAKE-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] 1464 ; SKYLAKE-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00] 1465 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1466 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1467 ; 1468 ; SKX-LABEL: test_packuswb: 1469 ; SKX: # %bb.0: 1470 ; SKX-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00] 1471 ; SKX-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00] 1472 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1473 ; SKX-NEXT: retq # sched: [7:1.00] 1474 ; 1475 ; BTVER2-LABEL: test_packuswb: 1476 ; BTVER2: # %bb.0: 1477 ; BTVER2-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50] 1478 ; BTVER2-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00] 1479 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1480 ; BTVER2-NEXT: retq # sched: [4:1.00] 1481 ; 1482 ; ZNVER1-LABEL: test_packuswb: 1483 ; ZNVER1: # %bb.0: 1484 ; ZNVER1-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50] 1485 ; ZNVER1-NEXT: packuswb (%rdi), %mm0 # sched: [1:0.50] 1486 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1487 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1488 %1 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a0, x86_mmx %a1) 1489 %2 = 
load x86_mmx, x86_mmx *%a2, align 8 1490 %3 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %1, x86_mmx %2) 1491 %4 = bitcast x86_mmx %3 to i64 1492 ret i64 %4 1493 } 1494 declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone 1495 1496 define i64 @test_paddb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1497 ; GENERIC-LABEL: test_paddb: 1498 ; GENERIC: # %bb.0: 1499 ; GENERIC-NEXT: paddb %mm1, %mm0 # sched: [3:1.00] 1500 ; GENERIC-NEXT: paddb (%rdi), %mm0 # sched: [8:1.00] 1501 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1502 ; GENERIC-NEXT: retq # sched: [1:1.00] 1503 ; 1504 ; ATOM-LABEL: test_paddb: 1505 ; ATOM: # %bb.0: 1506 ; ATOM-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] 1507 ; ATOM-NEXT: paddb (%rdi), %mm0 # sched: [1:1.00] 1508 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1509 ; ATOM-NEXT: retq # sched: [79:39.50] 1510 ; 1511 ; SLM-LABEL: test_paddb: 1512 ; SLM: # %bb.0: 1513 ; SLM-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] 1514 ; SLM-NEXT: paddb (%rdi), %mm0 # sched: [4:1.00] 1515 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1516 ; SLM-NEXT: retq # sched: [4:1.00] 1517 ; 1518 ; SANDY-LABEL: test_paddb: 1519 ; SANDY: # %bb.0: 1520 ; SANDY-NEXT: paddb %mm1, %mm0 # sched: [3:1.00] 1521 ; SANDY-NEXT: paddb (%rdi), %mm0 # sched: [8:1.00] 1522 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1523 ; SANDY-NEXT: retq # sched: [1:1.00] 1524 ; 1525 ; HASWELL-LABEL: test_paddb: 1526 ; HASWELL: # %bb.0: 1527 ; HASWELL-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] 1528 ; HASWELL-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50] 1529 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1530 ; HASWELL-NEXT: retq # sched: [7:1.00] 1531 ; 1532 ; BROADWELL-LABEL: test_paddb: 1533 ; BROADWELL: # %bb.0: 1534 ; BROADWELL-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] 1535 ; BROADWELL-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50] 1536 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1537 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1538 ; 1539 ; SKYLAKE-LABEL: test_paddb: 
1540 ; SKYLAKE: # %bb.0: 1541 ; SKYLAKE-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] 1542 ; SKYLAKE-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50] 1543 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1544 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1545 ; 1546 ; SKX-LABEL: test_paddb: 1547 ; SKX: # %bb.0: 1548 ; SKX-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] 1549 ; SKX-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50] 1550 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1551 ; SKX-NEXT: retq # sched: [7:1.00] 1552 ; 1553 ; BTVER2-LABEL: test_paddb: 1554 ; BTVER2: # %bb.0: 1555 ; BTVER2-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] 1556 ; BTVER2-NEXT: paddb (%rdi), %mm0 # sched: [6:1.00] 1557 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1558 ; BTVER2-NEXT: retq # sched: [4:1.00] 1559 ; 1560 ; ZNVER1-LABEL: test_paddb: 1561 ; ZNVER1: # %bb.0: 1562 ; ZNVER1-NEXT: paddb %mm1, %mm0 # sched: [1:0.25] 1563 ; ZNVER1-NEXT: paddb (%rdi), %mm0 # sched: [8:0.50] 1564 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1565 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1566 %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a1) 1567 %2 = load x86_mmx, x86_mmx *%a2, align 8 1568 %3 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %1, x86_mmx %2) 1569 %4 = bitcast x86_mmx %3 to i64 1570 ret i64 %4 1571 } 1572 declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone 1573 1574 define i64 @test_paddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1575 ; GENERIC-LABEL: test_paddd: 1576 ; GENERIC: # %bb.0: 1577 ; GENERIC-NEXT: paddd %mm1, %mm0 # sched: [3:1.00] 1578 ; GENERIC-NEXT: paddd (%rdi), %mm0 # sched: [8:1.00] 1579 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1580 ; GENERIC-NEXT: retq # sched: [1:1.00] 1581 ; 1582 ; ATOM-LABEL: test_paddd: 1583 ; ATOM: # %bb.0: 1584 ; ATOM-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] 1585 ; ATOM-NEXT: paddd (%rdi), %mm0 # sched: [1:1.00] 1586 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1587 ; ATOM-NEXT: retq # sched: [79:39.50] 1588 ; 1589 ; 
SLM-LABEL: test_paddd: 1590 ; SLM: # %bb.0: 1591 ; SLM-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] 1592 ; SLM-NEXT: paddd (%rdi), %mm0 # sched: [4:1.00] 1593 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1594 ; SLM-NEXT: retq # sched: [4:1.00] 1595 ; 1596 ; SANDY-LABEL: test_paddd: 1597 ; SANDY: # %bb.0: 1598 ; SANDY-NEXT: paddd %mm1, %mm0 # sched: [3:1.00] 1599 ; SANDY-NEXT: paddd (%rdi), %mm0 # sched: [8:1.00] 1600 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1601 ; SANDY-NEXT: retq # sched: [1:1.00] 1602 ; 1603 ; HASWELL-LABEL: test_paddd: 1604 ; HASWELL: # %bb.0: 1605 ; HASWELL-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] 1606 ; HASWELL-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50] 1607 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1608 ; HASWELL-NEXT: retq # sched: [7:1.00] 1609 ; 1610 ; BROADWELL-LABEL: test_paddd: 1611 ; BROADWELL: # %bb.0: 1612 ; BROADWELL-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] 1613 ; BROADWELL-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50] 1614 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1615 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1616 ; 1617 ; SKYLAKE-LABEL: test_paddd: 1618 ; SKYLAKE: # %bb.0: 1619 ; SKYLAKE-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] 1620 ; SKYLAKE-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50] 1621 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1622 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1623 ; 1624 ; SKX-LABEL: test_paddd: 1625 ; SKX: # %bb.0: 1626 ; SKX-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] 1627 ; SKX-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50] 1628 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1629 ; SKX-NEXT: retq # sched: [7:1.00] 1630 ; 1631 ; BTVER2-LABEL: test_paddd: 1632 ; BTVER2: # %bb.0: 1633 ; BTVER2-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] 1634 ; BTVER2-NEXT: paddd (%rdi), %mm0 # sched: [6:1.00] 1635 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1636 ; BTVER2-NEXT: retq # sched: [4:1.00] 1637 ; 1638 ; ZNVER1-LABEL: test_paddd: 1639 ; ZNVER1: # %bb.0: 1640 ; ZNVER1-NEXT: paddd %mm1, %mm0 # sched: 
[1:0.25] 1641 ; ZNVER1-NEXT: paddd (%rdi), %mm0 # sched: [8:0.50] 1642 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1643 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1644 %1 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a0, x86_mmx %a1) 1645 %2 = load x86_mmx, x86_mmx *%a2, align 8 1646 %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %1, x86_mmx %2) 1647 %4 = bitcast x86_mmx %3 to i64 1648 ret i64 %4 1649 } 1650 declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone 1651 1652 define i64 @test_paddq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1653 ; GENERIC-LABEL: test_paddq: 1654 ; GENERIC: # %bb.0: 1655 ; GENERIC-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] 1656 ; GENERIC-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50] 1657 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1658 ; GENERIC-NEXT: retq # sched: [1:1.00] 1659 ; 1660 ; ATOM-LABEL: test_paddq: 1661 ; ATOM: # %bb.0: 1662 ; ATOM-NEXT: paddq %mm1, %mm0 # sched: [2:1.00] 1663 ; ATOM-NEXT: paddq (%rdi), %mm0 # sched: [3:1.50] 1664 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1665 ; ATOM-NEXT: retq # sched: [79:39.50] 1666 ; 1667 ; SLM-LABEL: test_paddq: 1668 ; SLM: # %bb.0: 1669 ; SLM-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] 1670 ; SLM-NEXT: paddq (%rdi), %mm0 # sched: [4:1.00] 1671 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1672 ; SLM-NEXT: retq # sched: [4:1.00] 1673 ; 1674 ; SANDY-LABEL: test_paddq: 1675 ; SANDY: # %bb.0: 1676 ; SANDY-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] 1677 ; SANDY-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50] 1678 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1679 ; SANDY-NEXT: retq # sched: [1:1.00] 1680 ; 1681 ; HASWELL-LABEL: test_paddq: 1682 ; HASWELL: # %bb.0: 1683 ; HASWELL-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] 1684 ; HASWELL-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50] 1685 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1686 ; HASWELL-NEXT: retq # sched: [7:1.00] 1687 ; 1688 ; BROADWELL-LABEL: test_paddq: 1689 ; BROADWELL: # %bb.0: 1690 ; BROADWELL-NEXT: 
paddq %mm1, %mm0 # sched: [1:0.50] 1691 ; BROADWELL-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50] 1692 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1693 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1694 ; 1695 ; SKYLAKE-LABEL: test_paddq: 1696 ; SKYLAKE: # %bb.0: 1697 ; SKYLAKE-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] 1698 ; SKYLAKE-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50] 1699 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1700 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1701 ; 1702 ; SKX-LABEL: test_paddq: 1703 ; SKX: # %bb.0: 1704 ; SKX-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] 1705 ; SKX-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50] 1706 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1707 ; SKX-NEXT: retq # sched: [7:1.00] 1708 ; 1709 ; BTVER2-LABEL: test_paddq: 1710 ; BTVER2: # %bb.0: 1711 ; BTVER2-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] 1712 ; BTVER2-NEXT: paddq (%rdi), %mm0 # sched: [6:1.00] 1713 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1714 ; BTVER2-NEXT: retq # sched: [4:1.00] 1715 ; 1716 ; ZNVER1-LABEL: test_paddq: 1717 ; ZNVER1: # %bb.0: 1718 ; ZNVER1-NEXT: paddq %mm1, %mm0 # sched: [1:0.25] 1719 ; ZNVER1-NEXT: paddq (%rdi), %mm0 # sched: [8:0.50] 1720 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1721 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1722 %1 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a0, x86_mmx %a1) 1723 %2 = load x86_mmx, x86_mmx *%a2, align 8 1724 %3 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %1, x86_mmx %2) 1725 %4 = bitcast x86_mmx %3 to i64 1726 ret i64 %4 1727 } 1728 declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone 1729 1730 define i64 @test_paddsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1731 ; GENERIC-LABEL: test_paddsb: 1732 ; GENERIC: # %bb.0: 1733 ; GENERIC-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00] 1734 ; GENERIC-NEXT: paddsb (%rdi), %mm0 # sched: [8:1.00] 1735 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1736 ; GENERIC-NEXT: retq # sched: [1:1.00] 1737 ; 1738 ; ATOM-LABEL: test_paddsb: 
1739 ; ATOM: # %bb.0: 1740 ; ATOM-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] 1741 ; ATOM-NEXT: paddsb (%rdi), %mm0 # sched: [1:1.00] 1742 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1743 ; ATOM-NEXT: retq # sched: [79:39.50] 1744 ; 1745 ; SLM-LABEL: test_paddsb: 1746 ; SLM: # %bb.0: 1747 ; SLM-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] 1748 ; SLM-NEXT: paddsb (%rdi), %mm0 # sched: [4:1.00] 1749 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1750 ; SLM-NEXT: retq # sched: [4:1.00] 1751 ; 1752 ; SANDY-LABEL: test_paddsb: 1753 ; SANDY: # %bb.0: 1754 ; SANDY-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00] 1755 ; SANDY-NEXT: paddsb (%rdi), %mm0 # sched: [8:1.00] 1756 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1757 ; SANDY-NEXT: retq # sched: [1:1.00] 1758 ; 1759 ; HASWELL-LABEL: test_paddsb: 1760 ; HASWELL: # %bb.0: 1761 ; HASWELL-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] 1762 ; HASWELL-NEXT: paddsb (%rdi), %mm0 # sched: [6:0.50] 1763 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1764 ; HASWELL-NEXT: retq # sched: [7:1.00] 1765 ; 1766 ; BROADWELL-LABEL: test_paddsb: 1767 ; BROADWELL: # %bb.0: 1768 ; BROADWELL-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] 1769 ; BROADWELL-NEXT: paddsb (%rdi), %mm0 # sched: [6:0.50] 1770 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1771 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1772 ; 1773 ; SKYLAKE-LABEL: test_paddsb: 1774 ; SKYLAKE: # %bb.0: 1775 ; SKYLAKE-NEXT: paddsb %mm1, %mm0 # sched: [1:1.00] 1776 ; SKYLAKE-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00] 1777 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1778 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1779 ; 1780 ; SKX-LABEL: test_paddsb: 1781 ; SKX: # %bb.0: 1782 ; SKX-NEXT: paddsb %mm1, %mm0 # sched: [1:1.00] 1783 ; SKX-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00] 1784 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1785 ; SKX-NEXT: retq # sched: [7:1.00] 1786 ; 1787 ; BTVER2-LABEL: test_paddsb: 1788 ; BTVER2: # %bb.0: 1789 ; BTVER2-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] 1790 ; 
BTVER2-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00] 1791 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1792 ; BTVER2-NEXT: retq # sched: [4:1.00] 1793 ; 1794 ; ZNVER1-LABEL: test_paddsb: 1795 ; ZNVER1: # %bb.0: 1796 ; ZNVER1-NEXT: paddsb %mm1, %mm0 # sched: [1:0.25] 1797 ; ZNVER1-NEXT: paddsb (%rdi), %mm0 # sched: [8:0.50] 1798 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1799 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1800 %1 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a0, x86_mmx %a1) 1801 %2 = load x86_mmx, x86_mmx *%a2, align 8 1802 %3 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %1, x86_mmx %2) 1803 %4 = bitcast x86_mmx %3 to i64 1804 ret i64 %4 1805 } 1806 declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone 1807 1808 define i64 @test_paddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1809 ; GENERIC-LABEL: test_paddsw: 1810 ; GENERIC: # %bb.0: 1811 ; GENERIC-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00] 1812 ; GENERIC-NEXT: paddsw (%rdi), %mm0 # sched: [8:1.00] 1813 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1814 ; GENERIC-NEXT: retq # sched: [1:1.00] 1815 ; 1816 ; ATOM-LABEL: test_paddsw: 1817 ; ATOM: # %bb.0: 1818 ; ATOM-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] 1819 ; ATOM-NEXT: paddsw (%rdi), %mm0 # sched: [1:1.00] 1820 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1821 ; ATOM-NEXT: retq # sched: [79:39.50] 1822 ; 1823 ; SLM-LABEL: test_paddsw: 1824 ; SLM: # %bb.0: 1825 ; SLM-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] 1826 ; SLM-NEXT: paddsw (%rdi), %mm0 # sched: [4:1.00] 1827 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1828 ; SLM-NEXT: retq # sched: [4:1.00] 1829 ; 1830 ; SANDY-LABEL: test_paddsw: 1831 ; SANDY: # %bb.0: 1832 ; SANDY-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00] 1833 ; SANDY-NEXT: paddsw (%rdi), %mm0 # sched: [8:1.00] 1834 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1835 ; SANDY-NEXT: retq # sched: [1:1.00] 1836 ; 1837 ; HASWELL-LABEL: test_paddsw: 1838 ; HASWELL: # %bb.0: 1839 ; HASWELL-NEXT: paddsw 
%mm1, %mm0 # sched: [1:0.50] 1840 ; HASWELL-NEXT: paddsw (%rdi), %mm0 # sched: [6:0.50] 1841 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1842 ; HASWELL-NEXT: retq # sched: [7:1.00] 1843 ; 1844 ; BROADWELL-LABEL: test_paddsw: 1845 ; BROADWELL: # %bb.0: 1846 ; BROADWELL-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] 1847 ; BROADWELL-NEXT: paddsw (%rdi), %mm0 # sched: [6:0.50] 1848 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1849 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1850 ; 1851 ; SKYLAKE-LABEL: test_paddsw: 1852 ; SKYLAKE: # %bb.0: 1853 ; SKYLAKE-NEXT: paddsw %mm1, %mm0 # sched: [1:1.00] 1854 ; SKYLAKE-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00] 1855 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1856 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1857 ; 1858 ; SKX-LABEL: test_paddsw: 1859 ; SKX: # %bb.0: 1860 ; SKX-NEXT: paddsw %mm1, %mm0 # sched: [1:1.00] 1861 ; SKX-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00] 1862 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1863 ; SKX-NEXT: retq # sched: [7:1.00] 1864 ; 1865 ; BTVER2-LABEL: test_paddsw: 1866 ; BTVER2: # %bb.0: 1867 ; BTVER2-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] 1868 ; BTVER2-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00] 1869 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1870 ; BTVER2-NEXT: retq # sched: [4:1.00] 1871 ; 1872 ; ZNVER1-LABEL: test_paddsw: 1873 ; ZNVER1: # %bb.0: 1874 ; ZNVER1-NEXT: paddsw %mm1, %mm0 # sched: [1:0.25] 1875 ; ZNVER1-NEXT: paddsw (%rdi), %mm0 # sched: [8:0.50] 1876 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1877 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1878 %1 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a0, x86_mmx %a1) 1879 %2 = load x86_mmx, x86_mmx *%a2, align 8 1880 %3 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %1, x86_mmx %2) 1881 %4 = bitcast x86_mmx %3 to i64 1882 ret i64 %4 1883 } 1884 declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone 1885 1886 define i64 @test_paddusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1887 ; 
GENERIC-LABEL: test_paddusb: 1888 ; GENERIC: # %bb.0: 1889 ; GENERIC-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00] 1890 ; GENERIC-NEXT: paddusb (%rdi), %mm0 # sched: [8:1.00] 1891 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1892 ; GENERIC-NEXT: retq # sched: [1:1.00] 1893 ; 1894 ; ATOM-LABEL: test_paddusb: 1895 ; ATOM: # %bb.0: 1896 ; ATOM-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] 1897 ; ATOM-NEXT: paddusb (%rdi), %mm0 # sched: [1:1.00] 1898 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1899 ; ATOM-NEXT: retq # sched: [79:39.50] 1900 ; 1901 ; SLM-LABEL: test_paddusb: 1902 ; SLM: # %bb.0: 1903 ; SLM-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] 1904 ; SLM-NEXT: paddusb (%rdi), %mm0 # sched: [4:1.00] 1905 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1906 ; SLM-NEXT: retq # sched: [4:1.00] 1907 ; 1908 ; SANDY-LABEL: test_paddusb: 1909 ; SANDY: # %bb.0: 1910 ; SANDY-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00] 1911 ; SANDY-NEXT: paddusb (%rdi), %mm0 # sched: [8:1.00] 1912 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1913 ; SANDY-NEXT: retq # sched: [1:1.00] 1914 ; 1915 ; HASWELL-LABEL: test_paddusb: 1916 ; HASWELL: # %bb.0: 1917 ; HASWELL-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] 1918 ; HASWELL-NEXT: paddusb (%rdi), %mm0 # sched: [6:0.50] 1919 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1920 ; HASWELL-NEXT: retq # sched: [7:1.00] 1921 ; 1922 ; BROADWELL-LABEL: test_paddusb: 1923 ; BROADWELL: # %bb.0: 1924 ; BROADWELL-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] 1925 ; BROADWELL-NEXT: paddusb (%rdi), %mm0 # sched: [6:0.50] 1926 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1927 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1928 ; 1929 ; SKYLAKE-LABEL: test_paddusb: 1930 ; SKYLAKE: # %bb.0: 1931 ; SKYLAKE-NEXT: paddusb %mm1, %mm0 # sched: [1:1.00] 1932 ; SKYLAKE-NEXT: paddusb (%rdi), %mm0 # sched: [6:1.00] 1933 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 1934 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1935 ; 1936 ; SKX-LABEL: test_paddusb: 1937 ; SKX: # 
%bb.0: 1938 ; SKX-NEXT: paddusb %mm1, %mm0 # sched: [1:1.00] 1939 ; SKX-NEXT: paddusb (%rdi), %mm0 # sched: [6:1.00] 1940 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 1941 ; SKX-NEXT: retq # sched: [7:1.00] 1942 ; 1943 ; BTVER2-LABEL: test_paddusb: 1944 ; BTVER2: # %bb.0: 1945 ; BTVER2-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] 1946 ; BTVER2-NEXT: paddusb (%rdi), %mm0 # sched: [6:1.00] 1947 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 1948 ; BTVER2-NEXT: retq # sched: [4:1.00] 1949 ; 1950 ; ZNVER1-LABEL: test_paddusb: 1951 ; ZNVER1: # %bb.0: 1952 ; ZNVER1-NEXT: paddusb %mm1, %mm0 # sched: [1:0.25] 1953 ; ZNVER1-NEXT: paddusb (%rdi), %mm0 # sched: [8:0.50] 1954 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 1955 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1956 %1 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a0, x86_mmx %a1) 1957 %2 = load x86_mmx, x86_mmx *%a2, align 8 1958 %3 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %1, x86_mmx %2) 1959 %4 = bitcast x86_mmx %3 to i64 1960 ret i64 %4 1961 } 1962 declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone 1963 1964 define i64 @test_paddusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 1965 ; GENERIC-LABEL: test_paddusw: 1966 ; GENERIC: # %bb.0: 1967 ; GENERIC-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00] 1968 ; GENERIC-NEXT: paddusw (%rdi), %mm0 # sched: [8:1.00] 1969 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 1970 ; GENERIC-NEXT: retq # sched: [1:1.00] 1971 ; 1972 ; ATOM-LABEL: test_paddusw: 1973 ; ATOM: # %bb.0: 1974 ; ATOM-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] 1975 ; ATOM-NEXT: paddusw (%rdi), %mm0 # sched: [1:1.00] 1976 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 1977 ; ATOM-NEXT: retq # sched: [79:39.50] 1978 ; 1979 ; SLM-LABEL: test_paddusw: 1980 ; SLM: # %bb.0: 1981 ; SLM-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] 1982 ; SLM-NEXT: paddusw (%rdi), %mm0 # sched: [4:1.00] 1983 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 1984 ; SLM-NEXT: retq # sched: [4:1.00] 1985 ; 1986 ; 
SANDY-LABEL: test_paddusw: 1987 ; SANDY: # %bb.0: 1988 ; SANDY-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00] 1989 ; SANDY-NEXT: paddusw (%rdi), %mm0 # sched: [8:1.00] 1990 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 1991 ; SANDY-NEXT: retq # sched: [1:1.00] 1992 ; 1993 ; HASWELL-LABEL: test_paddusw: 1994 ; HASWELL: # %bb.0: 1995 ; HASWELL-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] 1996 ; HASWELL-NEXT: paddusw (%rdi), %mm0 # sched: [6:0.50] 1997 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 1998 ; HASWELL-NEXT: retq # sched: [7:1.00] 1999 ; 2000 ; BROADWELL-LABEL: test_paddusw: 2001 ; BROADWELL: # %bb.0: 2002 ; BROADWELL-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] 2003 ; BROADWELL-NEXT: paddusw (%rdi), %mm0 # sched: [6:0.50] 2004 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2005 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2006 ; 2007 ; SKYLAKE-LABEL: test_paddusw: 2008 ; SKYLAKE: # %bb.0: 2009 ; SKYLAKE-NEXT: paddusw %mm1, %mm0 # sched: [1:1.00] 2010 ; SKYLAKE-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00] 2011 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2012 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2013 ; 2014 ; SKX-LABEL: test_paddusw: 2015 ; SKX: # %bb.0: 2016 ; SKX-NEXT: paddusw %mm1, %mm0 # sched: [1:1.00] 2017 ; SKX-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00] 2018 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2019 ; SKX-NEXT: retq # sched: [7:1.00] 2020 ; 2021 ; BTVER2-LABEL: test_paddusw: 2022 ; BTVER2: # %bb.0: 2023 ; BTVER2-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] 2024 ; BTVER2-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00] 2025 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2026 ; BTVER2-NEXT: retq # sched: [4:1.00] 2027 ; 2028 ; ZNVER1-LABEL: test_paddusw: 2029 ; ZNVER1: # %bb.0: 2030 ; ZNVER1-NEXT: paddusw %mm1, %mm0 # sched: [1:0.25] 2031 ; ZNVER1-NEXT: paddusw (%rdi), %mm0 # sched: [8:0.50] 2032 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2033 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2034 %1 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx 
%a0, x86_mmx %a1) 2035 %2 = load x86_mmx, x86_mmx *%a2, align 8 2036 %3 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %1, x86_mmx %2) 2037 %4 = bitcast x86_mmx %3 to i64 2038 ret i64 %4 2039 } 2040 declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone 2041 2042 define i64 @test_paddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2043 ; GENERIC-LABEL: test_paddw: 2044 ; GENERIC: # %bb.0: 2045 ; GENERIC-NEXT: paddw %mm1, %mm0 # sched: [3:1.00] 2046 ; GENERIC-NEXT: paddw (%rdi), %mm0 # sched: [8:1.00] 2047 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2048 ; GENERIC-NEXT: retq # sched: [1:1.00] 2049 ; 2050 ; ATOM-LABEL: test_paddw: 2051 ; ATOM: # %bb.0: 2052 ; ATOM-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] 2053 ; ATOM-NEXT: paddw (%rdi), %mm0 # sched: [1:1.00] 2054 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2055 ; ATOM-NEXT: retq # sched: [79:39.50] 2056 ; 2057 ; SLM-LABEL: test_paddw: 2058 ; SLM: # %bb.0: 2059 ; SLM-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] 2060 ; SLM-NEXT: paddw (%rdi), %mm0 # sched: [4:1.00] 2061 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2062 ; SLM-NEXT: retq # sched: [4:1.00] 2063 ; 2064 ; SANDY-LABEL: test_paddw: 2065 ; SANDY: # %bb.0: 2066 ; SANDY-NEXT: paddw %mm1, %mm0 # sched: [3:1.00] 2067 ; SANDY-NEXT: paddw (%rdi), %mm0 # sched: [8:1.00] 2068 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2069 ; SANDY-NEXT: retq # sched: [1:1.00] 2070 ; 2071 ; HASWELL-LABEL: test_paddw: 2072 ; HASWELL: # %bb.0: 2073 ; HASWELL-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] 2074 ; HASWELL-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50] 2075 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2076 ; HASWELL-NEXT: retq # sched: [7:1.00] 2077 ; 2078 ; BROADWELL-LABEL: test_paddw: 2079 ; BROADWELL: # %bb.0: 2080 ; BROADWELL-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] 2081 ; BROADWELL-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50] 2082 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2083 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2084 ; 2085 ; 
SKYLAKE-LABEL: test_paddw: 2086 ; SKYLAKE: # %bb.0: 2087 ; SKYLAKE-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] 2088 ; SKYLAKE-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50] 2089 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2090 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2091 ; 2092 ; SKX-LABEL: test_paddw: 2093 ; SKX: # %bb.0: 2094 ; SKX-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] 2095 ; SKX-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50] 2096 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2097 ; SKX-NEXT: retq # sched: [7:1.00] 2098 ; 2099 ; BTVER2-LABEL: test_paddw: 2100 ; BTVER2: # %bb.0: 2101 ; BTVER2-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] 2102 ; BTVER2-NEXT: paddw (%rdi), %mm0 # sched: [6:1.00] 2103 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2104 ; BTVER2-NEXT: retq # sched: [4:1.00] 2105 ; 2106 ; ZNVER1-LABEL: test_paddw: 2107 ; ZNVER1: # %bb.0: 2108 ; ZNVER1-NEXT: paddw %mm1, %mm0 # sched: [1:0.25] 2109 ; ZNVER1-NEXT: paddw (%rdi), %mm0 # sched: [8:0.50] 2110 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2111 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2112 %1 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a0, x86_mmx %a1) 2113 %2 = load x86_mmx, x86_mmx *%a2, align 8 2114 %3 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %1, x86_mmx %2) 2115 %4 = bitcast x86_mmx %3 to i64 2116 ret i64 %4 2117 } 2118 declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone 2119 2120 define i64 @test_palignr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2121 ; GENERIC-LABEL: test_palignr: 2122 ; GENERIC: # %bb.0: 2123 ; GENERIC-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50] 2124 ; GENERIC-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:0.50] 2125 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2126 ; GENERIC-NEXT: retq # sched: [1:1.00] 2127 ; 2128 ; ATOM-LABEL: test_palignr: 2129 ; ATOM: # %bb.0: 2130 ; ATOM-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] 2131 ; ATOM-NEXT: palignr $1, (%rdi), %mm0 # sched: [1:1.00] 2132 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2133 ; 
ATOM-NEXT: retq # sched: [79:39.50] 2134 ; 2135 ; SLM-LABEL: test_palignr: 2136 ; SLM: # %bb.0: 2137 ; SLM-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] 2138 ; SLM-NEXT: palignr $1, (%rdi), %mm0 # sched: [4:1.00] 2139 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2140 ; SLM-NEXT: retq # sched: [4:1.00] 2141 ; 2142 ; SANDY-LABEL: test_palignr: 2143 ; SANDY: # %bb.0: 2144 ; SANDY-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50] 2145 ; SANDY-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:0.50] 2146 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2147 ; SANDY-NEXT: retq # sched: [1:1.00] 2148 ; 2149 ; HASWELL-LABEL: test_palignr: 2150 ; HASWELL: # %bb.0: 2151 ; HASWELL-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] 2152 ; HASWELL-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] 2153 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2154 ; HASWELL-NEXT: retq # sched: [7:1.00] 2155 ; 2156 ; BROADWELL-LABEL: test_palignr: 2157 ; BROADWELL: # %bb.0: 2158 ; BROADWELL-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] 2159 ; BROADWELL-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] 2160 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2161 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2162 ; 2163 ; SKYLAKE-LABEL: test_palignr: 2164 ; SKYLAKE: # %bb.0: 2165 ; SKYLAKE-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] 2166 ; SKYLAKE-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] 2167 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2168 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2169 ; 2170 ; SKX-LABEL: test_palignr: 2171 ; SKX: # %bb.0: 2172 ; SKX-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00] 2173 ; SKX-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] 2174 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2175 ; SKX-NEXT: retq # sched: [7:1.00] 2176 ; 2177 ; BTVER2-LABEL: test_palignr: 2178 ; BTVER2: # %bb.0: 2179 ; BTVER2-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50] 2180 ; BTVER2-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] 2181 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2182 ; 
BTVER2-NEXT: retq # sched: [4:1.00] 2183 ; 2184 ; ZNVER1-LABEL: test_palignr: 2185 ; ZNVER1: # %bb.0: 2186 ; ZNVER1-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.25] 2187 ; ZNVER1-NEXT: palignr $1, (%rdi), %mm0 # sched: [8:0.50] 2188 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2189 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2190 %1 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %a0, x86_mmx %a1, i8 1) 2191 %2 = load x86_mmx, x86_mmx *%a2, align 8 2192 %3 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %1, x86_mmx %2, i8 1) 2193 %4 = bitcast x86_mmx %3 to i64 2194 ret i64 %4 2195 } 2196 declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone 2197 2198 define i64 @test_pand(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2199 ; GENERIC-LABEL: test_pand: 2200 ; GENERIC: # %bb.0: 2201 ; GENERIC-NEXT: pand %mm1, %mm0 # sched: [1:0.33] 2202 ; GENERIC-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] 2203 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2204 ; GENERIC-NEXT: retq # sched: [1:1.00] 2205 ; 2206 ; ATOM-LABEL: test_pand: 2207 ; ATOM: # %bb.0: 2208 ; ATOM-NEXT: pand %mm1, %mm0 # sched: [1:0.50] 2209 ; ATOM-NEXT: pand (%rdi), %mm0 # sched: [1:1.00] 2210 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2211 ; ATOM-NEXT: retq # sched: [79:39.50] 2212 ; 2213 ; SLM-LABEL: test_pand: 2214 ; SLM: # %bb.0: 2215 ; SLM-NEXT: pand %mm1, %mm0 # sched: [1:0.50] 2216 ; SLM-NEXT: pand (%rdi), %mm0 # sched: [4:1.00] 2217 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2218 ; SLM-NEXT: retq # sched: [4:1.00] 2219 ; 2220 ; SANDY-LABEL: test_pand: 2221 ; SANDY: # %bb.0: 2222 ; SANDY-NEXT: pand %mm1, %mm0 # sched: [1:0.33] 2223 ; SANDY-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] 2224 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2225 ; SANDY-NEXT: retq # sched: [1:1.00] 2226 ; 2227 ; HASWELL-LABEL: test_pand: 2228 ; HASWELL: # %bb.0: 2229 ; HASWELL-NEXT: pand %mm1, %mm0 # sched: [1:0.33] 2230 ; HASWELL-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] 2231 ; HASWELL-NEXT: movq 
%mm0, %rax # sched: [1:1.00] 2232 ; HASWELL-NEXT: retq # sched: [7:1.00] 2233 ; 2234 ; BROADWELL-LABEL: test_pand: 2235 ; BROADWELL: # %bb.0: 2236 ; BROADWELL-NEXT: pand %mm1, %mm0 # sched: [1:0.33] 2237 ; BROADWELL-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] 2238 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2239 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2240 ; 2241 ; SKYLAKE-LABEL: test_pand: 2242 ; SKYLAKE: # %bb.0: 2243 ; SKYLAKE-NEXT: pand %mm1, %mm0 # sched: [1:0.50] 2244 ; SKYLAKE-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] 2245 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2246 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2247 ; 2248 ; SKX-LABEL: test_pand: 2249 ; SKX: # %bb.0: 2250 ; SKX-NEXT: pand %mm1, %mm0 # sched: [1:0.50] 2251 ; SKX-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] 2252 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2253 ; SKX-NEXT: retq # sched: [7:1.00] 2254 ; 2255 ; BTVER2-LABEL: test_pand: 2256 ; BTVER2: # %bb.0: 2257 ; BTVER2-NEXT: pand %mm1, %mm0 # sched: [1:0.50] 2258 ; BTVER2-NEXT: pand (%rdi), %mm0 # sched: [6:1.00] 2259 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2260 ; BTVER2-NEXT: retq # sched: [4:1.00] 2261 ; 2262 ; ZNVER1-LABEL: test_pand: 2263 ; ZNVER1: # %bb.0: 2264 ; ZNVER1-NEXT: pand %mm1, %mm0 # sched: [1:0.25] 2265 ; ZNVER1-NEXT: pand (%rdi), %mm0 # sched: [8:0.50] 2266 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2267 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2268 %1 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a0, x86_mmx %a1) 2269 %2 = load x86_mmx, x86_mmx *%a2, align 8 2270 %3 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %1, x86_mmx %2) 2271 %4 = bitcast x86_mmx %3 to i64 2272 ret i64 %4 2273 } 2274 declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone 2275 2276 define i64 @test_pandn(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2277 ; GENERIC-LABEL: test_pandn: 2278 ; GENERIC: # %bb.0: 2279 ; GENERIC-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] 2280 ; GENERIC-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] 
2281 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2282 ; GENERIC-NEXT: retq # sched: [1:1.00] 2283 ; 2284 ; ATOM-LABEL: test_pandn: 2285 ; ATOM: # %bb.0: 2286 ; ATOM-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] 2287 ; ATOM-NEXT: pandn (%rdi), %mm0 # sched: [1:1.00] 2288 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2289 ; ATOM-NEXT: retq # sched: [79:39.50] 2290 ; 2291 ; SLM-LABEL: test_pandn: 2292 ; SLM: # %bb.0: 2293 ; SLM-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] 2294 ; SLM-NEXT: pandn (%rdi), %mm0 # sched: [4:1.00] 2295 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2296 ; SLM-NEXT: retq # sched: [4:1.00] 2297 ; 2298 ; SANDY-LABEL: test_pandn: 2299 ; SANDY: # %bb.0: 2300 ; SANDY-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] 2301 ; SANDY-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] 2302 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2303 ; SANDY-NEXT: retq # sched: [1:1.00] 2304 ; 2305 ; HASWELL-LABEL: test_pandn: 2306 ; HASWELL: # %bb.0: 2307 ; HASWELL-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] 2308 ; HASWELL-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] 2309 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2310 ; HASWELL-NEXT: retq # sched: [7:1.00] 2311 ; 2312 ; BROADWELL-LABEL: test_pandn: 2313 ; BROADWELL: # %bb.0: 2314 ; BROADWELL-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] 2315 ; BROADWELL-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] 2316 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2317 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2318 ; 2319 ; SKYLAKE-LABEL: test_pandn: 2320 ; SKYLAKE: # %bb.0: 2321 ; SKYLAKE-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] 2322 ; SKYLAKE-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] 2323 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2324 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2325 ; 2326 ; SKX-LABEL: test_pandn: 2327 ; SKX: # %bb.0: 2328 ; SKX-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] 2329 ; SKX-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] 2330 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2331 ; SKX-NEXT: retq # sched: [7:1.00] 2332 ; 2333 
; BTVER2-LABEL: test_pandn: 2334 ; BTVER2: # %bb.0: 2335 ; BTVER2-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] 2336 ; BTVER2-NEXT: pandn (%rdi), %mm0 # sched: [6:1.00] 2337 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2338 ; BTVER2-NEXT: retq # sched: [4:1.00] 2339 ; 2340 ; ZNVER1-LABEL: test_pandn: 2341 ; ZNVER1: # %bb.0: 2342 ; ZNVER1-NEXT: pandn %mm1, %mm0 # sched: [1:0.25] 2343 ; ZNVER1-NEXT: pandn (%rdi), %mm0 # sched: [8:0.50] 2344 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2345 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2346 %1 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a0, x86_mmx %a1) 2347 %2 = load x86_mmx, x86_mmx *%a2, align 8 2348 %3 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %1, x86_mmx %2) 2349 %4 = bitcast x86_mmx %3 to i64 2350 ret i64 %4 2351 } 2352 declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone 2353 2354 define i64 @test_pavgb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2355 ; GENERIC-LABEL: test_pavgb: 2356 ; GENERIC: # %bb.0: 2357 ; GENERIC-NEXT: pavgb %mm1, %mm0 # sched: [3:1.00] 2358 ; GENERIC-NEXT: pavgb (%rdi), %mm0 # sched: [8:1.00] 2359 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2360 ; GENERIC-NEXT: retq # sched: [1:1.00] 2361 ; 2362 ; ATOM-LABEL: test_pavgb: 2363 ; ATOM: # %bb.0: 2364 ; ATOM-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] 2365 ; ATOM-NEXT: pavgb (%rdi), %mm0 # sched: [1:1.00] 2366 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2367 ; ATOM-NEXT: retq # sched: [79:39.50] 2368 ; 2369 ; SLM-LABEL: test_pavgb: 2370 ; SLM: # %bb.0: 2371 ; SLM-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] 2372 ; SLM-NEXT: pavgb (%rdi), %mm0 # sched: [4:1.00] 2373 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2374 ; SLM-NEXT: retq # sched: [4:1.00] 2375 ; 2376 ; SANDY-LABEL: test_pavgb: 2377 ; SANDY: # %bb.0: 2378 ; SANDY-NEXT: pavgb %mm1, %mm0 # sched: [3:1.00] 2379 ; SANDY-NEXT: pavgb (%rdi), %mm0 # sched: [8:1.00] 2380 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2381 ; SANDY-NEXT: retq # sched: [1:1.00] 2382 ; 
2383 ; HASWELL-LABEL: test_pavgb: 2384 ; HASWELL: # %bb.0: 2385 ; HASWELL-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] 2386 ; HASWELL-NEXT: pavgb (%rdi), %mm0 # sched: [6:0.50] 2387 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2388 ; HASWELL-NEXT: retq # sched: [7:1.00] 2389 ; 2390 ; BROADWELL-LABEL: test_pavgb: 2391 ; BROADWELL: # %bb.0: 2392 ; BROADWELL-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] 2393 ; BROADWELL-NEXT: pavgb (%rdi), %mm0 # sched: [6:0.50] 2394 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2395 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2396 ; 2397 ; SKYLAKE-LABEL: test_pavgb: 2398 ; SKYLAKE: # %bb.0: 2399 ; SKYLAKE-NEXT: pavgb %mm1, %mm0 # sched: [1:1.00] 2400 ; SKYLAKE-NEXT: pavgb (%rdi), %mm0 # sched: [6:1.00] 2401 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2402 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2403 ; 2404 ; SKX-LABEL: test_pavgb: 2405 ; SKX: # %bb.0: 2406 ; SKX-NEXT: pavgb %mm1, %mm0 # sched: [1:1.00] 2407 ; SKX-NEXT: pavgb (%rdi), %mm0 # sched: [6:1.00] 2408 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2409 ; SKX-NEXT: retq # sched: [7:1.00] 2410 ; 2411 ; BTVER2-LABEL: test_pavgb: 2412 ; BTVER2: # %bb.0: 2413 ; BTVER2-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] 2414 ; BTVER2-NEXT: pavgb (%rdi), %mm0 # sched: [6:1.00] 2415 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2416 ; BTVER2-NEXT: retq # sched: [4:1.00] 2417 ; 2418 ; ZNVER1-LABEL: test_pavgb: 2419 ; ZNVER1: # %bb.0: 2420 ; ZNVER1-NEXT: pavgb %mm1, %mm0 # sched: [1:0.25] 2421 ; ZNVER1-NEXT: pavgb (%rdi), %mm0 # sched: [8:0.50] 2422 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2423 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2424 %1 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a0, x86_mmx %a1) 2425 %2 = load x86_mmx, x86_mmx *%a2, align 8 2426 %3 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %1, x86_mmx %2) 2427 %4 = bitcast x86_mmx %3 to i64 2428 ret i64 %4 2429 } 2430 declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone 2431 2432 define i64 
@test_pavgw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2433 ; GENERIC-LABEL: test_pavgw: 2434 ; GENERIC: # %bb.0: 2435 ; GENERIC-NEXT: pavgw %mm1, %mm0 # sched: [3:1.00] 2436 ; GENERIC-NEXT: pavgw (%rdi), %mm0 # sched: [8:1.00] 2437 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2438 ; GENERIC-NEXT: retq # sched: [1:1.00] 2439 ; 2440 ; ATOM-LABEL: test_pavgw: 2441 ; ATOM: # %bb.0: 2442 ; ATOM-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] 2443 ; ATOM-NEXT: pavgw (%rdi), %mm0 # sched: [1:1.00] 2444 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2445 ; ATOM-NEXT: retq # sched: [79:39.50] 2446 ; 2447 ; SLM-LABEL: test_pavgw: 2448 ; SLM: # %bb.0: 2449 ; SLM-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] 2450 ; SLM-NEXT: pavgw (%rdi), %mm0 # sched: [4:1.00] 2451 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2452 ; SLM-NEXT: retq # sched: [4:1.00] 2453 ; 2454 ; SANDY-LABEL: test_pavgw: 2455 ; SANDY: # %bb.0: 2456 ; SANDY-NEXT: pavgw %mm1, %mm0 # sched: [3:1.00] 2457 ; SANDY-NEXT: pavgw (%rdi), %mm0 # sched: [8:1.00] 2458 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2459 ; SANDY-NEXT: retq # sched: [1:1.00] 2460 ; 2461 ; HASWELL-LABEL: test_pavgw: 2462 ; HASWELL: # %bb.0: 2463 ; HASWELL-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] 2464 ; HASWELL-NEXT: pavgw (%rdi), %mm0 # sched: [6:0.50] 2465 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2466 ; HASWELL-NEXT: retq # sched: [7:1.00] 2467 ; 2468 ; BROADWELL-LABEL: test_pavgw: 2469 ; BROADWELL: # %bb.0: 2470 ; BROADWELL-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] 2471 ; BROADWELL-NEXT: pavgw (%rdi), %mm0 # sched: [6:0.50] 2472 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2473 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2474 ; 2475 ; SKYLAKE-LABEL: test_pavgw: 2476 ; SKYLAKE: # %bb.0: 2477 ; SKYLAKE-NEXT: pavgw %mm1, %mm0 # sched: [1:1.00] 2478 ; SKYLAKE-NEXT: pavgw (%rdi), %mm0 # sched: [6:1.00] 2479 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2480 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2481 ; 2482 ; SKX-LABEL: 
test_pavgw: 2483 ; SKX: # %bb.0: 2484 ; SKX-NEXT: pavgw %mm1, %mm0 # sched: [1:1.00] 2485 ; SKX-NEXT: pavgw (%rdi), %mm0 # sched: [6:1.00] 2486 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2487 ; SKX-NEXT: retq # sched: [7:1.00] 2488 ; 2489 ; BTVER2-LABEL: test_pavgw: 2490 ; BTVER2: # %bb.0: 2491 ; BTVER2-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] 2492 ; BTVER2-NEXT: pavgw (%rdi), %mm0 # sched: [6:1.00] 2493 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2494 ; BTVER2-NEXT: retq # sched: [4:1.00] 2495 ; 2496 ; ZNVER1-LABEL: test_pavgw: 2497 ; ZNVER1: # %bb.0: 2498 ; ZNVER1-NEXT: pavgw %mm1, %mm0 # sched: [1:0.25] 2499 ; ZNVER1-NEXT: pavgw (%rdi), %mm0 # sched: [8:0.50] 2500 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2501 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2502 %1 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a0, x86_mmx %a1) 2503 %2 = load x86_mmx, x86_mmx *%a2, align 8 2504 %3 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %1, x86_mmx %2) 2505 %4 = bitcast x86_mmx %3 to i64 2506 ret i64 %4 2507 } 2508 declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone 2509 2510 define i64 @test_pcmpeqb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2511 ; GENERIC-LABEL: test_pcmpeqb: 2512 ; GENERIC: # %bb.0: 2513 ; GENERIC-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00] 2514 ; GENERIC-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:1.00] 2515 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2516 ; GENERIC-NEXT: retq # sched: [1:1.00] 2517 ; 2518 ; ATOM-LABEL: test_pcmpeqb: 2519 ; ATOM: # %bb.0: 2520 ; ATOM-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] 2521 ; ATOM-NEXT: pcmpeqb (%rdi), %mm0 # sched: [1:1.00] 2522 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2523 ; ATOM-NEXT: retq # sched: [79:39.50] 2524 ; 2525 ; SLM-LABEL: test_pcmpeqb: 2526 ; SLM: # %bb.0: 2527 ; SLM-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] 2528 ; SLM-NEXT: pcmpeqb (%rdi), %mm0 # sched: [4:1.00] 2529 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2530 ; SLM-NEXT: retq # sched: [4:1.00] 2531 ; 
2532 ; SANDY-LABEL: test_pcmpeqb: 2533 ; SANDY: # %bb.0: 2534 ; SANDY-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00] 2535 ; SANDY-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:1.00] 2536 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2537 ; SANDY-NEXT: retq # sched: [1:1.00] 2538 ; 2539 ; HASWELL-LABEL: test_pcmpeqb: 2540 ; HASWELL: # %bb.0: 2541 ; HASWELL-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] 2542 ; HASWELL-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:0.50] 2543 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2544 ; HASWELL-NEXT: retq # sched: [7:1.00] 2545 ; 2546 ; BROADWELL-LABEL: test_pcmpeqb: 2547 ; BROADWELL: # %bb.0: 2548 ; BROADWELL-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] 2549 ; BROADWELL-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:0.50] 2550 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2551 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2552 ; 2553 ; SKYLAKE-LABEL: test_pcmpeqb: 2554 ; SKYLAKE: # %bb.0: 2555 ; SKYLAKE-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:1.00] 2556 ; SKYLAKE-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00] 2557 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2558 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2559 ; 2560 ; SKX-LABEL: test_pcmpeqb: 2561 ; SKX: # %bb.0: 2562 ; SKX-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:1.00] 2563 ; SKX-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00] 2564 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2565 ; SKX-NEXT: retq # sched: [7:1.00] 2566 ; 2567 ; BTVER2-LABEL: test_pcmpeqb: 2568 ; BTVER2: # %bb.0: 2569 ; BTVER2-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] 2570 ; BTVER2-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00] 2571 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2572 ; BTVER2-NEXT: retq # sched: [4:1.00] 2573 ; 2574 ; ZNVER1-LABEL: test_pcmpeqb: 2575 ; ZNVER1: # %bb.0: 2576 ; ZNVER1-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.25] 2577 ; ZNVER1-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:0.50] 2578 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2579 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2580 %1 = call x86_mmx 
@llvm.x86.mmx.pcmpeq.b(x86_mmx %a0, x86_mmx %a1) 2581 %2 = load x86_mmx, x86_mmx *%a2, align 8 2582 %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %1, x86_mmx %2) 2583 %4 = bitcast x86_mmx %3 to i64 2584 ret i64 %4 2585 } 2586 declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone 2587 2588 define i64 @test_pcmpeqd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2589 ; GENERIC-LABEL: test_pcmpeqd: 2590 ; GENERIC: # %bb.0: 2591 ; GENERIC-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00] 2592 ; GENERIC-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:1.00] 2593 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2594 ; GENERIC-NEXT: retq # sched: [1:1.00] 2595 ; 2596 ; ATOM-LABEL: test_pcmpeqd: 2597 ; ATOM: # %bb.0: 2598 ; ATOM-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] 2599 ; ATOM-NEXT: pcmpeqd (%rdi), %mm0 # sched: [1:1.00] 2600 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2601 ; ATOM-NEXT: retq # sched: [79:39.50] 2602 ; 2603 ; SLM-LABEL: test_pcmpeqd: 2604 ; SLM: # %bb.0: 2605 ; SLM-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] 2606 ; SLM-NEXT: pcmpeqd (%rdi), %mm0 # sched: [4:1.00] 2607 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2608 ; SLM-NEXT: retq # sched: [4:1.00] 2609 ; 2610 ; SANDY-LABEL: test_pcmpeqd: 2611 ; SANDY: # %bb.0: 2612 ; SANDY-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00] 2613 ; SANDY-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:1.00] 2614 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2615 ; SANDY-NEXT: retq # sched: [1:1.00] 2616 ; 2617 ; HASWELL-LABEL: test_pcmpeqd: 2618 ; HASWELL: # %bb.0: 2619 ; HASWELL-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] 2620 ; HASWELL-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:0.50] 2621 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2622 ; HASWELL-NEXT: retq # sched: [7:1.00] 2623 ; 2624 ; BROADWELL-LABEL: test_pcmpeqd: 2625 ; BROADWELL: # %bb.0: 2626 ; BROADWELL-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] 2627 ; BROADWELL-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:0.50] 2628 ; BROADWELL-NEXT: movq %mm0, %rax # sched: 
[1:1.00] 2629 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2630 ; 2631 ; SKYLAKE-LABEL: test_pcmpeqd: 2632 ; SKYLAKE: # %bb.0: 2633 ; SKYLAKE-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:1.00] 2634 ; SKYLAKE-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:1.00] 2635 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2636 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2637 ; 2638 ; SKX-LABEL: test_pcmpeqd: 2639 ; SKX: # %bb.0: 2640 ; SKX-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:1.00] 2641 ; SKX-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:1.00] 2642 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2643 ; SKX-NEXT: retq # sched: [7:1.00] 2644 ; 2645 ; BTVER2-LABEL: test_pcmpeqd: 2646 ; BTVER2: # %bb.0: 2647 ; BTVER2-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] 2648 ; BTVER2-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:1.00] 2649 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2650 ; BTVER2-NEXT: retq # sched: [4:1.00] 2651 ; 2652 ; ZNVER1-LABEL: test_pcmpeqd: 2653 ; ZNVER1: # %bb.0: 2654 ; ZNVER1-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.25] 2655 ; ZNVER1-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:0.50] 2656 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2657 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2658 %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a0, x86_mmx %a1) 2659 %2 = load x86_mmx, x86_mmx *%a2, align 8 2660 %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %1, x86_mmx %2) 2661 %4 = bitcast x86_mmx %3 to i64 2662 ret i64 %4 2663 } 2664 declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone 2665 2666 define i64 @test_pcmpeqw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2667 ; GENERIC-LABEL: test_pcmpeqw: 2668 ; GENERIC: # %bb.0: 2669 ; GENERIC-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00] 2670 ; GENERIC-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:1.00] 2671 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2672 ; GENERIC-NEXT: retq # sched: [1:1.00] 2673 ; 2674 ; ATOM-LABEL: test_pcmpeqw: 2675 ; ATOM: # %bb.0: 2676 ; ATOM-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] 2677 ; ATOM-NEXT: pcmpeqw (%rdi), 
%mm0 # sched: [1:1.00] 2678 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2679 ; ATOM-NEXT: retq # sched: [79:39.50] 2680 ; 2681 ; SLM-LABEL: test_pcmpeqw: 2682 ; SLM: # %bb.0: 2683 ; SLM-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] 2684 ; SLM-NEXT: pcmpeqw (%rdi), %mm0 # sched: [4:1.00] 2685 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2686 ; SLM-NEXT: retq # sched: [4:1.00] 2687 ; 2688 ; SANDY-LABEL: test_pcmpeqw: 2689 ; SANDY: # %bb.0: 2690 ; SANDY-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00] 2691 ; SANDY-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:1.00] 2692 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2693 ; SANDY-NEXT: retq # sched: [1:1.00] 2694 ; 2695 ; HASWELL-LABEL: test_pcmpeqw: 2696 ; HASWELL: # %bb.0: 2697 ; HASWELL-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] 2698 ; HASWELL-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:0.50] 2699 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2700 ; HASWELL-NEXT: retq # sched: [7:1.00] 2701 ; 2702 ; BROADWELL-LABEL: test_pcmpeqw: 2703 ; BROADWELL: # %bb.0: 2704 ; BROADWELL-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] 2705 ; BROADWELL-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:0.50] 2706 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2707 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2708 ; 2709 ; SKYLAKE-LABEL: test_pcmpeqw: 2710 ; SKYLAKE: # %bb.0: 2711 ; SKYLAKE-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:1.00] 2712 ; SKYLAKE-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00] 2713 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2714 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2715 ; 2716 ; SKX-LABEL: test_pcmpeqw: 2717 ; SKX: # %bb.0: 2718 ; SKX-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:1.00] 2719 ; SKX-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00] 2720 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2721 ; SKX-NEXT: retq # sched: [7:1.00] 2722 ; 2723 ; BTVER2-LABEL: test_pcmpeqw: 2724 ; BTVER2: # %bb.0: 2725 ; BTVER2-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] 2726 ; BTVER2-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00] 2727 ; BTVER2-NEXT: movq %mm0, %rax # 
sched: [4:1.00] 2728 ; BTVER2-NEXT: retq # sched: [4:1.00] 2729 ; 2730 ; ZNVER1-LABEL: test_pcmpeqw: 2731 ; ZNVER1: # %bb.0: 2732 ; ZNVER1-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.25] 2733 ; ZNVER1-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:0.50] 2734 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2735 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2736 %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a0, x86_mmx %a1) 2737 %2 = load x86_mmx, x86_mmx *%a2, align 8 2738 %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %1, x86_mmx %2) 2739 %4 = bitcast x86_mmx %3 to i64 2740 ret i64 %4 2741 } 2742 declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone 2743 2744 define i64 @test_pcmpgtb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2745 ; GENERIC-LABEL: test_pcmpgtb: 2746 ; GENERIC: # %bb.0: 2747 ; GENERIC-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00] 2748 ; GENERIC-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:1.00] 2749 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2750 ; GENERIC-NEXT: retq # sched: [1:1.00] 2751 ; 2752 ; ATOM-LABEL: test_pcmpgtb: 2753 ; ATOM: # %bb.0: 2754 ; ATOM-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] 2755 ; ATOM-NEXT: pcmpgtb (%rdi), %mm0 # sched: [1:1.00] 2756 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2757 ; ATOM-NEXT: retq # sched: [79:39.50] 2758 ; 2759 ; SLM-LABEL: test_pcmpgtb: 2760 ; SLM: # %bb.0: 2761 ; SLM-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] 2762 ; SLM-NEXT: pcmpgtb (%rdi), %mm0 # sched: [4:1.00] 2763 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2764 ; SLM-NEXT: retq # sched: [4:1.00] 2765 ; 2766 ; SANDY-LABEL: test_pcmpgtb: 2767 ; SANDY: # %bb.0: 2768 ; SANDY-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00] 2769 ; SANDY-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:1.00] 2770 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2771 ; SANDY-NEXT: retq # sched: [1:1.00] 2772 ; 2773 ; HASWELL-LABEL: test_pcmpgtb: 2774 ; HASWELL: # %bb.0: 2775 ; HASWELL-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] 2776 ; HASWELL-NEXT: pcmpgtb (%rdi), %mm0 
# sched: [6:0.50] 2777 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2778 ; HASWELL-NEXT: retq # sched: [7:1.00] 2779 ; 2780 ; BROADWELL-LABEL: test_pcmpgtb: 2781 ; BROADWELL: # %bb.0: 2782 ; BROADWELL-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] 2783 ; BROADWELL-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:0.50] 2784 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2785 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2786 ; 2787 ; SKYLAKE-LABEL: test_pcmpgtb: 2788 ; SKYLAKE: # %bb.0: 2789 ; SKYLAKE-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:1.00] 2790 ; SKYLAKE-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00] 2791 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2792 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2793 ; 2794 ; SKX-LABEL: test_pcmpgtb: 2795 ; SKX: # %bb.0: 2796 ; SKX-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:1.00] 2797 ; SKX-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00] 2798 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2799 ; SKX-NEXT: retq # sched: [7:1.00] 2800 ; 2801 ; BTVER2-LABEL: test_pcmpgtb: 2802 ; BTVER2: # %bb.0: 2803 ; BTVER2-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] 2804 ; BTVER2-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00] 2805 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2806 ; BTVER2-NEXT: retq # sched: [4:1.00] 2807 ; 2808 ; ZNVER1-LABEL: test_pcmpgtb: 2809 ; ZNVER1: # %bb.0: 2810 ; ZNVER1-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.25] 2811 ; ZNVER1-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:0.50] 2812 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2813 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2814 %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a0, x86_mmx %a1) 2815 %2 = load x86_mmx, x86_mmx *%a2, align 8 2816 %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %1, x86_mmx %2) 2817 %4 = bitcast x86_mmx %3 to i64 2818 ret i64 %4 2819 } 2820 declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone 2821 2822 define i64 @test_pcmpgtd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2823 ; GENERIC-LABEL: test_pcmpgtd: 2824 ; GENERIC: # %bb.0: 2825 ; 
GENERIC-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00] 2826 ; GENERIC-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:1.00] 2827 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2828 ; GENERIC-NEXT: retq # sched: [1:1.00] 2829 ; 2830 ; ATOM-LABEL: test_pcmpgtd: 2831 ; ATOM: # %bb.0: 2832 ; ATOM-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] 2833 ; ATOM-NEXT: pcmpgtd (%rdi), %mm0 # sched: [1:1.00] 2834 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2835 ; ATOM-NEXT: retq # sched: [79:39.50] 2836 ; 2837 ; SLM-LABEL: test_pcmpgtd: 2838 ; SLM: # %bb.0: 2839 ; SLM-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] 2840 ; SLM-NEXT: pcmpgtd (%rdi), %mm0 # sched: [4:1.00] 2841 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2842 ; SLM-NEXT: retq # sched: [4:1.00] 2843 ; 2844 ; SANDY-LABEL: test_pcmpgtd: 2845 ; SANDY: # %bb.0: 2846 ; SANDY-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00] 2847 ; SANDY-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:1.00] 2848 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2849 ; SANDY-NEXT: retq # sched: [1:1.00] 2850 ; 2851 ; HASWELL-LABEL: test_pcmpgtd: 2852 ; HASWELL: # %bb.0: 2853 ; HASWELL-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] 2854 ; HASWELL-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:0.50] 2855 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2856 ; HASWELL-NEXT: retq # sched: [7:1.00] 2857 ; 2858 ; BROADWELL-LABEL: test_pcmpgtd: 2859 ; BROADWELL: # %bb.0: 2860 ; BROADWELL-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] 2861 ; BROADWELL-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:0.50] 2862 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2863 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2864 ; 2865 ; SKYLAKE-LABEL: test_pcmpgtd: 2866 ; SKYLAKE: # %bb.0: 2867 ; SKYLAKE-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:1.00] 2868 ; SKYLAKE-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00] 2869 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2870 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2871 ; 2872 ; SKX-LABEL: test_pcmpgtd: 2873 ; SKX: # %bb.0: 2874 ; SKX-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:1.00] 2875 
; SKX-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00] 2876 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2877 ; SKX-NEXT: retq # sched: [7:1.00] 2878 ; 2879 ; BTVER2-LABEL: test_pcmpgtd: 2880 ; BTVER2: # %bb.0: 2881 ; BTVER2-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] 2882 ; BTVER2-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00] 2883 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2884 ; BTVER2-NEXT: retq # sched: [4:1.00] 2885 ; 2886 ; ZNVER1-LABEL: test_pcmpgtd: 2887 ; ZNVER1: # %bb.0: 2888 ; ZNVER1-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.25] 2889 ; ZNVER1-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:0.50] 2890 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2891 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2892 %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a0, x86_mmx %a1) 2893 %2 = load x86_mmx, x86_mmx *%a2, align 8 2894 %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %1, x86_mmx %2) 2895 %4 = bitcast x86_mmx %3 to i64 2896 ret i64 %4 2897 } 2898 declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone 2899 2900 define i64 @test_pcmpgtw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 2901 ; GENERIC-LABEL: test_pcmpgtw: 2902 ; GENERIC: # %bb.0: 2903 ; GENERIC-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00] 2904 ; GENERIC-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:1.00] 2905 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 2906 ; GENERIC-NEXT: retq # sched: [1:1.00] 2907 ; 2908 ; ATOM-LABEL: test_pcmpgtw: 2909 ; ATOM: # %bb.0: 2910 ; ATOM-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] 2911 ; ATOM-NEXT: pcmpgtw (%rdi), %mm0 # sched: [1:1.00] 2912 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 2913 ; ATOM-NEXT: retq # sched: [79:39.50] 2914 ; 2915 ; SLM-LABEL: test_pcmpgtw: 2916 ; SLM: # %bb.0: 2917 ; SLM-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] 2918 ; SLM-NEXT: pcmpgtw (%rdi), %mm0 # sched: [4:1.00] 2919 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 2920 ; SLM-NEXT: retq # sched: [4:1.00] 2921 ; 2922 ; SANDY-LABEL: test_pcmpgtw: 2923 ; SANDY: # %bb.0: 2924 ; 
SANDY-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00] 2925 ; SANDY-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:1.00] 2926 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 2927 ; SANDY-NEXT: retq # sched: [1:1.00] 2928 ; 2929 ; HASWELL-LABEL: test_pcmpgtw: 2930 ; HASWELL: # %bb.0: 2931 ; HASWELL-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] 2932 ; HASWELL-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:0.50] 2933 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2934 ; HASWELL-NEXT: retq # sched: [7:1.00] 2935 ; 2936 ; BROADWELL-LABEL: test_pcmpgtw: 2937 ; BROADWELL: # %bb.0: 2938 ; BROADWELL-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] 2939 ; BROADWELL-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:0.50] 2940 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 2941 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2942 ; 2943 ; SKYLAKE-LABEL: test_pcmpgtw: 2944 ; SKYLAKE: # %bb.0: 2945 ; SKYLAKE-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:1.00] 2946 ; SKYLAKE-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00] 2947 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 2948 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2949 ; 2950 ; SKX-LABEL: test_pcmpgtw: 2951 ; SKX: # %bb.0: 2952 ; SKX-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:1.00] 2953 ; SKX-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00] 2954 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 2955 ; SKX-NEXT: retq # sched: [7:1.00] 2956 ; 2957 ; BTVER2-LABEL: test_pcmpgtw: 2958 ; BTVER2: # %bb.0: 2959 ; BTVER2-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] 2960 ; BTVER2-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00] 2961 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 2962 ; BTVER2-NEXT: retq # sched: [4:1.00] 2963 ; 2964 ; ZNVER1-LABEL: test_pcmpgtw: 2965 ; ZNVER1: # %bb.0: 2966 ; ZNVER1-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.25] 2967 ; ZNVER1-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:0.50] 2968 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 2969 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2970 %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a0, x86_mmx %a1) 2971 %2 = load x86_mmx, x86_mmx *%a2, 
align 8 2972 %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %1, x86_mmx %2) 2973 %4 = bitcast x86_mmx %3 to i64 2974 ret i64 %4 2975 } 2976 declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone 2977 2978 define i32 @test_pextrw(x86_mmx %a0) optsize { 2979 ; GENERIC-LABEL: test_pextrw: 2980 ; GENERIC: # %bb.0: 2981 ; GENERIC-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] 2982 ; GENERIC-NEXT: retq # sched: [1:1.00] 2983 ; 2984 ; ATOM-LABEL: test_pextrw: 2985 ; ATOM: # %bb.0: 2986 ; ATOM-NEXT: pextrw $0, %mm0, %eax # sched: [4:2.00] 2987 ; ATOM-NEXT: retq # sched: [79:39.50] 2988 ; 2989 ; SLM-LABEL: test_pextrw: 2990 ; SLM: # %bb.0: 2991 ; SLM-NEXT: pextrw $0, %mm0, %eax # sched: [1:1.00] 2992 ; SLM-NEXT: retq # sched: [4:1.00] 2993 ; 2994 ; SANDY-LABEL: test_pextrw: 2995 ; SANDY: # %bb.0: 2996 ; SANDY-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] 2997 ; SANDY-NEXT: retq # sched: [1:1.00] 2998 ; 2999 ; HASWELL-LABEL: test_pextrw: 3000 ; HASWELL: # %bb.0: 3001 ; HASWELL-NEXT: pextrw $0, %mm0, %eax # sched: [2:1.00] 3002 ; HASWELL-NEXT: retq # sched: [7:1.00] 3003 ; 3004 ; BROADWELL-LABEL: test_pextrw: 3005 ; BROADWELL: # %bb.0: 3006 ; BROADWELL-NEXT: pextrw $0, %mm0, %eax # sched: [2:1.00] 3007 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3008 ; 3009 ; SKYLAKE-LABEL: test_pextrw: 3010 ; SKYLAKE: # %bb.0: 3011 ; SKYLAKE-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] 3012 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3013 ; 3014 ; SKX-LABEL: test_pextrw: 3015 ; SKX: # %bb.0: 3016 ; SKX-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] 3017 ; SKX-NEXT: retq # sched: [7:1.00] 3018 ; 3019 ; BTVER2-LABEL: test_pextrw: 3020 ; BTVER2: # %bb.0: 3021 ; BTVER2-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00] 3022 ; BTVER2-NEXT: retq # sched: [4:1.00] 3023 ; 3024 ; ZNVER1-LABEL: test_pextrw: 3025 ; ZNVER1: # %bb.0: 3026 ; ZNVER1-NEXT: pextrw $0, %mm0, %eax # sched: [2:2.00] 3027 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3028 %1 = call i32 @llvm.x86.mmx.pextr.w(x86_mmx %a0, i32 0) 
3029 ret i32 %1 3030 } 3031 declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32) nounwind readnone 3032 3033 define i64 @test_phaddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3034 ; GENERIC-LABEL: test_phaddd: 3035 ; GENERIC: # %bb.0: 3036 ; GENERIC-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50] 3037 ; GENERIC-NEXT: phaddd (%rdi), %mm0 # sched: [8:1.50] 3038 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3039 ; GENERIC-NEXT: retq # sched: [1:1.00] 3040 ; 3041 ; ATOM-LABEL: test_phaddd: 3042 ; ATOM: # %bb.0: 3043 ; ATOM-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50] 3044 ; ATOM-NEXT: phaddd (%rdi), %mm0 # sched: [4:2.00] 3045 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3046 ; ATOM-NEXT: retq # sched: [79:39.50] 3047 ; 3048 ; SLM-LABEL: test_phaddd: 3049 ; SLM: # %bb.0: 3050 ; SLM-NEXT: phaddd %mm1, %mm0 # sched: [1:0.50] 3051 ; SLM-NEXT: phaddd (%rdi), %mm0 # sched: [4:1.00] 3052 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3053 ; SLM-NEXT: retq # sched: [4:1.00] 3054 ; 3055 ; SANDY-LABEL: test_phaddd: 3056 ; SANDY: # %bb.0: 3057 ; SANDY-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50] 3058 ; SANDY-NEXT: phaddd (%rdi), %mm0 # sched: [8:1.50] 3059 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3060 ; SANDY-NEXT: retq # sched: [1:1.00] 3061 ; 3062 ; HASWELL-LABEL: test_phaddd: 3063 ; HASWELL: # %bb.0: 3064 ; HASWELL-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] 3065 ; HASWELL-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00] 3066 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3067 ; HASWELL-NEXT: retq # sched: [7:1.00] 3068 ; 3069 ; BROADWELL-LABEL: test_phaddd: 3070 ; BROADWELL: # %bb.0: 3071 ; BROADWELL-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] 3072 ; BROADWELL-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00] 3073 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3074 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3075 ; 3076 ; SKYLAKE-LABEL: test_phaddd: 3077 ; SKYLAKE: # %bb.0: 3078 ; SKYLAKE-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] 3079 ; SKYLAKE-NEXT: phaddd (%rdi), %mm0 # sched: 
[8:2.00] 3080 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3081 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3082 ; 3083 ; SKX-LABEL: test_phaddd: 3084 ; SKX: # %bb.0: 3085 ; SKX-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00] 3086 ; SKX-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00] 3087 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3088 ; SKX-NEXT: retq # sched: [7:1.00] 3089 ; 3090 ; BTVER2-LABEL: test_phaddd: 3091 ; BTVER2: # %bb.0: 3092 ; BTVER2-NEXT: phaddd %mm1, %mm0 # sched: [1:0.50] 3093 ; BTVER2-NEXT: phaddd (%rdi), %mm0 # sched: [6:1.00] 3094 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3095 ; BTVER2-NEXT: retq # sched: [4:1.00] 3096 ; 3097 ; ZNVER1-LABEL: test_phaddd: 3098 ; ZNVER1: # %bb.0: 3099 ; ZNVER1-NEXT: phaddd %mm1, %mm0 # sched: [100:0.25] 3100 ; ZNVER1-NEXT: phaddd (%rdi), %mm0 # sched: [100:0.25] 3101 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3102 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3103 %1 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %a0, x86_mmx %a1) 3104 %2 = load x86_mmx, x86_mmx *%a2, align 8 3105 %3 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %1, x86_mmx %2) 3106 %4 = bitcast x86_mmx %3 to i64 3107 ret i64 %4 3108 } 3109 declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone 3110 3111 define i64 @test_phaddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3112 ; GENERIC-LABEL: test_phaddsw: 3113 ; GENERIC: # %bb.0: 3114 ; GENERIC-NEXT: phaddsw %mm1, %mm0 # sched: [3:1.50] 3115 ; GENERIC-NEXT: phaddsw (%rdi), %mm0 # sched: [8:1.50] 3116 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3117 ; GENERIC-NEXT: retq # sched: [1:1.00] 3118 ; 3119 ; ATOM-LABEL: test_phaddsw: 3120 ; ATOM: # %bb.0: 3121 ; ATOM-NEXT: phaddsw %mm1, %mm0 # sched: [5:2.50] 3122 ; ATOM-NEXT: phaddsw (%rdi), %mm0 # sched: [6:3.00] 3123 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3124 ; ATOM-NEXT: retq # sched: [79:39.50] 3125 ; 3126 ; SLM-LABEL: test_phaddsw: 3127 ; SLM: # %bb.0: 3128 ; SLM-NEXT: phaddsw %mm1, %mm0 # sched: [1:0.50] 
3129 ; SLM-NEXT: phaddsw (%rdi), %mm0 # sched: [4:1.00] 3130 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3131 ; SLM-NEXT: retq # sched: [4:1.00] 3132 ; 3133 ; SANDY-LABEL: test_phaddsw: 3134 ; SANDY: # %bb.0: 3135 ; SANDY-NEXT: phaddsw %mm1, %mm0 # sched: [3:1.50] 3136 ; SANDY-NEXT: phaddsw (%rdi), %mm0 # sched: [8:1.50] 3137 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3138 ; SANDY-NEXT: retq # sched: [1:1.00] 3139 ; 3140 ; HASWELL-LABEL: test_phaddsw: 3141 ; HASWELL: # %bb.0: 3142 ; HASWELL-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] 3143 ; HASWELL-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00] 3144 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3145 ; HASWELL-NEXT: retq # sched: [7:1.00] 3146 ; 3147 ; BROADWELL-LABEL: test_phaddsw: 3148 ; BROADWELL: # %bb.0: 3149 ; BROADWELL-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] 3150 ; BROADWELL-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00] 3151 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3152 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3153 ; 3154 ; SKYLAKE-LABEL: test_phaddsw: 3155 ; SKYLAKE: # %bb.0: 3156 ; SKYLAKE-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] 3157 ; SKYLAKE-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00] 3158 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3159 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3160 ; 3161 ; SKX-LABEL: test_phaddsw: 3162 ; SKX: # %bb.0: 3163 ; SKX-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00] 3164 ; SKX-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00] 3165 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3166 ; SKX-NEXT: retq # sched: [7:1.00] 3167 ; 3168 ; BTVER2-LABEL: test_phaddsw: 3169 ; BTVER2: # %bb.0: 3170 ; BTVER2-NEXT: phaddsw %mm1, %mm0 # sched: [1:0.50] 3171 ; BTVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [6:1.00] 3172 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3173 ; BTVER2-NEXT: retq # sched: [4:1.00] 3174 ; 3175 ; ZNVER1-LABEL: test_phaddsw: 3176 ; ZNVER1: # %bb.0: 3177 ; ZNVER1-NEXT: phaddsw %mm1, %mm0 # sched: [100:0.25] 3178 ; ZNVER1-NEXT: phaddsw (%rdi), %mm0 # 
sched: [100:0.25] 3179 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3180 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3181 %1 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %a0, x86_mmx %a1) 3182 %2 = load x86_mmx, x86_mmx *%a2, align 8 3183 %3 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %1, x86_mmx %2) 3184 %4 = bitcast x86_mmx %3 to i64 3185 ret i64 %4 3186 } 3187 declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone 3188 3189 define i64 @test_phaddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3190 ; GENERIC-LABEL: test_phaddw: 3191 ; GENERIC: # %bb.0: 3192 ; GENERIC-NEXT: phaddw %mm1, %mm0 # sched: [3:1.50] 3193 ; GENERIC-NEXT: phaddw (%rdi), %mm0 # sched: [8:1.50] 3194 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3195 ; GENERIC-NEXT: retq # sched: [1:1.00] 3196 ; 3197 ; ATOM-LABEL: test_phaddw: 3198 ; ATOM: # %bb.0: 3199 ; ATOM-NEXT: phaddw %mm1, %mm0 # sched: [5:2.50] 3200 ; ATOM-NEXT: phaddw (%rdi), %mm0 # sched: [6:3.00] 3201 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3202 ; ATOM-NEXT: retq # sched: [79:39.50] 3203 ; 3204 ; SLM-LABEL: test_phaddw: 3205 ; SLM: # %bb.0: 3206 ; SLM-NEXT: phaddw %mm1, %mm0 # sched: [1:0.50] 3207 ; SLM-NEXT: phaddw (%rdi), %mm0 # sched: [4:1.00] 3208 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3209 ; SLM-NEXT: retq # sched: [4:1.00] 3210 ; 3211 ; SANDY-LABEL: test_phaddw: 3212 ; SANDY: # %bb.0: 3213 ; SANDY-NEXT: phaddw %mm1, %mm0 # sched: [3:1.50] 3214 ; SANDY-NEXT: phaddw (%rdi), %mm0 # sched: [8:1.50] 3215 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3216 ; SANDY-NEXT: retq # sched: [1:1.00] 3217 ; 3218 ; HASWELL-LABEL: test_phaddw: 3219 ; HASWELL: # %bb.0: 3220 ; HASWELL-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] 3221 ; HASWELL-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00] 3222 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3223 ; HASWELL-NEXT: retq # sched: [7:1.00] 3224 ; 3225 ; BROADWELL-LABEL: test_phaddw: 3226 ; BROADWELL: # %bb.0: 3227 ; BROADWELL-NEXT: phaddw %mm1, %mm0 # 
sched: [3:2.00] 3228 ; BROADWELL-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00] 3229 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3230 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3231 ; 3232 ; SKYLAKE-LABEL: test_phaddw: 3233 ; SKYLAKE: # %bb.0: 3234 ; SKYLAKE-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] 3235 ; SKYLAKE-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00] 3236 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3237 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3238 ; 3239 ; SKX-LABEL: test_phaddw: 3240 ; SKX: # %bb.0: 3241 ; SKX-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00] 3242 ; SKX-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00] 3243 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3244 ; SKX-NEXT: retq # sched: [7:1.00] 3245 ; 3246 ; BTVER2-LABEL: test_phaddw: 3247 ; BTVER2: # %bb.0: 3248 ; BTVER2-NEXT: phaddw %mm1, %mm0 # sched: [1:0.50] 3249 ; BTVER2-NEXT: phaddw (%rdi), %mm0 # sched: [6:1.00] 3250 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3251 ; BTVER2-NEXT: retq # sched: [4:1.00] 3252 ; 3253 ; ZNVER1-LABEL: test_phaddw: 3254 ; ZNVER1: # %bb.0: 3255 ; ZNVER1-NEXT: phaddw %mm1, %mm0 # sched: [100:0.25] 3256 ; ZNVER1-NEXT: phaddw (%rdi), %mm0 # sched: [100:0.25] 3257 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3258 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3259 %1 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %a0, x86_mmx %a1) 3260 %2 = load x86_mmx, x86_mmx *%a2, align 8 3261 %3 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %1, x86_mmx %2) 3262 %4 = bitcast x86_mmx %3 to i64 3263 ret i64 %4 3264 } 3265 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone 3266 3267 define i64 @test_phsubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3268 ; GENERIC-LABEL: test_phsubd: 3269 ; GENERIC: # %bb.0: 3270 ; GENERIC-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50] 3271 ; GENERIC-NEXT: phsubd (%rdi), %mm0 # sched: [8:1.50] 3272 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3273 ; GENERIC-NEXT: retq # sched: [1:1.00] 3274 ; 3275 ; ATOM-LABEL: 
test_phsubd: 3276 ; ATOM: # %bb.0: 3277 ; ATOM-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50] 3278 ; ATOM-NEXT: phsubd (%rdi), %mm0 # sched: [4:2.00] 3279 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3280 ; ATOM-NEXT: retq # sched: [79:39.50] 3281 ; 3282 ; SLM-LABEL: test_phsubd: 3283 ; SLM: # %bb.0: 3284 ; SLM-NEXT: phsubd %mm1, %mm0 # sched: [1:0.50] 3285 ; SLM-NEXT: phsubd (%rdi), %mm0 # sched: [4:1.00] 3286 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3287 ; SLM-NEXT: retq # sched: [4:1.00] 3288 ; 3289 ; SANDY-LABEL: test_phsubd: 3290 ; SANDY: # %bb.0: 3291 ; SANDY-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50] 3292 ; SANDY-NEXT: phsubd (%rdi), %mm0 # sched: [8:1.50] 3293 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3294 ; SANDY-NEXT: retq # sched: [1:1.00] 3295 ; 3296 ; HASWELL-LABEL: test_phsubd: 3297 ; HASWELL: # %bb.0: 3298 ; HASWELL-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] 3299 ; HASWELL-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00] 3300 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3301 ; HASWELL-NEXT: retq # sched: [7:1.00] 3302 ; 3303 ; BROADWELL-LABEL: test_phsubd: 3304 ; BROADWELL: # %bb.0: 3305 ; BROADWELL-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] 3306 ; BROADWELL-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00] 3307 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3308 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3309 ; 3310 ; SKYLAKE-LABEL: test_phsubd: 3311 ; SKYLAKE: # %bb.0: 3312 ; SKYLAKE-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] 3313 ; SKYLAKE-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00] 3314 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3315 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3316 ; 3317 ; SKX-LABEL: test_phsubd: 3318 ; SKX: # %bb.0: 3319 ; SKX-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00] 3320 ; SKX-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00] 3321 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3322 ; SKX-NEXT: retq # sched: [7:1.00] 3323 ; 3324 ; BTVER2-LABEL: test_phsubd: 3325 ; BTVER2: # %bb.0: 3326 ; BTVER2-NEXT: phsubd %mm1, %mm0 # sched: 
[1:0.50] 3327 ; BTVER2-NEXT: phsubd (%rdi), %mm0 # sched: [6:1.00] 3328 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3329 ; BTVER2-NEXT: retq # sched: [4:1.00] 3330 ; 3331 ; ZNVER1-LABEL: test_phsubd: 3332 ; ZNVER1: # %bb.0: 3333 ; ZNVER1-NEXT: phsubd %mm1, %mm0 # sched: [100:0.25] 3334 ; ZNVER1-NEXT: phsubd (%rdi), %mm0 # sched: [100:0.25] 3335 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3336 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3337 %1 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %a0, x86_mmx %a1) 3338 %2 = load x86_mmx, x86_mmx *%a2, align 8 3339 %3 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %1, x86_mmx %2) 3340 %4 = bitcast x86_mmx %3 to i64 3341 ret i64 %4 3342 } 3343 declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone 3344 3345 define i64 @test_phsubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3346 ; GENERIC-LABEL: test_phsubsw: 3347 ; GENERIC: # %bb.0: 3348 ; GENERIC-NEXT: phsubsw %mm1, %mm0 # sched: [3:1.50] 3349 ; GENERIC-NEXT: phsubsw (%rdi), %mm0 # sched: [8:1.50] 3350 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3351 ; GENERIC-NEXT: retq # sched: [1:1.00] 3352 ; 3353 ; ATOM-LABEL: test_phsubsw: 3354 ; ATOM: # %bb.0: 3355 ; ATOM-NEXT: phsubsw %mm1, %mm0 # sched: [5:2.50] 3356 ; ATOM-NEXT: phsubsw (%rdi), %mm0 # sched: [6:3.00] 3357 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3358 ; ATOM-NEXT: retq # sched: [79:39.50] 3359 ; 3360 ; SLM-LABEL: test_phsubsw: 3361 ; SLM: # %bb.0: 3362 ; SLM-NEXT: phsubsw %mm1, %mm0 # sched: [1:0.50] 3363 ; SLM-NEXT: phsubsw (%rdi), %mm0 # sched: [4:1.00] 3364 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3365 ; SLM-NEXT: retq # sched: [4:1.00] 3366 ; 3367 ; SANDY-LABEL: test_phsubsw: 3368 ; SANDY: # %bb.0: 3369 ; SANDY-NEXT: phsubsw %mm1, %mm0 # sched: [3:1.50] 3370 ; SANDY-NEXT: phsubsw (%rdi), %mm0 # sched: [8:1.50] 3371 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3372 ; SANDY-NEXT: retq # sched: [1:1.00] 3373 ; 3374 ; HASWELL-LABEL: test_phsubsw: 3375 ; HASWELL: 
# %bb.0: 3376 ; HASWELL-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] 3377 ; HASWELL-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00] 3378 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3379 ; HASWELL-NEXT: retq # sched: [7:1.00] 3380 ; 3381 ; BROADWELL-LABEL: test_phsubsw: 3382 ; BROADWELL: # %bb.0: 3383 ; BROADWELL-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] 3384 ; BROADWELL-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00] 3385 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3386 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3387 ; 3388 ; SKYLAKE-LABEL: test_phsubsw: 3389 ; SKYLAKE: # %bb.0: 3390 ; SKYLAKE-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] 3391 ; SKYLAKE-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00] 3392 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3393 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3394 ; 3395 ; SKX-LABEL: test_phsubsw: 3396 ; SKX: # %bb.0: 3397 ; SKX-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00] 3398 ; SKX-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00] 3399 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3400 ; SKX-NEXT: retq # sched: [7:1.00] 3401 ; 3402 ; BTVER2-LABEL: test_phsubsw: 3403 ; BTVER2: # %bb.0: 3404 ; BTVER2-NEXT: phsubsw %mm1, %mm0 # sched: [1:0.50] 3405 ; BTVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [6:1.00] 3406 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3407 ; BTVER2-NEXT: retq # sched: [4:1.00] 3408 ; 3409 ; ZNVER1-LABEL: test_phsubsw: 3410 ; ZNVER1: # %bb.0: 3411 ; ZNVER1-NEXT: phsubsw %mm1, %mm0 # sched: [100:0.25] 3412 ; ZNVER1-NEXT: phsubsw (%rdi), %mm0 # sched: [100:0.25] 3413 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3414 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3415 %1 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %a0, x86_mmx %a1) 3416 %2 = load x86_mmx, x86_mmx *%a2, align 8 3417 %3 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %1, x86_mmx %2) 3418 %4 = bitcast x86_mmx %3 to i64 3419 ret i64 %4 3420 } 3421 declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone 3422 3423 define i64 
@test_phsubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3424 ; GENERIC-LABEL: test_phsubw: 3425 ; GENERIC: # %bb.0: 3426 ; GENERIC-NEXT: phsubw %mm1, %mm0 # sched: [3:1.50] 3427 ; GENERIC-NEXT: phsubw (%rdi), %mm0 # sched: [8:1.50] 3428 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3429 ; GENERIC-NEXT: retq # sched: [1:1.00] 3430 ; 3431 ; ATOM-LABEL: test_phsubw: 3432 ; ATOM: # %bb.0: 3433 ; ATOM-NEXT: phsubw %mm1, %mm0 # sched: [5:2.50] 3434 ; ATOM-NEXT: phsubw (%rdi), %mm0 # sched: [6:3.00] 3435 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3436 ; ATOM-NEXT: retq # sched: [79:39.50] 3437 ; 3438 ; SLM-LABEL: test_phsubw: 3439 ; SLM: # %bb.0: 3440 ; SLM-NEXT: phsubw %mm1, %mm0 # sched: [1:0.50] 3441 ; SLM-NEXT: phsubw (%rdi), %mm0 # sched: [4:1.00] 3442 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3443 ; SLM-NEXT: retq # sched: [4:1.00] 3444 ; 3445 ; SANDY-LABEL: test_phsubw: 3446 ; SANDY: # %bb.0: 3447 ; SANDY-NEXT: phsubw %mm1, %mm0 # sched: [3:1.50] 3448 ; SANDY-NEXT: phsubw (%rdi), %mm0 # sched: [8:1.50] 3449 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3450 ; SANDY-NEXT: retq # sched: [1:1.00] 3451 ; 3452 ; HASWELL-LABEL: test_phsubw: 3453 ; HASWELL: # %bb.0: 3454 ; HASWELL-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] 3455 ; HASWELL-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00] 3456 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3457 ; HASWELL-NEXT: retq # sched: [7:1.00] 3458 ; 3459 ; BROADWELL-LABEL: test_phsubw: 3460 ; BROADWELL: # %bb.0: 3461 ; BROADWELL-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] 3462 ; BROADWELL-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00] 3463 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3464 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3465 ; 3466 ; SKYLAKE-LABEL: test_phsubw: 3467 ; SKYLAKE: # %bb.0: 3468 ; SKYLAKE-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] 3469 ; SKYLAKE-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00] 3470 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3471 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3472 ; 
3473 ; SKX-LABEL: test_phsubw: 3474 ; SKX: # %bb.0: 3475 ; SKX-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00] 3476 ; SKX-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00] 3477 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3478 ; SKX-NEXT: retq # sched: [7:1.00] 3479 ; 3480 ; BTVER2-LABEL: test_phsubw: 3481 ; BTVER2: # %bb.0: 3482 ; BTVER2-NEXT: phsubw %mm1, %mm0 # sched: [1:0.50] 3483 ; BTVER2-NEXT: phsubw (%rdi), %mm0 # sched: [6:1.00] 3484 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3485 ; BTVER2-NEXT: retq # sched: [4:1.00] 3486 ; 3487 ; ZNVER1-LABEL: test_phsubw: 3488 ; ZNVER1: # %bb.0: 3489 ; ZNVER1-NEXT: phsubw %mm1, %mm0 # sched: [100:0.25] 3490 ; ZNVER1-NEXT: phsubw (%rdi), %mm0 # sched: [100:0.25] 3491 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3492 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3493 %1 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %a0, x86_mmx %a1) 3494 %2 = load x86_mmx, x86_mmx *%a2, align 8 3495 %3 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %1, x86_mmx %2) 3496 %4 = bitcast x86_mmx %3 to i64 3497 ret i64 %4 3498 } 3499 declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone 3500 3501 define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize { 3502 ; GENERIC-LABEL: test_pinsrw: 3503 ; GENERIC: # %bb.0: 3504 ; GENERIC-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00] 3505 ; GENERIC-NEXT: movswl (%rsi), %eax # sched: [5:0.50] 3506 ; GENERIC-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00] 3507 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3508 ; GENERIC-NEXT: retq # sched: [1:1.00] 3509 ; 3510 ; ATOM-LABEL: test_pinsrw: 3511 ; ATOM: # %bb.0: 3512 ; ATOM-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00] 3513 ; ATOM-NEXT: movswl (%rsi), %eax # sched: [1:1.00] 3514 ; ATOM-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00] 3515 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3516 ; ATOM-NEXT: retq # sched: [79:39.50] 3517 ; 3518 ; SLM-LABEL: test_pinsrw: 3519 ; SLM: # %bb.0: 3520 ; SLM-NEXT: movswl (%rsi), %eax # sched: [4:1.00] 3521 
; SLM-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00] 3522 ; SLM-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00] 3523 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3524 ; SLM-NEXT: retq # sched: [4:1.00] 3525 ; 3526 ; SANDY-LABEL: test_pinsrw: 3527 ; SANDY: # %bb.0: 3528 ; SANDY-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00] 3529 ; SANDY-NEXT: movswl (%rsi), %eax # sched: [5:0.50] 3530 ; SANDY-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00] 3531 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3532 ; SANDY-NEXT: retq # sched: [1:1.00] 3533 ; 3534 ; HASWELL-LABEL: test_pinsrw: 3535 ; HASWELL: # %bb.0: 3536 ; HASWELL-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] 3537 ; HASWELL-NEXT: movswl (%rsi), %eax # sched: [5:0.50] 3538 ; HASWELL-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] 3539 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3540 ; HASWELL-NEXT: retq # sched: [7:1.00] 3541 ; 3542 ; BROADWELL-LABEL: test_pinsrw: 3543 ; BROADWELL: # %bb.0: 3544 ; BROADWELL-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] 3545 ; BROADWELL-NEXT: movswl (%rsi), %eax # sched: [5:0.50] 3546 ; BROADWELL-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] 3547 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3548 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3549 ; 3550 ; SKYLAKE-LABEL: test_pinsrw: 3551 ; SKYLAKE: # %bb.0: 3552 ; SKYLAKE-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] 3553 ; SKYLAKE-NEXT: movswl (%rsi), %eax # sched: [5:0.50] 3554 ; SKYLAKE-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] 3555 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3556 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3557 ; 3558 ; SKX-LABEL: test_pinsrw: 3559 ; SKX: # %bb.0: 3560 ; SKX-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00] 3561 ; SKX-NEXT: movswl (%rsi), %eax # sched: [5:0.50] 3562 ; SKX-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00] 3563 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3564 ; SKX-NEXT: retq # sched: [7:1.00] 3565 ; 3566 ; BTVER2-LABEL: test_pinsrw: 3567 ; BTVER2: # %bb.0: 3568 ; BTVER2-NEXT: pinsrw 
$0, %edi, %mm0 # sched: [7:0.50] 3569 ; BTVER2-NEXT: movswl (%rsi), %eax # sched: [4:1.00] 3570 ; BTVER2-NEXT: pinsrw $1, %eax, %mm0 # sched: [7:0.50] 3571 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3572 ; BTVER2-NEXT: retq # sched: [4:1.00] 3573 ; 3574 ; ZNVER1-LABEL: test_pinsrw: 3575 ; ZNVER1: # %bb.0: 3576 ; ZNVER1-NEXT: movswl (%rsi), %eax # sched: [8:0.50] 3577 ; ZNVER1-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:0.25] 3578 ; ZNVER1-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:0.25] 3579 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3580 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3581 %1 = call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %a0, i32 %a1, i32 0) 3582 %2 = load i16, i16 *%a2, align 2 3583 %3 = sext i16 %2 to i32 3584 %4 = call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %1, i32 %3, i32 1) 3585 %5 = bitcast x86_mmx %4 to i64 3586 ret i64 %5 3587 } 3588 declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32) nounwind readnone 3589 3590 define i64 @test_pmaddwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3591 ; GENERIC-LABEL: test_pmaddwd: 3592 ; GENERIC: # %bb.0: 3593 ; GENERIC-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] 3594 ; GENERIC-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00] 3595 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3596 ; GENERIC-NEXT: retq # sched: [1:1.00] 3597 ; 3598 ; ATOM-LABEL: test_pmaddwd: 3599 ; ATOM: # %bb.0: 3600 ; ATOM-NEXT: pmaddwd %mm1, %mm0 # sched: [4:4.00] 3601 ; ATOM-NEXT: pmaddwd (%rdi), %mm0 # sched: [4:4.00] 3602 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3603 ; ATOM-NEXT: retq # sched: [79:39.50] 3604 ; 3605 ; SLM-LABEL: test_pmaddwd: 3606 ; SLM: # %bb.0: 3607 ; SLM-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] 3608 ; SLM-NEXT: pmaddwd (%rdi), %mm0 # sched: [7:1.00] 3609 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3610 ; SLM-NEXT: retq # sched: [4:1.00] 3611 ; 3612 ; SANDY-LABEL: test_pmaddwd: 3613 ; SANDY: # %bb.0: 3614 ; SANDY-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] 3615 ; SANDY-NEXT: pmaddwd (%rdi), %mm0 # 
sched: [10:1.00] 3616 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3617 ; SANDY-NEXT: retq # sched: [1:1.00] 3618 ; 3619 ; HASWELL-LABEL: test_pmaddwd: 3620 ; HASWELL: # %bb.0: 3621 ; HASWELL-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] 3622 ; HASWELL-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00] 3623 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3624 ; HASWELL-NEXT: retq # sched: [7:1.00] 3625 ; 3626 ; BROADWELL-LABEL: test_pmaddwd: 3627 ; BROADWELL: # %bb.0: 3628 ; BROADWELL-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] 3629 ; BROADWELL-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00] 3630 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3631 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3632 ; 3633 ; SKYLAKE-LABEL: test_pmaddwd: 3634 ; SKYLAKE: # %bb.0: 3635 ; SKYLAKE-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] 3636 ; SKYLAKE-NEXT: pmaddwd (%rdi), %mm0 # sched: [9:1.00] 3637 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3638 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3639 ; 3640 ; SKX-LABEL: test_pmaddwd: 3641 ; SKX: # %bb.0: 3642 ; SKX-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] 3643 ; SKX-NEXT: pmaddwd (%rdi), %mm0 # sched: [9:1.00] 3644 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3645 ; SKX-NEXT: retq # sched: [7:1.00] 3646 ; 3647 ; BTVER2-LABEL: test_pmaddwd: 3648 ; BTVER2: # %bb.0: 3649 ; BTVER2-NEXT: pmaddwd %mm1, %mm0 # sched: [2:1.00] 3650 ; BTVER2-NEXT: pmaddwd (%rdi), %mm0 # sched: [7:1.00] 3651 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3652 ; BTVER2-NEXT: retq # sched: [4:1.00] 3653 ; 3654 ; ZNVER1-LABEL: test_pmaddwd: 3655 ; ZNVER1: # %bb.0: 3656 ; ZNVER1-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00] 3657 ; ZNVER1-NEXT: pmaddwd (%rdi), %mm0 # sched: [11:1.00] 3658 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3659 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3660 %1 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a0, x86_mmx %a1) 3661 %2 = load x86_mmx, x86_mmx *%a2, align 8 3662 %3 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %1, x86_mmx %2) 3663 %4 = 
bitcast x86_mmx %3 to i64 3664 ret i64 %4 3665 } 3666 declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone 3667 3668 define i64 @test_pmaddubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3669 ; GENERIC-LABEL: test_pmaddubsw: 3670 ; GENERIC: # %bb.0: 3671 ; GENERIC-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] 3672 ; GENERIC-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00] 3673 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3674 ; GENERIC-NEXT: retq # sched: [1:1.00] 3675 ; 3676 ; ATOM-LABEL: test_pmaddubsw: 3677 ; ATOM: # %bb.0: 3678 ; ATOM-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:4.00] 3679 ; ATOM-NEXT: pmaddubsw (%rdi), %mm0 # sched: [4:4.00] 3680 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3681 ; ATOM-NEXT: retq # sched: [79:39.50] 3682 ; 3683 ; SLM-LABEL: test_pmaddubsw: 3684 ; SLM: # %bb.0: 3685 ; SLM-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] 3686 ; SLM-NEXT: pmaddubsw (%rdi), %mm0 # sched: [7:1.00] 3687 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3688 ; SLM-NEXT: retq # sched: [4:1.00] 3689 ; 3690 ; SANDY-LABEL: test_pmaddubsw: 3691 ; SANDY: # %bb.0: 3692 ; SANDY-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] 3693 ; SANDY-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00] 3694 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3695 ; SANDY-NEXT: retq # sched: [1:1.00] 3696 ; 3697 ; HASWELL-LABEL: test_pmaddubsw: 3698 ; HASWELL: # %bb.0: 3699 ; HASWELL-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] 3700 ; HASWELL-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00] 3701 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3702 ; HASWELL-NEXT: retq # sched: [7:1.00] 3703 ; 3704 ; BROADWELL-LABEL: test_pmaddubsw: 3705 ; BROADWELL: # %bb.0: 3706 ; BROADWELL-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] 3707 ; BROADWELL-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00] 3708 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3709 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3710 ; 3711 ; SKYLAKE-LABEL: test_pmaddubsw: 3712 ; SKYLAKE: # %bb.0: 3713 ; 
SKYLAKE-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] 3714 ; SKYLAKE-NEXT: pmaddubsw (%rdi), %mm0 # sched: [9:1.00] 3715 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3716 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3717 ; 3718 ; SKX-LABEL: test_pmaddubsw: 3719 ; SKX: # %bb.0: 3720 ; SKX-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] 3721 ; SKX-NEXT: pmaddubsw (%rdi), %mm0 # sched: [9:1.00] 3722 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3723 ; SKX-NEXT: retq # sched: [7:1.00] 3724 ; 3725 ; BTVER2-LABEL: test_pmaddubsw: 3726 ; BTVER2: # %bb.0: 3727 ; BTVER2-NEXT: pmaddubsw %mm1, %mm0 # sched: [2:1.00] 3728 ; BTVER2-NEXT: pmaddubsw (%rdi), %mm0 # sched: [7:1.00] 3729 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3730 ; BTVER2-NEXT: retq # sched: [4:1.00] 3731 ; 3732 ; ZNVER1-LABEL: test_pmaddubsw: 3733 ; ZNVER1: # %bb.0: 3734 ; ZNVER1-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00] 3735 ; ZNVER1-NEXT: pmaddubsw (%rdi), %mm0 # sched: [11:1.00] 3736 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3737 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3738 %1 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a0, x86_mmx %a1) 3739 %2 = load x86_mmx, x86_mmx *%a2, align 8 3740 %3 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %1, x86_mmx %2) 3741 %4 = bitcast x86_mmx %3 to i64 3742 ret i64 %4 3743 } 3744 declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone 3745 3746 define i64 @test_pmaxsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3747 ; GENERIC-LABEL: test_pmaxsw: 3748 ; GENERIC: # %bb.0: 3749 ; GENERIC-NEXT: pmaxsw %mm1, %mm0 # sched: [3:1.00] 3750 ; GENERIC-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:1.00] 3751 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3752 ; GENERIC-NEXT: retq # sched: [1:1.00] 3753 ; 3754 ; ATOM-LABEL: test_pmaxsw: 3755 ; ATOM: # %bb.0: 3756 ; ATOM-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] 3757 ; ATOM-NEXT: pmaxsw (%rdi), %mm0 # sched: [1:1.00] 3758 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3759 ; ATOM-NEXT: retq 
# sched: [79:39.50] 3760 ; 3761 ; SLM-LABEL: test_pmaxsw: 3762 ; SLM: # %bb.0: 3763 ; SLM-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] 3764 ; SLM-NEXT: pmaxsw (%rdi), %mm0 # sched: [4:1.00] 3765 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3766 ; SLM-NEXT: retq # sched: [4:1.00] 3767 ; 3768 ; SANDY-LABEL: test_pmaxsw: 3769 ; SANDY: # %bb.0: 3770 ; SANDY-NEXT: pmaxsw %mm1, %mm0 # sched: [3:1.00] 3771 ; SANDY-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:1.00] 3772 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3773 ; SANDY-NEXT: retq # sched: [1:1.00] 3774 ; 3775 ; HASWELL-LABEL: test_pmaxsw: 3776 ; HASWELL: # %bb.0: 3777 ; HASWELL-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] 3778 ; HASWELL-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:0.50] 3779 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3780 ; HASWELL-NEXT: retq # sched: [7:1.00] 3781 ; 3782 ; BROADWELL-LABEL: test_pmaxsw: 3783 ; BROADWELL: # %bb.0: 3784 ; BROADWELL-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] 3785 ; BROADWELL-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:0.50] 3786 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3787 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3788 ; 3789 ; SKYLAKE-LABEL: test_pmaxsw: 3790 ; SKYLAKE: # %bb.0: 3791 ; SKYLAKE-NEXT: pmaxsw %mm1, %mm0 # sched: [1:1.00] 3792 ; SKYLAKE-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:1.00] 3793 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3794 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3795 ; 3796 ; SKX-LABEL: test_pmaxsw: 3797 ; SKX: # %bb.0: 3798 ; SKX-NEXT: pmaxsw %mm1, %mm0 # sched: [1:1.00] 3799 ; SKX-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:1.00] 3800 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3801 ; SKX-NEXT: retq # sched: [7:1.00] 3802 ; 3803 ; BTVER2-LABEL: test_pmaxsw: 3804 ; BTVER2: # %bb.0: 3805 ; BTVER2-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] 3806 ; BTVER2-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:1.00] 3807 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3808 ; BTVER2-NEXT: retq # sched: [4:1.00] 3809 ; 3810 ; ZNVER1-LABEL: test_pmaxsw: 3811 ; ZNVER1: # 
%bb.0: 3812 ; ZNVER1-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.25] 3813 ; ZNVER1-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:0.50] 3814 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3815 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3816 %1 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a0, x86_mmx %a1) 3817 %2 = load x86_mmx, x86_mmx *%a2, align 8 3818 %3 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %1, x86_mmx %2) 3819 %4 = bitcast x86_mmx %3 to i64 3820 ret i64 %4 3821 } 3822 declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone 3823 3824 define i64 @test_pmaxub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3825 ; GENERIC-LABEL: test_pmaxub: 3826 ; GENERIC: # %bb.0: 3827 ; GENERIC-NEXT: pmaxub %mm1, %mm0 # sched: [3:1.00] 3828 ; GENERIC-NEXT: pmaxub (%rdi), %mm0 # sched: [8:1.00] 3829 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3830 ; GENERIC-NEXT: retq # sched: [1:1.00] 3831 ; 3832 ; ATOM-LABEL: test_pmaxub: 3833 ; ATOM: # %bb.0: 3834 ; ATOM-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] 3835 ; ATOM-NEXT: pmaxub (%rdi), %mm0 # sched: [1:1.00] 3836 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3837 ; ATOM-NEXT: retq # sched: [79:39.50] 3838 ; 3839 ; SLM-LABEL: test_pmaxub: 3840 ; SLM: # %bb.0: 3841 ; SLM-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] 3842 ; SLM-NEXT: pmaxub (%rdi), %mm0 # sched: [4:1.00] 3843 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3844 ; SLM-NEXT: retq # sched: [4:1.00] 3845 ; 3846 ; SANDY-LABEL: test_pmaxub: 3847 ; SANDY: # %bb.0: 3848 ; SANDY-NEXT: pmaxub %mm1, %mm0 # sched: [3:1.00] 3849 ; SANDY-NEXT: pmaxub (%rdi), %mm0 # sched: [8:1.00] 3850 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3851 ; SANDY-NEXT: retq # sched: [1:1.00] 3852 ; 3853 ; HASWELL-LABEL: test_pmaxub: 3854 ; HASWELL: # %bb.0: 3855 ; HASWELL-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] 3856 ; HASWELL-NEXT: pmaxub (%rdi), %mm0 # sched: [6:0.50] 3857 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3858 ; HASWELL-NEXT: retq # sched: [7:1.00] 3859 ; 3860 ; 
BROADWELL-LABEL: test_pmaxub: 3861 ; BROADWELL: # %bb.0: 3862 ; BROADWELL-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] 3863 ; BROADWELL-NEXT: pmaxub (%rdi), %mm0 # sched: [6:0.50] 3864 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3865 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3866 ; 3867 ; SKYLAKE-LABEL: test_pmaxub: 3868 ; SKYLAKE: # %bb.0: 3869 ; SKYLAKE-NEXT: pmaxub %mm1, %mm0 # sched: [1:1.00] 3870 ; SKYLAKE-NEXT: pmaxub (%rdi), %mm0 # sched: [6:1.00] 3871 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3872 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3873 ; 3874 ; SKX-LABEL: test_pmaxub: 3875 ; SKX: # %bb.0: 3876 ; SKX-NEXT: pmaxub %mm1, %mm0 # sched: [1:1.00] 3877 ; SKX-NEXT: pmaxub (%rdi), %mm0 # sched: [6:1.00] 3878 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3879 ; SKX-NEXT: retq # sched: [7:1.00] 3880 ; 3881 ; BTVER2-LABEL: test_pmaxub: 3882 ; BTVER2: # %bb.0: 3883 ; BTVER2-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] 3884 ; BTVER2-NEXT: pmaxub (%rdi), %mm0 # sched: [6:1.00] 3885 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3886 ; BTVER2-NEXT: retq # sched: [4:1.00] 3887 ; 3888 ; ZNVER1-LABEL: test_pmaxub: 3889 ; ZNVER1: # %bb.0: 3890 ; ZNVER1-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.25] 3891 ; ZNVER1-NEXT: pmaxub (%rdi), %mm0 # sched: [8:0.50] 3892 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3893 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3894 %1 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a0, x86_mmx %a1) 3895 %2 = load x86_mmx, x86_mmx *%a2, align 8 3896 %3 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %1, x86_mmx %2) 3897 %4 = bitcast x86_mmx %3 to i64 3898 ret i64 %4 3899 } 3900 declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone 3901 3902 define i64 @test_pminsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3903 ; GENERIC-LABEL: test_pminsw: 3904 ; GENERIC: # %bb.0: 3905 ; GENERIC-NEXT: pminsw %mm1, %mm0 # sched: [3:1.00] 3906 ; GENERIC-NEXT: pminsw (%rdi), %mm0 # sched: [8:1.00] 3907 ; GENERIC-NEXT: movq %mm0, %rax # 
sched: [2:1.00] 3908 ; GENERIC-NEXT: retq # sched: [1:1.00] 3909 ; 3910 ; ATOM-LABEL: test_pminsw: 3911 ; ATOM: # %bb.0: 3912 ; ATOM-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] 3913 ; ATOM-NEXT: pminsw (%rdi), %mm0 # sched: [1:1.00] 3914 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3915 ; ATOM-NEXT: retq # sched: [79:39.50] 3916 ; 3917 ; SLM-LABEL: test_pminsw: 3918 ; SLM: # %bb.0: 3919 ; SLM-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] 3920 ; SLM-NEXT: pminsw (%rdi), %mm0 # sched: [4:1.00] 3921 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 3922 ; SLM-NEXT: retq # sched: [4:1.00] 3923 ; 3924 ; SANDY-LABEL: test_pminsw: 3925 ; SANDY: # %bb.0: 3926 ; SANDY-NEXT: pminsw %mm1, %mm0 # sched: [3:1.00] 3927 ; SANDY-NEXT: pminsw (%rdi), %mm0 # sched: [8:1.00] 3928 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 3929 ; SANDY-NEXT: retq # sched: [1:1.00] 3930 ; 3931 ; HASWELL-LABEL: test_pminsw: 3932 ; HASWELL: # %bb.0: 3933 ; HASWELL-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] 3934 ; HASWELL-NEXT: pminsw (%rdi), %mm0 # sched: [6:0.50] 3935 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3936 ; HASWELL-NEXT: retq # sched: [7:1.00] 3937 ; 3938 ; BROADWELL-LABEL: test_pminsw: 3939 ; BROADWELL: # %bb.0: 3940 ; BROADWELL-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] 3941 ; BROADWELL-NEXT: pminsw (%rdi), %mm0 # sched: [6:0.50] 3942 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 3943 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3944 ; 3945 ; SKYLAKE-LABEL: test_pminsw: 3946 ; SKYLAKE: # %bb.0: 3947 ; SKYLAKE-NEXT: pminsw %mm1, %mm0 # sched: [1:1.00] 3948 ; SKYLAKE-NEXT: pminsw (%rdi), %mm0 # sched: [6:1.00] 3949 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 3950 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3951 ; 3952 ; SKX-LABEL: test_pminsw: 3953 ; SKX: # %bb.0: 3954 ; SKX-NEXT: pminsw %mm1, %mm0 # sched: [1:1.00] 3955 ; SKX-NEXT: pminsw (%rdi), %mm0 # sched: [6:1.00] 3956 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 3957 ; SKX-NEXT: retq # sched: [7:1.00] 3958 ; 3959 ; BTVER2-LABEL: 
test_pminsw: 3960 ; BTVER2: # %bb.0: 3961 ; BTVER2-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] 3962 ; BTVER2-NEXT: pminsw (%rdi), %mm0 # sched: [6:1.00] 3963 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 3964 ; BTVER2-NEXT: retq # sched: [4:1.00] 3965 ; 3966 ; ZNVER1-LABEL: test_pminsw: 3967 ; ZNVER1: # %bb.0: 3968 ; ZNVER1-NEXT: pminsw %mm1, %mm0 # sched: [1:0.25] 3969 ; ZNVER1-NEXT: pminsw (%rdi), %mm0 # sched: [8:0.50] 3970 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 3971 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3972 %1 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a0, x86_mmx %a1) 3973 %2 = load x86_mmx, x86_mmx *%a2, align 8 3974 %3 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %1, x86_mmx %2) 3975 %4 = bitcast x86_mmx %3 to i64 3976 ret i64 %4 3977 } 3978 declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone 3979 3980 define i64 @test_pminub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 3981 ; GENERIC-LABEL: test_pminub: 3982 ; GENERIC: # %bb.0: 3983 ; GENERIC-NEXT: pminub %mm1, %mm0 # sched: [3:1.00] 3984 ; GENERIC-NEXT: pminub (%rdi), %mm0 # sched: [8:1.00] 3985 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 3986 ; GENERIC-NEXT: retq # sched: [1:1.00] 3987 ; 3988 ; ATOM-LABEL: test_pminub: 3989 ; ATOM: # %bb.0: 3990 ; ATOM-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] 3991 ; ATOM-NEXT: pminub (%rdi), %mm0 # sched: [1:1.00] 3992 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 3993 ; ATOM-NEXT: retq # sched: [79:39.50] 3994 ; 3995 ; SLM-LABEL: test_pminub: 3996 ; SLM: # %bb.0: 3997 ; SLM-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] 3998 ; SLM-NEXT: pminub (%rdi), %mm0 # sched: [4:1.00] 3999 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4000 ; SLM-NEXT: retq # sched: [4:1.00] 4001 ; 4002 ; SANDY-LABEL: test_pminub: 4003 ; SANDY: # %bb.0: 4004 ; SANDY-NEXT: pminub %mm1, %mm0 # sched: [3:1.00] 4005 ; SANDY-NEXT: pminub (%rdi), %mm0 # sched: [8:1.00] 4006 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4007 ; SANDY-NEXT: retq # sched: 
[1:1.00] 4008 ; 4009 ; HASWELL-LABEL: test_pminub: 4010 ; HASWELL: # %bb.0: 4011 ; HASWELL-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] 4012 ; HASWELL-NEXT: pminub (%rdi), %mm0 # sched: [6:0.50] 4013 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4014 ; HASWELL-NEXT: retq # sched: [7:1.00] 4015 ; 4016 ; BROADWELL-LABEL: test_pminub: 4017 ; BROADWELL: # %bb.0: 4018 ; BROADWELL-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] 4019 ; BROADWELL-NEXT: pminub (%rdi), %mm0 # sched: [6:0.50] 4020 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4021 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4022 ; 4023 ; SKYLAKE-LABEL: test_pminub: 4024 ; SKYLAKE: # %bb.0: 4025 ; SKYLAKE-NEXT: pminub %mm1, %mm0 # sched: [1:1.00] 4026 ; SKYLAKE-NEXT: pminub (%rdi), %mm0 # sched: [6:1.00] 4027 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4028 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4029 ; 4030 ; SKX-LABEL: test_pminub: 4031 ; SKX: # %bb.0: 4032 ; SKX-NEXT: pminub %mm1, %mm0 # sched: [1:1.00] 4033 ; SKX-NEXT: pminub (%rdi), %mm0 # sched: [6:1.00] 4034 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4035 ; SKX-NEXT: retq # sched: [7:1.00] 4036 ; 4037 ; BTVER2-LABEL: test_pminub: 4038 ; BTVER2: # %bb.0: 4039 ; BTVER2-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] 4040 ; BTVER2-NEXT: pminub (%rdi), %mm0 # sched: [6:1.00] 4041 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4042 ; BTVER2-NEXT: retq # sched: [4:1.00] 4043 ; 4044 ; ZNVER1-LABEL: test_pminub: 4045 ; ZNVER1: # %bb.0: 4046 ; ZNVER1-NEXT: pminub %mm1, %mm0 # sched: [1:0.25] 4047 ; ZNVER1-NEXT: pminub (%rdi), %mm0 # sched: [8:0.50] 4048 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4049 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4050 %1 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a0, x86_mmx %a1) 4051 %2 = load x86_mmx, x86_mmx *%a2, align 8 4052 %3 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %1, x86_mmx %2) 4053 %4 = bitcast x86_mmx %3 to i64 4054 ret i64 %4 4055 } 4056 declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind 
readnone 4057 4058 define i32 @test_pmovmskb(x86_mmx %a0) optsize { 4059 ; GENERIC-LABEL: test_pmovmskb: 4060 ; GENERIC: # %bb.0: 4061 ; GENERIC-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] 4062 ; GENERIC-NEXT: retq # sched: [1:1.00] 4063 ; 4064 ; ATOM-LABEL: test_pmovmskb: 4065 ; ATOM: # %bb.0: 4066 ; ATOM-NEXT: pmovmskb %mm0, %eax # sched: [3:3.00] 4067 ; ATOM-NEXT: retq # sched: [79:39.50] 4068 ; 4069 ; SLM-LABEL: test_pmovmskb: 4070 ; SLM: # %bb.0: 4071 ; SLM-NEXT: pmovmskb %mm0, %eax # sched: [4:1.00] 4072 ; SLM-NEXT: retq # sched: [4:1.00] 4073 ; 4074 ; SANDY-LABEL: test_pmovmskb: 4075 ; SANDY: # %bb.0: 4076 ; SANDY-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] 4077 ; SANDY-NEXT: retq # sched: [1:1.00] 4078 ; 4079 ; HASWELL-LABEL: test_pmovmskb: 4080 ; HASWELL: # %bb.0: 4081 ; HASWELL-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] 4082 ; HASWELL-NEXT: retq # sched: [7:1.00] 4083 ; 4084 ; BROADWELL-LABEL: test_pmovmskb: 4085 ; BROADWELL: # %bb.0: 4086 ; BROADWELL-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] 4087 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4088 ; 4089 ; SKYLAKE-LABEL: test_pmovmskb: 4090 ; SKYLAKE: # %bb.0: 4091 ; SKYLAKE-NEXT: pmovmskb %mm0, %eax # sched: [2:1.00] 4092 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4093 ; 4094 ; SKX-LABEL: test_pmovmskb: 4095 ; SKX: # %bb.0: 4096 ; SKX-NEXT: pmovmskb %mm0, %eax # sched: [2:1.00] 4097 ; SKX-NEXT: retq # sched: [7:1.00] 4098 ; 4099 ; BTVER2-LABEL: test_pmovmskb: 4100 ; BTVER2: # %bb.0: 4101 ; BTVER2-NEXT: pmovmskb %mm0, %eax # sched: [3:1.00] 4102 ; BTVER2-NEXT: retq # sched: [4:1.00] 4103 ; 4104 ; ZNVER1-LABEL: test_pmovmskb: 4105 ; ZNVER1: # %bb.0: 4106 ; ZNVER1-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00] 4107 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4108 %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0) 4109 ret i32 %1 4110 } 4111 declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone 4112 4113 define i64 @test_pmulhrsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4114 ; GENERIC-LABEL: test_pmulhrsw: 
4115 ; GENERIC: # %bb.0: 4116 ; GENERIC-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] 4117 ; GENERIC-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00] 4118 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4119 ; GENERIC-NEXT: retq # sched: [1:1.00] 4120 ; 4121 ; ATOM-LABEL: test_pmulhrsw: 4122 ; ATOM: # %bb.0: 4123 ; ATOM-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:4.00] 4124 ; ATOM-NEXT: pmulhrsw (%rdi), %mm0 # sched: [4:4.00] 4125 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4126 ; ATOM-NEXT: retq # sched: [79:39.50] 4127 ; 4128 ; SLM-LABEL: test_pmulhrsw: 4129 ; SLM: # %bb.0: 4130 ; SLM-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] 4131 ; SLM-NEXT: pmulhrsw (%rdi), %mm0 # sched: [7:1.00] 4132 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4133 ; SLM-NEXT: retq # sched: [4:1.00] 4134 ; 4135 ; SANDY-LABEL: test_pmulhrsw: 4136 ; SANDY: # %bb.0: 4137 ; SANDY-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] 4138 ; SANDY-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00] 4139 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4140 ; SANDY-NEXT: retq # sched: [1:1.00] 4141 ; 4142 ; HASWELL-LABEL: test_pmulhrsw: 4143 ; HASWELL: # %bb.0: 4144 ; HASWELL-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] 4145 ; HASWELL-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00] 4146 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4147 ; HASWELL-NEXT: retq # sched: [7:1.00] 4148 ; 4149 ; BROADWELL-LABEL: test_pmulhrsw: 4150 ; BROADWELL: # %bb.0: 4151 ; BROADWELL-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] 4152 ; BROADWELL-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00] 4153 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4154 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4155 ; 4156 ; SKYLAKE-LABEL: test_pmulhrsw: 4157 ; SKYLAKE: # %bb.0: 4158 ; SKYLAKE-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] 4159 ; SKYLAKE-NEXT: pmulhrsw (%rdi), %mm0 # sched: [9:1.00] 4160 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4161 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4162 ; 4163 ; SKX-LABEL: test_pmulhrsw: 4164 ; SKX: # %bb.0: 
4165 ; SKX-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] 4166 ; SKX-NEXT: pmulhrsw (%rdi), %mm0 # sched: [9:1.00] 4167 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4168 ; SKX-NEXT: retq # sched: [7:1.00] 4169 ; 4170 ; BTVER2-LABEL: test_pmulhrsw: 4171 ; BTVER2: # %bb.0: 4172 ; BTVER2-NEXT: pmulhrsw %mm1, %mm0 # sched: [2:1.00] 4173 ; BTVER2-NEXT: pmulhrsw (%rdi), %mm0 # sched: [7:1.00] 4174 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4175 ; BTVER2-NEXT: retq # sched: [4:1.00] 4176 ; 4177 ; ZNVER1-LABEL: test_pmulhrsw: 4178 ; ZNVER1: # %bb.0: 4179 ; ZNVER1-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00] 4180 ; ZNVER1-NEXT: pmulhrsw (%rdi), %mm0 # sched: [11:1.00] 4181 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4182 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4183 %1 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %a0, x86_mmx %a1) 4184 %2 = load x86_mmx, x86_mmx *%a2, align 8 4185 %3 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %1, x86_mmx %2) 4186 %4 = bitcast x86_mmx %3 to i64 4187 ret i64 %4 4188 } 4189 declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone 4190 4191 define i64 @test_pmulhw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4192 ; GENERIC-LABEL: test_pmulhw: 4193 ; GENERIC: # %bb.0: 4194 ; GENERIC-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] 4195 ; GENERIC-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00] 4196 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4197 ; GENERIC-NEXT: retq # sched: [1:1.00] 4198 ; 4199 ; ATOM-LABEL: test_pmulhw: 4200 ; ATOM: # %bb.0: 4201 ; ATOM-NEXT: pmulhw %mm1, %mm0 # sched: [4:4.00] 4202 ; ATOM-NEXT: pmulhw (%rdi), %mm0 # sched: [4:4.00] 4203 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4204 ; ATOM-NEXT: retq # sched: [79:39.50] 4205 ; 4206 ; SLM-LABEL: test_pmulhw: 4207 ; SLM: # %bb.0: 4208 ; SLM-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] 4209 ; SLM-NEXT: pmulhw (%rdi), %mm0 # sched: [7:1.00] 4210 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4211 ; SLM-NEXT: retq # sched: [4:1.00] 4212 ; 
4213 ; SANDY-LABEL: test_pmulhw: 4214 ; SANDY: # %bb.0: 4215 ; SANDY-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] 4216 ; SANDY-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00] 4217 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4218 ; SANDY-NEXT: retq # sched: [1:1.00] 4219 ; 4220 ; HASWELL-LABEL: test_pmulhw: 4221 ; HASWELL: # %bb.0: 4222 ; HASWELL-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] 4223 ; HASWELL-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00] 4224 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4225 ; HASWELL-NEXT: retq # sched: [7:1.00] 4226 ; 4227 ; BROADWELL-LABEL: test_pmulhw: 4228 ; BROADWELL: # %bb.0: 4229 ; BROADWELL-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] 4230 ; BROADWELL-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00] 4231 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4232 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4233 ; 4234 ; SKYLAKE-LABEL: test_pmulhw: 4235 ; SKYLAKE: # %bb.0: 4236 ; SKYLAKE-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] 4237 ; SKYLAKE-NEXT: pmulhw (%rdi), %mm0 # sched: [9:1.00] 4238 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4239 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4240 ; 4241 ; SKX-LABEL: test_pmulhw: 4242 ; SKX: # %bb.0: 4243 ; SKX-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] 4244 ; SKX-NEXT: pmulhw (%rdi), %mm0 # sched: [9:1.00] 4245 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4246 ; SKX-NEXT: retq # sched: [7:1.00] 4247 ; 4248 ; BTVER2-LABEL: test_pmulhw: 4249 ; BTVER2: # %bb.0: 4250 ; BTVER2-NEXT: pmulhw %mm1, %mm0 # sched: [2:1.00] 4251 ; BTVER2-NEXT: pmulhw (%rdi), %mm0 # sched: [7:1.00] 4252 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4253 ; BTVER2-NEXT: retq # sched: [4:1.00] 4254 ; 4255 ; ZNVER1-LABEL: test_pmulhw: 4256 ; ZNVER1: # %bb.0: 4257 ; ZNVER1-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00] 4258 ; ZNVER1-NEXT: pmulhw (%rdi), %mm0 # sched: [11:1.00] 4259 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4260 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4261 %1 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a0, 
x86_mmx %a1) 4262 %2 = load x86_mmx, x86_mmx *%a2, align 8 4263 %3 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %1, x86_mmx %2) 4264 %4 = bitcast x86_mmx %3 to i64 4265 ret i64 %4 4266 } 4267 declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone 4268 4269 define i64 @test_pmulhuw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4270 ; GENERIC-LABEL: test_pmulhuw: 4271 ; GENERIC: # %bb.0: 4272 ; GENERIC-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] 4273 ; GENERIC-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00] 4274 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4275 ; GENERIC-NEXT: retq # sched: [1:1.00] 4276 ; 4277 ; ATOM-LABEL: test_pmulhuw: 4278 ; ATOM: # %bb.0: 4279 ; ATOM-NEXT: pmulhuw %mm1, %mm0 # sched: [4:4.00] 4280 ; ATOM-NEXT: pmulhuw (%rdi), %mm0 # sched: [4:4.00] 4281 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4282 ; ATOM-NEXT: retq # sched: [79:39.50] 4283 ; 4284 ; SLM-LABEL: test_pmulhuw: 4285 ; SLM: # %bb.0: 4286 ; SLM-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] 4287 ; SLM-NEXT: pmulhuw (%rdi), %mm0 # sched: [7:1.00] 4288 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4289 ; SLM-NEXT: retq # sched: [4:1.00] 4290 ; 4291 ; SANDY-LABEL: test_pmulhuw: 4292 ; SANDY: # %bb.0: 4293 ; SANDY-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] 4294 ; SANDY-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00] 4295 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4296 ; SANDY-NEXT: retq # sched: [1:1.00] 4297 ; 4298 ; HASWELL-LABEL: test_pmulhuw: 4299 ; HASWELL: # %bb.0: 4300 ; HASWELL-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] 4301 ; HASWELL-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00] 4302 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4303 ; HASWELL-NEXT: retq # sched: [7:1.00] 4304 ; 4305 ; BROADWELL-LABEL: test_pmulhuw: 4306 ; BROADWELL: # %bb.0: 4307 ; BROADWELL-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] 4308 ; BROADWELL-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00] 4309 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4310 ; BROADWELL-NEXT: retq 
# sched: [7:1.00] 4311 ; 4312 ; SKYLAKE-LABEL: test_pmulhuw: 4313 ; SKYLAKE: # %bb.0: 4314 ; SKYLAKE-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] 4315 ; SKYLAKE-NEXT: pmulhuw (%rdi), %mm0 # sched: [9:1.00] 4316 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4317 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4318 ; 4319 ; SKX-LABEL: test_pmulhuw: 4320 ; SKX: # %bb.0: 4321 ; SKX-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] 4322 ; SKX-NEXT: pmulhuw (%rdi), %mm0 # sched: [9:1.00] 4323 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4324 ; SKX-NEXT: retq # sched: [7:1.00] 4325 ; 4326 ; BTVER2-LABEL: test_pmulhuw: 4327 ; BTVER2: # %bb.0: 4328 ; BTVER2-NEXT: pmulhuw %mm1, %mm0 # sched: [2:1.00] 4329 ; BTVER2-NEXT: pmulhuw (%rdi), %mm0 # sched: [7:1.00] 4330 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4331 ; BTVER2-NEXT: retq # sched: [4:1.00] 4332 ; 4333 ; ZNVER1-LABEL: test_pmulhuw: 4334 ; ZNVER1: # %bb.0: 4335 ; ZNVER1-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00] 4336 ; ZNVER1-NEXT: pmulhuw (%rdi), %mm0 # sched: [11:1.00] 4337 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4338 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4339 %1 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a0, x86_mmx %a1) 4340 %2 = load x86_mmx, x86_mmx *%a2, align 8 4341 %3 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %1, x86_mmx %2) 4342 %4 = bitcast x86_mmx %3 to i64 4343 ret i64 %4 4344 } 4345 declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone 4346 4347 define i64 @test_pmullw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4348 ; GENERIC-LABEL: test_pmullw: 4349 ; GENERIC: # %bb.0: 4350 ; GENERIC-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] 4351 ; GENERIC-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00] 4352 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4353 ; GENERIC-NEXT: retq # sched: [1:1.00] 4354 ; 4355 ; ATOM-LABEL: test_pmullw: 4356 ; ATOM: # %bb.0: 4357 ; ATOM-NEXT: pmullw %mm1, %mm0 # sched: [4:4.00] 4358 ; ATOM-NEXT: pmullw (%rdi), %mm0 # sched: [4:4.00] 4359 ; ATOM-NEXT: 
movq %mm0, %rax # sched: [3:3.00] 4360 ; ATOM-NEXT: retq # sched: [79:39.50] 4361 ; 4362 ; SLM-LABEL: test_pmullw: 4363 ; SLM: # %bb.0: 4364 ; SLM-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] 4365 ; SLM-NEXT: pmullw (%rdi), %mm0 # sched: [7:1.00] 4366 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4367 ; SLM-NEXT: retq # sched: [4:1.00] 4368 ; 4369 ; SANDY-LABEL: test_pmullw: 4370 ; SANDY: # %bb.0: 4371 ; SANDY-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] 4372 ; SANDY-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00] 4373 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4374 ; SANDY-NEXT: retq # sched: [1:1.00] 4375 ; 4376 ; HASWELL-LABEL: test_pmullw: 4377 ; HASWELL: # %bb.0: 4378 ; HASWELL-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] 4379 ; HASWELL-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00] 4380 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4381 ; HASWELL-NEXT: retq # sched: [7:1.00] 4382 ; 4383 ; BROADWELL-LABEL: test_pmullw: 4384 ; BROADWELL: # %bb.0: 4385 ; BROADWELL-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] 4386 ; BROADWELL-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00] 4387 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4388 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4389 ; 4390 ; SKYLAKE-LABEL: test_pmullw: 4391 ; SKYLAKE: # %bb.0: 4392 ; SKYLAKE-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] 4393 ; SKYLAKE-NEXT: pmullw (%rdi), %mm0 # sched: [9:1.00] 4394 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4395 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4396 ; 4397 ; SKX-LABEL: test_pmullw: 4398 ; SKX: # %bb.0: 4399 ; SKX-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] 4400 ; SKX-NEXT: pmullw (%rdi), %mm0 # sched: [9:1.00] 4401 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4402 ; SKX-NEXT: retq # sched: [7:1.00] 4403 ; 4404 ; BTVER2-LABEL: test_pmullw: 4405 ; BTVER2: # %bb.0: 4406 ; BTVER2-NEXT: pmullw %mm1, %mm0 # sched: [2:1.00] 4407 ; BTVER2-NEXT: pmullw (%rdi), %mm0 # sched: [7:1.00] 4408 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4409 ; BTVER2-NEXT: retq # sched: [4:1.00] 
4410 ; 4411 ; ZNVER1-LABEL: test_pmullw: 4412 ; ZNVER1: # %bb.0: 4413 ; ZNVER1-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00] 4414 ; ZNVER1-NEXT: pmullw (%rdi), %mm0 # sched: [11:1.00] 4415 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4416 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4417 %1 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a0, x86_mmx %a1) 4418 %2 = load x86_mmx, x86_mmx *%a2, align 8 4419 %3 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %1, x86_mmx %2) 4420 %4 = bitcast x86_mmx %3 to i64 4421 ret i64 %4 4422 } 4423 declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone 4424 4425 define i64 @test_pmuludq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4426 ; GENERIC-LABEL: test_pmuludq: 4427 ; GENERIC: # %bb.0: 4428 ; GENERIC-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] 4429 ; GENERIC-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00] 4430 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4431 ; GENERIC-NEXT: retq # sched: [1:1.00] 4432 ; 4433 ; ATOM-LABEL: test_pmuludq: 4434 ; ATOM: # %bb.0: 4435 ; ATOM-NEXT: pmuludq %mm1, %mm0 # sched: [4:4.00] 4436 ; ATOM-NEXT: pmuludq (%rdi), %mm0 # sched: [4:4.00] 4437 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4438 ; ATOM-NEXT: retq # sched: [79:39.50] 4439 ; 4440 ; SLM-LABEL: test_pmuludq: 4441 ; SLM: # %bb.0: 4442 ; SLM-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] 4443 ; SLM-NEXT: pmuludq (%rdi), %mm0 # sched: [7:1.00] 4444 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4445 ; SLM-NEXT: retq # sched: [4:1.00] 4446 ; 4447 ; SANDY-LABEL: test_pmuludq: 4448 ; SANDY: # %bb.0: 4449 ; SANDY-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] 4450 ; SANDY-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00] 4451 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4452 ; SANDY-NEXT: retq # sched: [1:1.00] 4453 ; 4454 ; HASWELL-LABEL: test_pmuludq: 4455 ; HASWELL: # %bb.0: 4456 ; HASWELL-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] 4457 ; HASWELL-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00] 4458 ; HASWELL-NEXT: movq %mm0, %rax # 
sched: [1:1.00] 4459 ; HASWELL-NEXT: retq # sched: [7:1.00] 4460 ; 4461 ; BROADWELL-LABEL: test_pmuludq: 4462 ; BROADWELL: # %bb.0: 4463 ; BROADWELL-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] 4464 ; BROADWELL-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00] 4465 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4466 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4467 ; 4468 ; SKYLAKE-LABEL: test_pmuludq: 4469 ; SKYLAKE: # %bb.0: 4470 ; SKYLAKE-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] 4471 ; SKYLAKE-NEXT: pmuludq (%rdi), %mm0 # sched: [9:1.00] 4472 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4473 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4474 ; 4475 ; SKX-LABEL: test_pmuludq: 4476 ; SKX: # %bb.0: 4477 ; SKX-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] 4478 ; SKX-NEXT: pmuludq (%rdi), %mm0 # sched: [9:1.00] 4479 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4480 ; SKX-NEXT: retq # sched: [7:1.00] 4481 ; 4482 ; BTVER2-LABEL: test_pmuludq: 4483 ; BTVER2: # %bb.0: 4484 ; BTVER2-NEXT: pmuludq %mm1, %mm0 # sched: [2:1.00] 4485 ; BTVER2-NEXT: pmuludq (%rdi), %mm0 # sched: [7:1.00] 4486 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4487 ; BTVER2-NEXT: retq # sched: [4:1.00] 4488 ; 4489 ; ZNVER1-LABEL: test_pmuludq: 4490 ; ZNVER1: # %bb.0: 4491 ; ZNVER1-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00] 4492 ; ZNVER1-NEXT: pmuludq (%rdi), %mm0 # sched: [11:1.00] 4493 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4494 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4495 %1 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a0, x86_mmx %a1) 4496 %2 = load x86_mmx, x86_mmx *%a2, align 8 4497 %3 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %1, x86_mmx %2) 4498 %4 = bitcast x86_mmx %3 to i64 4499 ret i64 %4 4500 } 4501 declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone 4502 4503 define i64 @test_por(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4504 ; GENERIC-LABEL: test_por: 4505 ; GENERIC: # %bb.0: 4506 ; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:0.33] 4507 ; 
GENERIC-NEXT: por (%rdi), %mm0 # sched: [6:0.50] 4508 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4509 ; GENERIC-NEXT: retq # sched: [1:1.00] 4510 ; 4511 ; ATOM-LABEL: test_por: 4512 ; ATOM: # %bb.0: 4513 ; ATOM-NEXT: por %mm1, %mm0 # sched: [1:0.50] 4514 ; ATOM-NEXT: por (%rdi), %mm0 # sched: [1:1.00] 4515 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4516 ; ATOM-NEXT: retq # sched: [79:39.50] 4517 ; 4518 ; SLM-LABEL: test_por: 4519 ; SLM: # %bb.0: 4520 ; SLM-NEXT: por %mm1, %mm0 # sched: [1:0.50] 4521 ; SLM-NEXT: por (%rdi), %mm0 # sched: [4:1.00] 4522 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4523 ; SLM-NEXT: retq # sched: [4:1.00] 4524 ; 4525 ; SANDY-LABEL: test_por: 4526 ; SANDY: # %bb.0: 4527 ; SANDY-NEXT: por %mm1, %mm0 # sched: [1:0.33] 4528 ; SANDY-NEXT: por (%rdi), %mm0 # sched: [6:0.50] 4529 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4530 ; SANDY-NEXT: retq # sched: [1:1.00] 4531 ; 4532 ; HASWELL-LABEL: test_por: 4533 ; HASWELL: # %bb.0: 4534 ; HASWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] 4535 ; HASWELL-NEXT: por (%rdi), %mm0 # sched: [6:0.50] 4536 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4537 ; HASWELL-NEXT: retq # sched: [7:1.00] 4538 ; 4539 ; BROADWELL-LABEL: test_por: 4540 ; BROADWELL: # %bb.0: 4541 ; BROADWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33] 4542 ; BROADWELL-NEXT: por (%rdi), %mm0 # sched: [6:0.50] 4543 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4544 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4545 ; 4546 ; SKYLAKE-LABEL: test_por: 4547 ; SKYLAKE: # %bb.0: 4548 ; SKYLAKE-NEXT: por %mm1, %mm0 # sched: [1:0.50] 4549 ; SKYLAKE-NEXT: por (%rdi), %mm0 # sched: [6:0.50] 4550 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4551 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4552 ; 4553 ; SKX-LABEL: test_por: 4554 ; SKX: # %bb.0: 4555 ; SKX-NEXT: por %mm1, %mm0 # sched: [1:0.50] 4556 ; SKX-NEXT: por (%rdi), %mm0 # sched: [6:0.50] 4557 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4558 ; SKX-NEXT: retq # sched: [7:1.00] 4559 
; 4560 ; BTVER2-LABEL: test_por: 4561 ; BTVER2: # %bb.0: 4562 ; BTVER2-NEXT: por %mm1, %mm0 # sched: [1:0.50] 4563 ; BTVER2-NEXT: por (%rdi), %mm0 # sched: [6:1.00] 4564 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4565 ; BTVER2-NEXT: retq # sched: [4:1.00] 4566 ; 4567 ; ZNVER1-LABEL: test_por: 4568 ; ZNVER1: # %bb.0: 4569 ; ZNVER1-NEXT: por %mm1, %mm0 # sched: [1:0.25] 4570 ; ZNVER1-NEXT: por (%rdi), %mm0 # sched: [8:0.50] 4571 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4572 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4573 %1 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a0, x86_mmx %a1) 4574 %2 = load x86_mmx, x86_mmx *%a2, align 8 4575 %3 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %2) 4576 %4 = bitcast x86_mmx %3 to i64 4577 ret i64 %4 4578 } 4579 declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone 4580 4581 define i64 @test_psadbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4582 ; GENERIC-LABEL: test_psadbw: 4583 ; GENERIC: # %bb.0: 4584 ; GENERIC-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] 4585 ; GENERIC-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] 4586 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4587 ; GENERIC-NEXT: retq # sched: [1:1.00] 4588 ; 4589 ; ATOM-LABEL: test_psadbw: 4590 ; ATOM: # %bb.0: 4591 ; ATOM-NEXT: psadbw %mm1, %mm0 # sched: [4:2.00] 4592 ; ATOM-NEXT: psadbw (%rdi), %mm0 # sched: [4:2.00] 4593 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4594 ; ATOM-NEXT: retq # sched: [79:39.50] 4595 ; 4596 ; SLM-LABEL: test_psadbw: 4597 ; SLM: # %bb.0: 4598 ; SLM-NEXT: psadbw %mm1, %mm0 # sched: [4:1.00] 4599 ; SLM-NEXT: psadbw (%rdi), %mm0 # sched: [7:1.00] 4600 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4601 ; SLM-NEXT: retq # sched: [4:1.00] 4602 ; 4603 ; SANDY-LABEL: test_psadbw: 4604 ; SANDY: # %bb.0: 4605 ; SANDY-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] 4606 ; SANDY-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] 4607 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4608 ; SANDY-NEXT: retq # sched: [1:1.00] 
4609 ; 4610 ; HASWELL-LABEL: test_psadbw: 4611 ; HASWELL: # %bb.0: 4612 ; HASWELL-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] 4613 ; HASWELL-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] 4614 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4615 ; HASWELL-NEXT: retq # sched: [7:1.00] 4616 ; 4617 ; BROADWELL-LABEL: test_psadbw: 4618 ; BROADWELL: # %bb.0: 4619 ; BROADWELL-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] 4620 ; BROADWELL-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] 4621 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4622 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4623 ; 4624 ; SKYLAKE-LABEL: test_psadbw: 4625 ; SKYLAKE: # %bb.0: 4626 ; SKYLAKE-NEXT: psadbw %mm1, %mm0 # sched: [3:1.00] 4627 ; SKYLAKE-NEXT: psadbw (%rdi), %mm0 # sched: [8:1.00] 4628 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4629 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4630 ; 4631 ; SKX-LABEL: test_psadbw: 4632 ; SKX: # %bb.0: 4633 ; SKX-NEXT: psadbw %mm1, %mm0 # sched: [3:1.00] 4634 ; SKX-NEXT: psadbw (%rdi), %mm0 # sched: [8:1.00] 4635 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4636 ; SKX-NEXT: retq # sched: [7:1.00] 4637 ; 4638 ; BTVER2-LABEL: test_psadbw: 4639 ; BTVER2: # %bb.0: 4640 ; BTVER2-NEXT: psadbw %mm1, %mm0 # sched: [2:0.50] 4641 ; BTVER2-NEXT: psadbw (%rdi), %mm0 # sched: [7:1.00] 4642 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4643 ; BTVER2-NEXT: retq # sched: [4:1.00] 4644 ; 4645 ; ZNVER1-LABEL: test_psadbw: 4646 ; ZNVER1: # %bb.0: 4647 ; ZNVER1-NEXT: psadbw %mm1, %mm0 # sched: [3:1.00] 4648 ; ZNVER1-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] 4649 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4650 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4651 %1 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a0, x86_mmx %a1) 4652 %2 = load x86_mmx, x86_mmx *%a2, align 8 4653 %3 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %1, x86_mmx %2) 4654 %4 = bitcast x86_mmx %3 to i64 4655 ret i64 %4 4656 } 4657 declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone 4658 
4659 define i64 @test_pshufb(x86_mmx %a0, x86_mmx %a1, x86_mmx *%a2) optsize { 4660 ; GENERIC-LABEL: test_pshufb: 4661 ; GENERIC: # %bb.0: 4662 ; GENERIC-NEXT: pshufb %mm1, %mm0 # sched: [1:0.50] 4663 ; GENERIC-NEXT: pshufb (%rdi), %mm0 # sched: [6:0.50] 4664 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4665 ; GENERIC-NEXT: retq # sched: [1:1.00] 4666 ; 4667 ; ATOM-LABEL: test_pshufb: 4668 ; ATOM: # %bb.0: 4669 ; ATOM-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] 4670 ; ATOM-NEXT: pshufb (%rdi), %mm0 # sched: [1:1.00] 4671 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4672 ; ATOM-NEXT: retq # sched: [79:39.50] 4673 ; 4674 ; SLM-LABEL: test_pshufb: 4675 ; SLM: # %bb.0: 4676 ; SLM-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] 4677 ; SLM-NEXT: pshufb (%rdi), %mm0 # sched: [4:1.00] 4678 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4679 ; SLM-NEXT: retq # sched: [4:1.00] 4680 ; 4681 ; SANDY-LABEL: test_pshufb: 4682 ; SANDY: # %bb.0: 4683 ; SANDY-NEXT: pshufb %mm1, %mm0 # sched: [1:0.50] 4684 ; SANDY-NEXT: pshufb (%rdi), %mm0 # sched: [6:0.50] 4685 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4686 ; SANDY-NEXT: retq # sched: [1:1.00] 4687 ; 4688 ; HASWELL-LABEL: test_pshufb: 4689 ; HASWELL: # %bb.0: 4690 ; HASWELL-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] 4691 ; HASWELL-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] 4692 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4693 ; HASWELL-NEXT: retq # sched: [7:1.00] 4694 ; 4695 ; BROADWELL-LABEL: test_pshufb: 4696 ; BROADWELL: # %bb.0: 4697 ; BROADWELL-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] 4698 ; BROADWELL-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] 4699 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4700 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4701 ; 4702 ; SKYLAKE-LABEL: test_pshufb: 4703 ; SKYLAKE: # %bb.0: 4704 ; SKYLAKE-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] 4705 ; SKYLAKE-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] 4706 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4707 ; SKYLAKE-NEXT: retq # sched: 
[7:1.00] 4708 ; 4709 ; SKX-LABEL: test_pshufb: 4710 ; SKX: # %bb.0: 4711 ; SKX-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00] 4712 ; SKX-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00] 4713 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4714 ; SKX-NEXT: retq # sched: [7:1.00] 4715 ; 4716 ; BTVER2-LABEL: test_pshufb: 4717 ; BTVER2: # %bb.0: 4718 ; BTVER2-NEXT: pshufb %mm1, %mm0 # sched: [2:2.00] 4719 ; BTVER2-NEXT: pshufb (%rdi), %mm0 # sched: [7:2.00] 4720 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4721 ; BTVER2-NEXT: retq # sched: [4:1.00] 4722 ; 4723 ; ZNVER1-LABEL: test_pshufb: 4724 ; ZNVER1: # %bb.0: 4725 ; ZNVER1-NEXT: pshufb %mm1, %mm0 # sched: [1:0.25] 4726 ; ZNVER1-NEXT: pshufb (%rdi), %mm0 # sched: [8:0.50] 4727 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4728 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4729 %1 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %a0, x86_mmx %a1) 4730 %2 = load x86_mmx, x86_mmx *%a2, align 8 4731 %3 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %1, x86_mmx %2) 4732 %4 = bitcast x86_mmx %3 to i64 4733 ret i64 %4 4734 } 4735 declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone 4736 4737 define i64 @test_pshufw(x86_mmx *%a0) optsize { 4738 ; GENERIC-LABEL: test_pshufw: 4739 ; GENERIC: # %bb.0: 4740 ; GENERIC-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] 4741 ; GENERIC-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] 4742 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4743 ; GENERIC-NEXT: retq # sched: [1:1.00] 4744 ; 4745 ; ATOM-LABEL: test_pshufw: 4746 ; ATOM: # %bb.0: 4747 ; ATOM-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [1:1.00] 4748 ; ATOM-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] 4749 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4750 ; ATOM-NEXT: retq # sched: [79:39.50] 4751 ; 4752 ; SLM-LABEL: test_pshufw: 4753 ; SLM: # %bb.0: 4754 ; SLM-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [4:1.00] 4755 ; SLM-NEXT: 
pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] 4756 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4757 ; SLM-NEXT: retq # sched: [4:1.00] 4758 ; 4759 ; SANDY-LABEL: test_pshufw: 4760 ; SANDY: # %bb.0: 4761 ; SANDY-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] 4762 ; SANDY-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] 4763 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4764 ; SANDY-NEXT: retq # sched: [1:1.00] 4765 ; 4766 ; HASWELL-LABEL: test_pshufw: 4767 ; HASWELL: # %bb.0: 4768 ; HASWELL-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] 4769 ; HASWELL-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] 4770 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4771 ; HASWELL-NEXT: retq # sched: [7:1.00] 4772 ; 4773 ; BROADWELL-LABEL: test_pshufw: 4774 ; BROADWELL: # %bb.0: 4775 ; BROADWELL-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] 4776 ; BROADWELL-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] 4777 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4778 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4779 ; 4780 ; SKYLAKE-LABEL: test_pshufw: 4781 ; SKYLAKE: # %bb.0: 4782 ; SKYLAKE-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] 4783 ; SKYLAKE-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] 4784 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4785 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4786 ; 4787 ; SKX-LABEL: test_pshufw: 4788 ; SKX: # %bb.0: 4789 ; SKX-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] 4790 ; SKX-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] 4791 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4792 ; SKX-NEXT: retq # sched: [7:1.00] 4793 ; 4794 ; BTVER2-LABEL: test_pshufw: 4795 ; BTVER2: # %bb.0: 4796 ; BTVER2-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] 4797 ; BTVER2-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.50] 4798 ; 
BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4799 ; BTVER2-NEXT: retq # sched: [4:1.00] 4800 ; 4801 ; ZNVER1-LABEL: test_pshufw: 4802 ; ZNVER1: # %bb.0: 4803 ; ZNVER1-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [8:0.50] 4804 ; ZNVER1-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.25] 4805 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4806 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4807 %1 = load x86_mmx, x86_mmx *%a0, align 8 4808 %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 0) 4809 %3 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %2, i8 0) 4810 %4 = bitcast x86_mmx %3 to i64 4811 ret i64 %4 4812 } 4813 declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone 4814 4815 define i64 @test_psignb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4816 ; GENERIC-LABEL: test_psignb: 4817 ; GENERIC: # %bb.0: 4818 ; GENERIC-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4819 ; GENERIC-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] 4820 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4821 ; GENERIC-NEXT: retq # sched: [1:1.00] 4822 ; 4823 ; ATOM-LABEL: test_psignb: 4824 ; ATOM: # %bb.0: 4825 ; ATOM-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4826 ; ATOM-NEXT: psignb (%rdi), %mm0 # sched: [1:1.00] 4827 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4828 ; ATOM-NEXT: retq # sched: [79:39.50] 4829 ; 4830 ; SLM-LABEL: test_psignb: 4831 ; SLM: # %bb.0: 4832 ; SLM-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4833 ; SLM-NEXT: psignb (%rdi), %mm0 # sched: [4:1.00] 4834 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4835 ; SLM-NEXT: retq # sched: [4:1.00] 4836 ; 4837 ; SANDY-LABEL: test_psignb: 4838 ; SANDY: # %bb.0: 4839 ; SANDY-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4840 ; SANDY-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] 4841 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4842 ; SANDY-NEXT: retq # sched: [1:1.00] 4843 ; 4844 ; HASWELL-LABEL: test_psignb: 4845 ; HASWELL: # %bb.0: 4846 ; HASWELL-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4847 
; HASWELL-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] 4848 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4849 ; HASWELL-NEXT: retq # sched: [7:1.00] 4850 ; 4851 ; BROADWELL-LABEL: test_psignb: 4852 ; BROADWELL: # %bb.0: 4853 ; BROADWELL-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4854 ; BROADWELL-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] 4855 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4856 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4857 ; 4858 ; SKYLAKE-LABEL: test_psignb: 4859 ; SKYLAKE: # %bb.0: 4860 ; SKYLAKE-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4861 ; SKYLAKE-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] 4862 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4863 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4864 ; 4865 ; SKX-LABEL: test_psignb: 4866 ; SKX: # %bb.0: 4867 ; SKX-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4868 ; SKX-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] 4869 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4870 ; SKX-NEXT: retq # sched: [7:1.00] 4871 ; 4872 ; BTVER2-LABEL: test_psignb: 4873 ; BTVER2: # %bb.0: 4874 ; BTVER2-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] 4875 ; BTVER2-NEXT: psignb (%rdi), %mm0 # sched: [6:1.00] 4876 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4877 ; BTVER2-NEXT: retq # sched: [4:1.00] 4878 ; 4879 ; ZNVER1-LABEL: test_psignb: 4880 ; ZNVER1: # %bb.0: 4881 ; ZNVER1-NEXT: psignb %mm1, %mm0 # sched: [1:0.25] 4882 ; ZNVER1-NEXT: psignb (%rdi), %mm0 # sched: [8:0.50] 4883 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4884 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4885 %1 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %a0, x86_mmx %a1) 4886 %2 = load x86_mmx, x86_mmx *%a2, align 8 4887 %3 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %1, x86_mmx %2) 4888 %4 = bitcast x86_mmx %3 to i64 4889 ret i64 %4 4890 } 4891 declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone 4892 4893 define i64 @test_psignd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4894 ; GENERIC-LABEL: test_psignd: 4895 ; 
GENERIC: # %bb.0: 4896 ; GENERIC-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4897 ; GENERIC-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] 4898 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4899 ; GENERIC-NEXT: retq # sched: [1:1.00] 4900 ; 4901 ; ATOM-LABEL: test_psignd: 4902 ; ATOM: # %bb.0: 4903 ; ATOM-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4904 ; ATOM-NEXT: psignd (%rdi), %mm0 # sched: [1:1.00] 4905 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4906 ; ATOM-NEXT: retq # sched: [79:39.50] 4907 ; 4908 ; SLM-LABEL: test_psignd: 4909 ; SLM: # %bb.0: 4910 ; SLM-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4911 ; SLM-NEXT: psignd (%rdi), %mm0 # sched: [4:1.00] 4912 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4913 ; SLM-NEXT: retq # sched: [4:1.00] 4914 ; 4915 ; SANDY-LABEL: test_psignd: 4916 ; SANDY: # %bb.0: 4917 ; SANDY-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4918 ; SANDY-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] 4919 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4920 ; SANDY-NEXT: retq # sched: [1:1.00] 4921 ; 4922 ; HASWELL-LABEL: test_psignd: 4923 ; HASWELL: # %bb.0: 4924 ; HASWELL-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4925 ; HASWELL-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] 4926 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4927 ; HASWELL-NEXT: retq # sched: [7:1.00] 4928 ; 4929 ; BROADWELL-LABEL: test_psignd: 4930 ; BROADWELL: # %bb.0: 4931 ; BROADWELL-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4932 ; BROADWELL-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] 4933 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 4934 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4935 ; 4936 ; SKYLAKE-LABEL: test_psignd: 4937 ; SKYLAKE: # %bb.0: 4938 ; SKYLAKE-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4939 ; SKYLAKE-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] 4940 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 4941 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4942 ; 4943 ; SKX-LABEL: test_psignd: 4944 ; SKX: # %bb.0: 4945 ; SKX-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 
4946 ; SKX-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] 4947 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 4948 ; SKX-NEXT: retq # sched: [7:1.00] 4949 ; 4950 ; BTVER2-LABEL: test_psignd: 4951 ; BTVER2: # %bb.0: 4952 ; BTVER2-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] 4953 ; BTVER2-NEXT: psignd (%rdi), %mm0 # sched: [6:1.00] 4954 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 4955 ; BTVER2-NEXT: retq # sched: [4:1.00] 4956 ; 4957 ; ZNVER1-LABEL: test_psignd: 4958 ; ZNVER1: # %bb.0: 4959 ; ZNVER1-NEXT: psignd %mm1, %mm0 # sched: [1:0.25] 4960 ; ZNVER1-NEXT: psignd (%rdi), %mm0 # sched: [8:0.50] 4961 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 4962 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4963 %1 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %a0, x86_mmx %a1) 4964 %2 = load x86_mmx, x86_mmx *%a2, align 8 4965 %3 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %1, x86_mmx %2) 4966 %4 = bitcast x86_mmx %3 to i64 4967 ret i64 %4 4968 } 4969 declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone 4970 4971 define i64 @test_psignw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 4972 ; GENERIC-LABEL: test_psignw: 4973 ; GENERIC: # %bb.0: 4974 ; GENERIC-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 4975 ; GENERIC-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] 4976 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 4977 ; GENERIC-NEXT: retq # sched: [1:1.00] 4978 ; 4979 ; ATOM-LABEL: test_psignw: 4980 ; ATOM: # %bb.0: 4981 ; ATOM-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 4982 ; ATOM-NEXT: psignw (%rdi), %mm0 # sched: [1:1.00] 4983 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 4984 ; ATOM-NEXT: retq # sched: [79:39.50] 4985 ; 4986 ; SLM-LABEL: test_psignw: 4987 ; SLM: # %bb.0: 4988 ; SLM-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 4989 ; SLM-NEXT: psignw (%rdi), %mm0 # sched: [4:1.00] 4990 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 4991 ; SLM-NEXT: retq # sched: [4:1.00] 4992 ; 4993 ; SANDY-LABEL: test_psignw: 4994 ; SANDY: # %bb.0: 4995 ; SANDY-NEXT: psignw 
%mm1, %mm0 # sched: [1:0.50] 4996 ; SANDY-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] 4997 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 4998 ; SANDY-NEXT: retq # sched: [1:1.00] 4999 ; 5000 ; HASWELL-LABEL: test_psignw: 5001 ; HASWELL: # %bb.0: 5002 ; HASWELL-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 5003 ; HASWELL-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] 5004 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5005 ; HASWELL-NEXT: retq # sched: [7:1.00] 5006 ; 5007 ; BROADWELL-LABEL: test_psignw: 5008 ; BROADWELL: # %bb.0: 5009 ; BROADWELL-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 5010 ; BROADWELL-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] 5011 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5012 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5013 ; 5014 ; SKYLAKE-LABEL: test_psignw: 5015 ; SKYLAKE: # %bb.0: 5016 ; SKYLAKE-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 5017 ; SKYLAKE-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] 5018 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5019 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5020 ; 5021 ; SKX-LABEL: test_psignw: 5022 ; SKX: # %bb.0: 5023 ; SKX-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 5024 ; SKX-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] 5025 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5026 ; SKX-NEXT: retq # sched: [7:1.00] 5027 ; 5028 ; BTVER2-LABEL: test_psignw: 5029 ; BTVER2: # %bb.0: 5030 ; BTVER2-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] 5031 ; BTVER2-NEXT: psignw (%rdi), %mm0 # sched: [6:1.00] 5032 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5033 ; BTVER2-NEXT: retq # sched: [4:1.00] 5034 ; 5035 ; ZNVER1-LABEL: test_psignw: 5036 ; ZNVER1: # %bb.0: 5037 ; ZNVER1-NEXT: psignw %mm1, %mm0 # sched: [1:0.25] 5038 ; ZNVER1-NEXT: psignw (%rdi), %mm0 # sched: [8:0.50] 5039 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5040 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5041 %1 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %a0, x86_mmx %a1) 5042 %2 = load x86_mmx, x86_mmx *%a2, align 8 5043 %3 = call x86_mmx 
@llvm.x86.ssse3.psign.w(x86_mmx %1, x86_mmx %2) 5044 %4 = bitcast x86_mmx %3 to i64 5045 ret i64 %4 5046 } 5047 declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone 5048 5049 define i64 @test_pslld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5050 ; GENERIC-LABEL: test_pslld: 5051 ; GENERIC: # %bb.0: 5052 ; GENERIC-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] 5053 ; GENERIC-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] 5054 ; GENERIC-NEXT: pslld $7, %mm0 # sched: [1:1.00] 5055 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5056 ; GENERIC-NEXT: retq # sched: [1:1.00] 5057 ; 5058 ; ATOM-LABEL: test_pslld: 5059 ; ATOM: # %bb.0: 5060 ; ATOM-NEXT: pslld %mm1, %mm0 # sched: [2:1.00] 5061 ; ATOM-NEXT: pslld (%rdi), %mm0 # sched: [3:1.50] 5062 ; ATOM-NEXT: pslld $7, %mm0 # sched: [1:0.50] 5063 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5064 ; ATOM-NEXT: retq # sched: [79:39.50] 5065 ; 5066 ; SLM-LABEL: test_pslld: 5067 ; SLM: # %bb.0: 5068 ; SLM-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] 5069 ; SLM-NEXT: pslld (%rdi), %mm0 # sched: [4:1.00] 5070 ; SLM-NEXT: pslld $7, %mm0 # sched: [1:1.00] 5071 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5072 ; SLM-NEXT: retq # sched: [4:1.00] 5073 ; 5074 ; SANDY-LABEL: test_pslld: 5075 ; SANDY: # %bb.0: 5076 ; SANDY-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] 5077 ; SANDY-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] 5078 ; SANDY-NEXT: pslld $7, %mm0 # sched: [1:1.00] 5079 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5080 ; SANDY-NEXT: retq # sched: [1:1.00] 5081 ; 5082 ; HASWELL-LABEL: test_pslld: 5083 ; HASWELL: # %bb.0: 5084 ; HASWELL-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] 5085 ; HASWELL-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] 5086 ; HASWELL-NEXT: pslld $7, %mm0 # sched: [1:1.00] 5087 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5088 ; HASWELL-NEXT: retq # sched: [7:1.00] 5089 ; 5090 ; BROADWELL-LABEL: test_pslld: 5091 ; BROADWELL: # %bb.0: 5092 ; BROADWELL-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] 
5093 ; BROADWELL-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] 5094 ; BROADWELL-NEXT: pslld $7, %mm0 # sched: [1:1.00] 5095 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5096 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5097 ; 5098 ; SKYLAKE-LABEL: test_pslld: 5099 ; SKYLAKE: # %bb.0: 5100 ; SKYLAKE-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] 5101 ; SKYLAKE-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] 5102 ; SKYLAKE-NEXT: pslld $7, %mm0 # sched: [1:1.00] 5103 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5104 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5105 ; 5106 ; SKX-LABEL: test_pslld: 5107 ; SKX: # %bb.0: 5108 ; SKX-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] 5109 ; SKX-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] 5110 ; SKX-NEXT: pslld $7, %mm0 # sched: [1:1.00] 5111 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5112 ; SKX-NEXT: retq # sched: [7:1.00] 5113 ; 5114 ; BTVER2-LABEL: test_pslld: 5115 ; BTVER2: # %bb.0: 5116 ; BTVER2-NEXT: pslld %mm1, %mm0 # sched: [1:0.50] 5117 ; BTVER2-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] 5118 ; BTVER2-NEXT: pslld $7, %mm0 # sched: [1:0.50] 5119 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5120 ; BTVER2-NEXT: retq # sched: [4:1.00] 5121 ; 5122 ; ZNVER1-LABEL: test_pslld: 5123 ; ZNVER1: # %bb.0: 5124 ; ZNVER1-NEXT: pslld %mm1, %mm0 # sched: [1:0.25] 5125 ; ZNVER1-NEXT: pslld (%rdi), %mm0 # sched: [8:0.50] 5126 ; ZNVER1-NEXT: pslld $7, %mm0 # sched: [1:0.25] 5127 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5128 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5129 %1 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a0, x86_mmx %a1) 5130 %2 = load x86_mmx, x86_mmx *%a2, align 8 5131 %3 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %1, x86_mmx %2) 5132 %4 = call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %3, i32 7) 5133 %5 = bitcast x86_mmx %4 to i64 5134 ret i64 %5 5135 } 5136 declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone 5137 declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone 5138 5139 define i64 
@test_psllq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5140 ; GENERIC-LABEL: test_psllq: 5141 ; GENERIC: # %bb.0: 5142 ; GENERIC-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] 5143 ; GENERIC-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] 5144 ; GENERIC-NEXT: psllq $7, %mm0 # sched: [1:1.00] 5145 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5146 ; GENERIC-NEXT: retq # sched: [1:1.00] 5147 ; 5148 ; ATOM-LABEL: test_psllq: 5149 ; ATOM: # %bb.0: 5150 ; ATOM-NEXT: psllq %mm1, %mm0 # sched: [2:1.00] 5151 ; ATOM-NEXT: psllq (%rdi), %mm0 # sched: [3:1.50] 5152 ; ATOM-NEXT: psllq $7, %mm0 # sched: [1:0.50] 5153 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5154 ; ATOM-NEXT: retq # sched: [79:39.50] 5155 ; 5156 ; SLM-LABEL: test_psllq: 5157 ; SLM: # %bb.0: 5158 ; SLM-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] 5159 ; SLM-NEXT: psllq (%rdi), %mm0 # sched: [4:1.00] 5160 ; SLM-NEXT: psllq $7, %mm0 # sched: [1:1.00] 5161 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5162 ; SLM-NEXT: retq # sched: [4:1.00] 5163 ; 5164 ; SANDY-LABEL: test_psllq: 5165 ; SANDY: # %bb.0: 5166 ; SANDY-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] 5167 ; SANDY-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] 5168 ; SANDY-NEXT: psllq $7, %mm0 # sched: [1:1.00] 5169 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5170 ; SANDY-NEXT: retq # sched: [1:1.00] 5171 ; 5172 ; HASWELL-LABEL: test_psllq: 5173 ; HASWELL: # %bb.0: 5174 ; HASWELL-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] 5175 ; HASWELL-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] 5176 ; HASWELL-NEXT: psllq $7, %mm0 # sched: [1:1.00] 5177 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5178 ; HASWELL-NEXT: retq # sched: [7:1.00] 5179 ; 5180 ; BROADWELL-LABEL: test_psllq: 5181 ; BROADWELL: # %bb.0: 5182 ; BROADWELL-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] 5183 ; BROADWELL-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] 5184 ; BROADWELL-NEXT: psllq $7, %mm0 # sched: [1:1.00] 5185 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5186 ; BROADWELL-NEXT: retq # sched: 
[7:1.00] 5187 ; 5188 ; SKYLAKE-LABEL: test_psllq: 5189 ; SKYLAKE: # %bb.0: 5190 ; SKYLAKE-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] 5191 ; SKYLAKE-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] 5192 ; SKYLAKE-NEXT: psllq $7, %mm0 # sched: [1:1.00] 5193 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5194 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5195 ; 5196 ; SKX-LABEL: test_psllq: 5197 ; SKX: # %bb.0: 5198 ; SKX-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] 5199 ; SKX-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] 5200 ; SKX-NEXT: psllq $7, %mm0 # sched: [1:1.00] 5201 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5202 ; SKX-NEXT: retq # sched: [7:1.00] 5203 ; 5204 ; BTVER2-LABEL: test_psllq: 5205 ; BTVER2: # %bb.0: 5206 ; BTVER2-NEXT: psllq %mm1, %mm0 # sched: [1:0.50] 5207 ; BTVER2-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] 5208 ; BTVER2-NEXT: psllq $7, %mm0 # sched: [1:0.50] 5209 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5210 ; BTVER2-NEXT: retq # sched: [4:1.00] 5211 ; 5212 ; ZNVER1-LABEL: test_psllq: 5213 ; ZNVER1: # %bb.0: 5214 ; ZNVER1-NEXT: psllq %mm1, %mm0 # sched: [1:0.25] 5215 ; ZNVER1-NEXT: psllq (%rdi), %mm0 # sched: [8:0.50] 5216 ; ZNVER1-NEXT: psllq $7, %mm0 # sched: [1:0.25] 5217 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5218 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5219 %1 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a0, x86_mmx %a1) 5220 %2 = load x86_mmx, x86_mmx *%a2, align 8 5221 %3 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %1, x86_mmx %2) 5222 %4 = call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %3, i32 7) 5223 %5 = bitcast x86_mmx %4 to i64 5224 ret i64 %5 5225 } 5226 declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone 5227 declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone 5228 5229 define i64 @test_psllw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5230 ; GENERIC-LABEL: test_psllw: 5231 ; GENERIC: # %bb.0: 5232 ; GENERIC-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] 5233 ; GENERIC-NEXT: psllw (%rdi), %mm0 # 
sched: [6:1.00] 5234 ; GENERIC-NEXT: psllw $7, %mm0 # sched: [1:1.00] 5235 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5236 ; GENERIC-NEXT: retq # sched: [1:1.00] 5237 ; 5238 ; ATOM-LABEL: test_psllw: 5239 ; ATOM: # %bb.0: 5240 ; ATOM-NEXT: psllw %mm1, %mm0 # sched: [2:1.00] 5241 ; ATOM-NEXT: psllw (%rdi), %mm0 # sched: [3:1.50] 5242 ; ATOM-NEXT: psllw $7, %mm0 # sched: [1:0.50] 5243 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5244 ; ATOM-NEXT: retq # sched: [79:39.50] 5245 ; 5246 ; SLM-LABEL: test_psllw: 5247 ; SLM: # %bb.0: 5248 ; SLM-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] 5249 ; SLM-NEXT: psllw (%rdi), %mm0 # sched: [4:1.00] 5250 ; SLM-NEXT: psllw $7, %mm0 # sched: [1:1.00] 5251 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5252 ; SLM-NEXT: retq # sched: [4:1.00] 5253 ; 5254 ; SANDY-LABEL: test_psllw: 5255 ; SANDY: # %bb.0: 5256 ; SANDY-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] 5257 ; SANDY-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] 5258 ; SANDY-NEXT: psllw $7, %mm0 # sched: [1:1.00] 5259 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5260 ; SANDY-NEXT: retq # sched: [1:1.00] 5261 ; 5262 ; HASWELL-LABEL: test_psllw: 5263 ; HASWELL: # %bb.0: 5264 ; HASWELL-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] 5265 ; HASWELL-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] 5266 ; HASWELL-NEXT: psllw $7, %mm0 # sched: [1:1.00] 5267 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5268 ; HASWELL-NEXT: retq # sched: [7:1.00] 5269 ; 5270 ; BROADWELL-LABEL: test_psllw: 5271 ; BROADWELL: # %bb.0: 5272 ; BROADWELL-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] 5273 ; BROADWELL-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] 5274 ; BROADWELL-NEXT: psllw $7, %mm0 # sched: [1:1.00] 5275 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5276 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5277 ; 5278 ; SKYLAKE-LABEL: test_psllw: 5279 ; SKYLAKE: # %bb.0: 5280 ; SKYLAKE-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] 5281 ; SKYLAKE-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] 5282 ; SKYLAKE-NEXT: psllw $7, 
%mm0 # sched: [1:1.00] 5283 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5284 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5285 ; 5286 ; SKX-LABEL: test_psllw: 5287 ; SKX: # %bb.0: 5288 ; SKX-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] 5289 ; SKX-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] 5290 ; SKX-NEXT: psllw $7, %mm0 # sched: [1:1.00] 5291 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5292 ; SKX-NEXT: retq # sched: [7:1.00] 5293 ; 5294 ; BTVER2-LABEL: test_psllw: 5295 ; BTVER2: # %bb.0: 5296 ; BTVER2-NEXT: psllw %mm1, %mm0 # sched: [1:0.50] 5297 ; BTVER2-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] 5298 ; BTVER2-NEXT: psllw $7, %mm0 # sched: [1:0.50] 5299 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5300 ; BTVER2-NEXT: retq # sched: [4:1.00] 5301 ; 5302 ; ZNVER1-LABEL: test_psllw: 5303 ; ZNVER1: # %bb.0: 5304 ; ZNVER1-NEXT: psllw %mm1, %mm0 # sched: [1:0.25] 5305 ; ZNVER1-NEXT: psllw (%rdi), %mm0 # sched: [8:0.50] 5306 ; ZNVER1-NEXT: psllw $7, %mm0 # sched: [1:0.25] 5307 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5308 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5309 %1 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a0, x86_mmx %a1) 5310 %2 = load x86_mmx, x86_mmx *%a2, align 8 5311 %3 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %1, x86_mmx %2) 5312 %4 = call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %3, i32 7) 5313 %5 = bitcast x86_mmx %4 to i64 5314 ret i64 %5 5315 } 5316 declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone 5317 declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone 5318 5319 define i64 @test_psrad(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5320 ; GENERIC-LABEL: test_psrad: 5321 ; GENERIC: # %bb.0: 5322 ; GENERIC-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] 5323 ; GENERIC-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] 5324 ; GENERIC-NEXT: psrad $7, %mm0 # sched: [1:1.00] 5325 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5326 ; GENERIC-NEXT: retq # sched: [1:1.00] 5327 ; 5328 ; ATOM-LABEL: test_psrad: 5329 ; ATOM: 
# %bb.0: 5330 ; ATOM-NEXT: psrad %mm1, %mm0 # sched: [2:1.00] 5331 ; ATOM-NEXT: psrad (%rdi), %mm0 # sched: [3:1.50] 5332 ; ATOM-NEXT: psrad $7, %mm0 # sched: [1:0.50] 5333 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5334 ; ATOM-NEXT: retq # sched: [79:39.50] 5335 ; 5336 ; SLM-LABEL: test_psrad: 5337 ; SLM: # %bb.0: 5338 ; SLM-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] 5339 ; SLM-NEXT: psrad (%rdi), %mm0 # sched: [4:1.00] 5340 ; SLM-NEXT: psrad $7, %mm0 # sched: [1:1.00] 5341 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5342 ; SLM-NEXT: retq # sched: [4:1.00] 5343 ; 5344 ; SANDY-LABEL: test_psrad: 5345 ; SANDY: # %bb.0: 5346 ; SANDY-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] 5347 ; SANDY-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] 5348 ; SANDY-NEXT: psrad $7, %mm0 # sched: [1:1.00] 5349 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5350 ; SANDY-NEXT: retq # sched: [1:1.00] 5351 ; 5352 ; HASWELL-LABEL: test_psrad: 5353 ; HASWELL: # %bb.0: 5354 ; HASWELL-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] 5355 ; HASWELL-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] 5356 ; HASWELL-NEXT: psrad $7, %mm0 # sched: [1:1.00] 5357 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5358 ; HASWELL-NEXT: retq # sched: [7:1.00] 5359 ; 5360 ; BROADWELL-LABEL: test_psrad: 5361 ; BROADWELL: # %bb.0: 5362 ; BROADWELL-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] 5363 ; BROADWELL-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] 5364 ; BROADWELL-NEXT: psrad $7, %mm0 # sched: [1:1.00] 5365 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5366 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5367 ; 5368 ; SKYLAKE-LABEL: test_psrad: 5369 ; SKYLAKE: # %bb.0: 5370 ; SKYLAKE-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] 5371 ; SKYLAKE-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] 5372 ; SKYLAKE-NEXT: psrad $7, %mm0 # sched: [1:1.00] 5373 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5374 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5375 ; 5376 ; SKX-LABEL: test_psrad: 5377 ; SKX: # %bb.0: 5378 ; SKX-NEXT: psrad %mm1, %mm0 # 
sched: [1:1.00] 5379 ; SKX-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] 5380 ; SKX-NEXT: psrad $7, %mm0 # sched: [1:1.00] 5381 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5382 ; SKX-NEXT: retq # sched: [7:1.00] 5383 ; 5384 ; BTVER2-LABEL: test_psrad: 5385 ; BTVER2: # %bb.0: 5386 ; BTVER2-NEXT: psrad %mm1, %mm0 # sched: [1:0.50] 5387 ; BTVER2-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] 5388 ; BTVER2-NEXT: psrad $7, %mm0 # sched: [1:0.50] 5389 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5390 ; BTVER2-NEXT: retq # sched: [4:1.00] 5391 ; 5392 ; ZNVER1-LABEL: test_psrad: 5393 ; ZNVER1: # %bb.0: 5394 ; ZNVER1-NEXT: psrad %mm1, %mm0 # sched: [1:0.25] 5395 ; ZNVER1-NEXT: psrad (%rdi), %mm0 # sched: [8:0.50] 5396 ; ZNVER1-NEXT: psrad $7, %mm0 # sched: [1:0.25] 5397 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5398 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5399 %1 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a0, x86_mmx %a1) 5400 %2 = load x86_mmx, x86_mmx *%a2, align 8 5401 %3 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %1, x86_mmx %2) 5402 %4 = call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %3, i32 7) 5403 %5 = bitcast x86_mmx %4 to i64 5404 ret i64 %5 5405 } 5406 declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone 5407 declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone 5408 5409 define i64 @test_psraw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5410 ; GENERIC-LABEL: test_psraw: 5411 ; GENERIC: # %bb.0: 5412 ; GENERIC-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] 5413 ; GENERIC-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] 5414 ; GENERIC-NEXT: psraw $7, %mm0 # sched: [1:1.00] 5415 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5416 ; GENERIC-NEXT: retq # sched: [1:1.00] 5417 ; 5418 ; ATOM-LABEL: test_psraw: 5419 ; ATOM: # %bb.0: 5420 ; ATOM-NEXT: psraw %mm1, %mm0 # sched: [2:1.00] 5421 ; ATOM-NEXT: psraw (%rdi), %mm0 # sched: [3:1.50] 5422 ; ATOM-NEXT: psraw $7, %mm0 # sched: [1:0.50] 5423 ; ATOM-NEXT: movq %mm0, %rax # sched: 
[3:3.00] 5424 ; ATOM-NEXT: retq # sched: [79:39.50] 5425 ; 5426 ; SLM-LABEL: test_psraw: 5427 ; SLM: # %bb.0: 5428 ; SLM-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] 5429 ; SLM-NEXT: psraw (%rdi), %mm0 # sched: [4:1.00] 5430 ; SLM-NEXT: psraw $7, %mm0 # sched: [1:1.00] 5431 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5432 ; SLM-NEXT: retq # sched: [4:1.00] 5433 ; 5434 ; SANDY-LABEL: test_psraw: 5435 ; SANDY: # %bb.0: 5436 ; SANDY-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] 5437 ; SANDY-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] 5438 ; SANDY-NEXT: psraw $7, %mm0 # sched: [1:1.00] 5439 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5440 ; SANDY-NEXT: retq # sched: [1:1.00] 5441 ; 5442 ; HASWELL-LABEL: test_psraw: 5443 ; HASWELL: # %bb.0: 5444 ; HASWELL-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] 5445 ; HASWELL-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] 5446 ; HASWELL-NEXT: psraw $7, %mm0 # sched: [1:1.00] 5447 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5448 ; HASWELL-NEXT: retq # sched: [7:1.00] 5449 ; 5450 ; BROADWELL-LABEL: test_psraw: 5451 ; BROADWELL: # %bb.0: 5452 ; BROADWELL-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] 5453 ; BROADWELL-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] 5454 ; BROADWELL-NEXT: psraw $7, %mm0 # sched: [1:1.00] 5455 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5456 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5457 ; 5458 ; SKYLAKE-LABEL: test_psraw: 5459 ; SKYLAKE: # %bb.0: 5460 ; SKYLAKE-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] 5461 ; SKYLAKE-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] 5462 ; SKYLAKE-NEXT: psraw $7, %mm0 # sched: [1:1.00] 5463 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5464 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5465 ; 5466 ; SKX-LABEL: test_psraw: 5467 ; SKX: # %bb.0: 5468 ; SKX-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] 5469 ; SKX-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] 5470 ; SKX-NEXT: psraw $7, %mm0 # sched: [1:1.00] 5471 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5472 ; SKX-NEXT: retq # sched: [7:1.00] 5473 
; 5474 ; BTVER2-LABEL: test_psraw: 5475 ; BTVER2: # %bb.0: 5476 ; BTVER2-NEXT: psraw %mm1, %mm0 # sched: [1:0.50] 5477 ; BTVER2-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] 5478 ; BTVER2-NEXT: psraw $7, %mm0 # sched: [1:0.50] 5479 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5480 ; BTVER2-NEXT: retq # sched: [4:1.00] 5481 ; 5482 ; ZNVER1-LABEL: test_psraw: 5483 ; ZNVER1: # %bb.0: 5484 ; ZNVER1-NEXT: psraw %mm1, %mm0 # sched: [1:0.25] 5485 ; ZNVER1-NEXT: psraw (%rdi), %mm0 # sched: [8:0.50] 5486 ; ZNVER1-NEXT: psraw $7, %mm0 # sched: [1:0.25] 5487 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5488 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5489 %1 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a0, x86_mmx %a1) 5490 %2 = load x86_mmx, x86_mmx *%a2, align 8 5491 %3 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %1, x86_mmx %2) 5492 %4 = call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %3, i32 7) 5493 %5 = bitcast x86_mmx %4 to i64 5494 ret i64 %5 5495 } 5496 declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone 5497 declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone 5498 5499 define i64 @test_psrld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5500 ; GENERIC-LABEL: test_psrld: 5501 ; GENERIC: # %bb.0: 5502 ; GENERIC-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] 5503 ; GENERIC-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] 5504 ; GENERIC-NEXT: psrld $7, %mm0 # sched: [1:1.00] 5505 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5506 ; GENERIC-NEXT: retq # sched: [1:1.00] 5507 ; 5508 ; ATOM-LABEL: test_psrld: 5509 ; ATOM: # %bb.0: 5510 ; ATOM-NEXT: psrld %mm1, %mm0 # sched: [2:1.00] 5511 ; ATOM-NEXT: psrld (%rdi), %mm0 # sched: [3:1.50] 5512 ; ATOM-NEXT: psrld $7, %mm0 # sched: [1:0.50] 5513 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5514 ; ATOM-NEXT: retq # sched: [79:39.50] 5515 ; 5516 ; SLM-LABEL: test_psrld: 5517 ; SLM: # %bb.0: 5518 ; SLM-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] 5519 ; SLM-NEXT: psrld (%rdi), %mm0 # sched: [4:1.00] 5520 
; SLM-NEXT: psrld $7, %mm0 # sched: [1:1.00] 5521 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5522 ; SLM-NEXT: retq # sched: [4:1.00] 5523 ; 5524 ; SANDY-LABEL: test_psrld: 5525 ; SANDY: # %bb.0: 5526 ; SANDY-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] 5527 ; SANDY-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] 5528 ; SANDY-NEXT: psrld $7, %mm0 # sched: [1:1.00] 5529 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5530 ; SANDY-NEXT: retq # sched: [1:1.00] 5531 ; 5532 ; HASWELL-LABEL: test_psrld: 5533 ; HASWELL: # %bb.0: 5534 ; HASWELL-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] 5535 ; HASWELL-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] 5536 ; HASWELL-NEXT: psrld $7, %mm0 # sched: [1:1.00] 5537 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5538 ; HASWELL-NEXT: retq # sched: [7:1.00] 5539 ; 5540 ; BROADWELL-LABEL: test_psrld: 5541 ; BROADWELL: # %bb.0: 5542 ; BROADWELL-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] 5543 ; BROADWELL-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] 5544 ; BROADWELL-NEXT: psrld $7, %mm0 # sched: [1:1.00] 5545 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5546 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5547 ; 5548 ; SKYLAKE-LABEL: test_psrld: 5549 ; SKYLAKE: # %bb.0: 5550 ; SKYLAKE-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] 5551 ; SKYLAKE-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] 5552 ; SKYLAKE-NEXT: psrld $7, %mm0 # sched: [1:1.00] 5553 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5554 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5555 ; 5556 ; SKX-LABEL: test_psrld: 5557 ; SKX: # %bb.0: 5558 ; SKX-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] 5559 ; SKX-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] 5560 ; SKX-NEXT: psrld $7, %mm0 # sched: [1:1.00] 5561 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5562 ; SKX-NEXT: retq # sched: [7:1.00] 5563 ; 5564 ; BTVER2-LABEL: test_psrld: 5565 ; BTVER2: # %bb.0: 5566 ; BTVER2-NEXT: psrld %mm1, %mm0 # sched: [1:0.50] 5567 ; BTVER2-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] 5568 ; BTVER2-NEXT: psrld $7, %mm0 # sched: 
[1:0.50] 5569 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5570 ; BTVER2-NEXT: retq # sched: [4:1.00] 5571 ; 5572 ; ZNVER1-LABEL: test_psrld: 5573 ; ZNVER1: # %bb.0: 5574 ; ZNVER1-NEXT: psrld %mm1, %mm0 # sched: [1:0.25] 5575 ; ZNVER1-NEXT: psrld (%rdi), %mm0 # sched: [8:0.50] 5576 ; ZNVER1-NEXT: psrld $7, %mm0 # sched: [1:0.25] 5577 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5578 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5579 %1 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a0, x86_mmx %a1) 5580 %2 = load x86_mmx, x86_mmx *%a2, align 8 5581 %3 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %1, x86_mmx %2) 5582 %4 = call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %3, i32 7) 5583 %5 = bitcast x86_mmx %4 to i64 5584 ret i64 %5 5585 } 5586 declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone 5587 declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone 5588 5589 define i64 @test_psrlq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5590 ; GENERIC-LABEL: test_psrlq: 5591 ; GENERIC: # %bb.0: 5592 ; GENERIC-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] 5593 ; GENERIC-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] 5594 ; GENERIC-NEXT: psrlq $7, %mm0 # sched: [1:1.00] 5595 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5596 ; GENERIC-NEXT: retq # sched: [1:1.00] 5597 ; 5598 ; ATOM-LABEL: test_psrlq: 5599 ; ATOM: # %bb.0: 5600 ; ATOM-NEXT: psrlq %mm1, %mm0 # sched: [2:1.00] 5601 ; ATOM-NEXT: psrlq (%rdi), %mm0 # sched: [3:1.50] 5602 ; ATOM-NEXT: psrlq $7, %mm0 # sched: [1:0.50] 5603 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5604 ; ATOM-NEXT: retq # sched: [79:39.50] 5605 ; 5606 ; SLM-LABEL: test_psrlq: 5607 ; SLM: # %bb.0: 5608 ; SLM-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] 5609 ; SLM-NEXT: psrlq (%rdi), %mm0 # sched: [4:1.00] 5610 ; SLM-NEXT: psrlq $7, %mm0 # sched: [1:1.00] 5611 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5612 ; SLM-NEXT: retq # sched: [4:1.00] 5613 ; 5614 ; SANDY-LABEL: test_psrlq: 5615 ; SANDY: # %bb.0: 5616 ; 
SANDY-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] 5617 ; SANDY-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] 5618 ; SANDY-NEXT: psrlq $7, %mm0 # sched: [1:1.00] 5619 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5620 ; SANDY-NEXT: retq # sched: [1:1.00] 5621 ; 5622 ; HASWELL-LABEL: test_psrlq: 5623 ; HASWELL: # %bb.0: 5624 ; HASWELL-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] 5625 ; HASWELL-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] 5626 ; HASWELL-NEXT: psrlq $7, %mm0 # sched: [1:1.00] 5627 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5628 ; HASWELL-NEXT: retq # sched: [7:1.00] 5629 ; 5630 ; BROADWELL-LABEL: test_psrlq: 5631 ; BROADWELL: # %bb.0: 5632 ; BROADWELL-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] 5633 ; BROADWELL-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] 5634 ; BROADWELL-NEXT: psrlq $7, %mm0 # sched: [1:1.00] 5635 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5636 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5637 ; 5638 ; SKYLAKE-LABEL: test_psrlq: 5639 ; SKYLAKE: # %bb.0: 5640 ; SKYLAKE-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] 5641 ; SKYLAKE-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] 5642 ; SKYLAKE-NEXT: psrlq $7, %mm0 # sched: [1:1.00] 5643 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5644 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5645 ; 5646 ; SKX-LABEL: test_psrlq: 5647 ; SKX: # %bb.0: 5648 ; SKX-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] 5649 ; SKX-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] 5650 ; SKX-NEXT: psrlq $7, %mm0 # sched: [1:1.00] 5651 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5652 ; SKX-NEXT: retq # sched: [7:1.00] 5653 ; 5654 ; BTVER2-LABEL: test_psrlq: 5655 ; BTVER2: # %bb.0: 5656 ; BTVER2-NEXT: psrlq %mm1, %mm0 # sched: [1:0.50] 5657 ; BTVER2-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] 5658 ; BTVER2-NEXT: psrlq $7, %mm0 # sched: [1:0.50] 5659 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5660 ; BTVER2-NEXT: retq # sched: [4:1.00] 5661 ; 5662 ; ZNVER1-LABEL: test_psrlq: 5663 ; ZNVER1: # %bb.0: 5664 ; ZNVER1-NEXT: psrlq %mm1, %mm0 # 
sched: [1:0.25] 5665 ; ZNVER1-NEXT: psrlq (%rdi), %mm0 # sched: [8:0.50] 5666 ; ZNVER1-NEXT: psrlq $7, %mm0 # sched: [1:0.25] 5667 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5668 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5669 %1 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a0, x86_mmx %a1) 5670 %2 = load x86_mmx, x86_mmx *%a2, align 8 5671 %3 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %1, x86_mmx %2) 5672 %4 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %3, i32 7) 5673 %5 = bitcast x86_mmx %4 to i64 5674 ret i64 %5 5675 } 5676 declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone 5677 declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone 5678 5679 define i64 @test_psrlw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5680 ; GENERIC-LABEL: test_psrlw: 5681 ; GENERIC: # %bb.0: 5682 ; GENERIC-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] 5683 ; GENERIC-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] 5684 ; GENERIC-NEXT: psrlw $7, %mm0 # sched: [1:1.00] 5685 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5686 ; GENERIC-NEXT: retq # sched: [1:1.00] 5687 ; 5688 ; ATOM-LABEL: test_psrlw: 5689 ; ATOM: # %bb.0: 5690 ; ATOM-NEXT: psrlw %mm1, %mm0 # sched: [2:1.00] 5691 ; ATOM-NEXT: psrlw (%rdi), %mm0 # sched: [3:1.50] 5692 ; ATOM-NEXT: psrlw $7, %mm0 # sched: [1:0.50] 5693 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5694 ; ATOM-NEXT: retq # sched: [79:39.50] 5695 ; 5696 ; SLM-LABEL: test_psrlw: 5697 ; SLM: # %bb.0: 5698 ; SLM-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] 5699 ; SLM-NEXT: psrlw (%rdi), %mm0 # sched: [4:1.00] 5700 ; SLM-NEXT: psrlw $7, %mm0 # sched: [1:1.00] 5701 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5702 ; SLM-NEXT: retq # sched: [4:1.00] 5703 ; 5704 ; SANDY-LABEL: test_psrlw: 5705 ; SANDY: # %bb.0: 5706 ; SANDY-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] 5707 ; SANDY-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] 5708 ; SANDY-NEXT: psrlw $7, %mm0 # sched: [1:1.00] 5709 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5710 ; 
SANDY-NEXT: retq # sched: [1:1.00] 5711 ; 5712 ; HASWELL-LABEL: test_psrlw: 5713 ; HASWELL: # %bb.0: 5714 ; HASWELL-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] 5715 ; HASWELL-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] 5716 ; HASWELL-NEXT: psrlw $7, %mm0 # sched: [1:1.00] 5717 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5718 ; HASWELL-NEXT: retq # sched: [7:1.00] 5719 ; 5720 ; BROADWELL-LABEL: test_psrlw: 5721 ; BROADWELL: # %bb.0: 5722 ; BROADWELL-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] 5723 ; BROADWELL-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] 5724 ; BROADWELL-NEXT: psrlw $7, %mm0 # sched: [1:1.00] 5725 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5726 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5727 ; 5728 ; SKYLAKE-LABEL: test_psrlw: 5729 ; SKYLAKE: # %bb.0: 5730 ; SKYLAKE-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] 5731 ; SKYLAKE-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] 5732 ; SKYLAKE-NEXT: psrlw $7, %mm0 # sched: [1:1.00] 5733 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5734 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5735 ; 5736 ; SKX-LABEL: test_psrlw: 5737 ; SKX: # %bb.0: 5738 ; SKX-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] 5739 ; SKX-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] 5740 ; SKX-NEXT: psrlw $7, %mm0 # sched: [1:1.00] 5741 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5742 ; SKX-NEXT: retq # sched: [7:1.00] 5743 ; 5744 ; BTVER2-LABEL: test_psrlw: 5745 ; BTVER2: # %bb.0: 5746 ; BTVER2-NEXT: psrlw %mm1, %mm0 # sched: [1:0.50] 5747 ; BTVER2-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] 5748 ; BTVER2-NEXT: psrlw $7, %mm0 # sched: [1:0.50] 5749 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5750 ; BTVER2-NEXT: retq # sched: [4:1.00] 5751 ; 5752 ; ZNVER1-LABEL: test_psrlw: 5753 ; ZNVER1: # %bb.0: 5754 ; ZNVER1-NEXT: psrlw %mm1, %mm0 # sched: [1:0.25] 5755 ; ZNVER1-NEXT: psrlw (%rdi), %mm0 # sched: [8:0.50] 5756 ; ZNVER1-NEXT: psrlw $7, %mm0 # sched: [1:0.25] 5757 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5758 ; ZNVER1-NEXT: retq # sched: 
[1:0.50] 5759 %1 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a0, x86_mmx %a1) 5760 %2 = load x86_mmx, x86_mmx *%a2, align 8 5761 %3 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %1, x86_mmx %2) 5762 %4 = call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %3, i32 7) 5763 %5 = bitcast x86_mmx %4 to i64 5764 ret i64 %5 5765 } 5766 declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone 5767 declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone 5768 5769 define i64 @test_psubb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5770 ; GENERIC-LABEL: test_psubb: 5771 ; GENERIC: # %bb.0: 5772 ; GENERIC-NEXT: psubb %mm1, %mm0 # sched: [3:1.00] 5773 ; GENERIC-NEXT: psubb (%rdi), %mm0 # sched: [8:1.00] 5774 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5775 ; GENERIC-NEXT: retq # sched: [1:1.00] 5776 ; 5777 ; ATOM-LABEL: test_psubb: 5778 ; ATOM: # %bb.0: 5779 ; ATOM-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] 5780 ; ATOM-NEXT: psubb (%rdi), %mm0 # sched: [1:1.00] 5781 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5782 ; ATOM-NEXT: retq # sched: [79:39.50] 5783 ; 5784 ; SLM-LABEL: test_psubb: 5785 ; SLM: # %bb.0: 5786 ; SLM-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] 5787 ; SLM-NEXT: psubb (%rdi), %mm0 # sched: [4:1.00] 5788 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5789 ; SLM-NEXT: retq # sched: [4:1.00] 5790 ; 5791 ; SANDY-LABEL: test_psubb: 5792 ; SANDY: # %bb.0: 5793 ; SANDY-NEXT: psubb %mm1, %mm0 # sched: [3:1.00] 5794 ; SANDY-NEXT: psubb (%rdi), %mm0 # sched: [8:1.00] 5795 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5796 ; SANDY-NEXT: retq # sched: [1:1.00] 5797 ; 5798 ; HASWELL-LABEL: test_psubb: 5799 ; HASWELL: # %bb.0: 5800 ; HASWELL-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] 5801 ; HASWELL-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50] 5802 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5803 ; HASWELL-NEXT: retq # sched: [7:1.00] 5804 ; 5805 ; BROADWELL-LABEL: test_psubb: 5806 ; BROADWELL: # %bb.0: 5807 ; BROADWELL-NEXT: psubb %mm1, %mm0 # 
sched: [1:0.50] 5808 ; BROADWELL-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50] 5809 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5810 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5811 ; 5812 ; SKYLAKE-LABEL: test_psubb: 5813 ; SKYLAKE: # %bb.0: 5814 ; SKYLAKE-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] 5815 ; SKYLAKE-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50] 5816 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5817 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5818 ; 5819 ; SKX-LABEL: test_psubb: 5820 ; SKX: # %bb.0: 5821 ; SKX-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] 5822 ; SKX-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50] 5823 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5824 ; SKX-NEXT: retq # sched: [7:1.00] 5825 ; 5826 ; BTVER2-LABEL: test_psubb: 5827 ; BTVER2: # %bb.0: 5828 ; BTVER2-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] 5829 ; BTVER2-NEXT: psubb (%rdi), %mm0 # sched: [6:1.00] 5830 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5831 ; BTVER2-NEXT: retq # sched: [4:1.00] 5832 ; 5833 ; ZNVER1-LABEL: test_psubb: 5834 ; ZNVER1: # %bb.0: 5835 ; ZNVER1-NEXT: psubb %mm1, %mm0 # sched: [1:0.25] 5836 ; ZNVER1-NEXT: psubb (%rdi), %mm0 # sched: [8:0.50] 5837 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5838 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5839 %1 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a0, x86_mmx %a1) 5840 %2 = load x86_mmx, x86_mmx *%a2, align 8 5841 %3 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %1, x86_mmx %2) 5842 %4 = bitcast x86_mmx %3 to i64 5843 ret i64 %4 5844 } 5845 declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone 5846 5847 define i64 @test_psubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5848 ; GENERIC-LABEL: test_psubd: 5849 ; GENERIC: # %bb.0: 5850 ; GENERIC-NEXT: psubd %mm1, %mm0 # sched: [3:1.00] 5851 ; GENERIC-NEXT: psubd (%rdi), %mm0 # sched: [8:1.00] 5852 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5853 ; GENERIC-NEXT: retq # sched: [1:1.00] 5854 ; 5855 ; ATOM-LABEL: test_psubd: 5856 ; ATOM: # %bb.0: 5857 
; ATOM-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] 5858 ; ATOM-NEXT: psubd (%rdi), %mm0 # sched: [1:1.00] 5859 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5860 ; ATOM-NEXT: retq # sched: [79:39.50] 5861 ; 5862 ; SLM-LABEL: test_psubd: 5863 ; SLM: # %bb.0: 5864 ; SLM-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] 5865 ; SLM-NEXT: psubd (%rdi), %mm0 # sched: [4:1.00] 5866 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5867 ; SLM-NEXT: retq # sched: [4:1.00] 5868 ; 5869 ; SANDY-LABEL: test_psubd: 5870 ; SANDY: # %bb.0: 5871 ; SANDY-NEXT: psubd %mm1, %mm0 # sched: [3:1.00] 5872 ; SANDY-NEXT: psubd (%rdi), %mm0 # sched: [8:1.00] 5873 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5874 ; SANDY-NEXT: retq # sched: [1:1.00] 5875 ; 5876 ; HASWELL-LABEL: test_psubd: 5877 ; HASWELL: # %bb.0: 5878 ; HASWELL-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] 5879 ; HASWELL-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50] 5880 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5881 ; HASWELL-NEXT: retq # sched: [7:1.00] 5882 ; 5883 ; BROADWELL-LABEL: test_psubd: 5884 ; BROADWELL: # %bb.0: 5885 ; BROADWELL-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] 5886 ; BROADWELL-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50] 5887 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5888 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5889 ; 5890 ; SKYLAKE-LABEL: test_psubd: 5891 ; SKYLAKE: # %bb.0: 5892 ; SKYLAKE-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] 5893 ; SKYLAKE-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50] 5894 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5895 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5896 ; 5897 ; SKX-LABEL: test_psubd: 5898 ; SKX: # %bb.0: 5899 ; SKX-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] 5900 ; SKX-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50] 5901 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5902 ; SKX-NEXT: retq # sched: [7:1.00] 5903 ; 5904 ; BTVER2-LABEL: test_psubd: 5905 ; BTVER2: # %bb.0: 5906 ; BTVER2-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] 5907 ; BTVER2-NEXT: psubd (%rdi), %mm0 # sched: [6:1.00] 
5908 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5909 ; BTVER2-NEXT: retq # sched: [4:1.00] 5910 ; 5911 ; ZNVER1-LABEL: test_psubd: 5912 ; ZNVER1: # %bb.0: 5913 ; ZNVER1-NEXT: psubd %mm1, %mm0 # sched: [1:0.25] 5914 ; ZNVER1-NEXT: psubd (%rdi), %mm0 # sched: [8:0.50] 5915 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5916 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5917 %1 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a0, x86_mmx %a1) 5918 %2 = load x86_mmx, x86_mmx *%a2, align 8 5919 %3 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %1, x86_mmx %2) 5920 %4 = bitcast x86_mmx %3 to i64 5921 ret i64 %4 5922 } 5923 declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone 5924 5925 define i64 @test_psubq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 5926 ; GENERIC-LABEL: test_psubq: 5927 ; GENERIC: # %bb.0: 5928 ; GENERIC-NEXT: psubq %mm1, %mm0 # sched: [3:1.00] 5929 ; GENERIC-NEXT: psubq (%rdi), %mm0 # sched: [8:1.00] 5930 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 5931 ; GENERIC-NEXT: retq # sched: [1:1.00] 5932 ; 5933 ; ATOM-LABEL: test_psubq: 5934 ; ATOM: # %bb.0: 5935 ; ATOM-NEXT: psubq %mm1, %mm0 # sched: [2:1.00] 5936 ; ATOM-NEXT: psubq (%rdi), %mm0 # sched: [3:1.50] 5937 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 5938 ; ATOM-NEXT: retq # sched: [79:39.50] 5939 ; 5940 ; SLM-LABEL: test_psubq: 5941 ; SLM: # %bb.0: 5942 ; SLM-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] 5943 ; SLM-NEXT: psubq (%rdi), %mm0 # sched: [4:1.00] 5944 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 5945 ; SLM-NEXT: retq # sched: [4:1.00] 5946 ; 5947 ; SANDY-LABEL: test_psubq: 5948 ; SANDY: # %bb.0: 5949 ; SANDY-NEXT: psubq %mm1, %mm0 # sched: [3:1.00] 5950 ; SANDY-NEXT: psubq (%rdi), %mm0 # sched: [8:1.00] 5951 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 5952 ; SANDY-NEXT: retq # sched: [1:1.00] 5953 ; 5954 ; HASWELL-LABEL: test_psubq: 5955 ; HASWELL: # %bb.0: 5956 ; HASWELL-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] 5957 ; HASWELL-NEXT: psubq (%rdi), %mm0 # 
sched: [6:0.50] 5958 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5959 ; HASWELL-NEXT: retq # sched: [7:1.00] 5960 ; 5961 ; BROADWELL-LABEL: test_psubq: 5962 ; BROADWELL: # %bb.0: 5963 ; BROADWELL-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] 5964 ; BROADWELL-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50] 5965 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 5966 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5967 ; 5968 ; SKYLAKE-LABEL: test_psubq: 5969 ; SKYLAKE: # %bb.0: 5970 ; SKYLAKE-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] 5971 ; SKYLAKE-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50] 5972 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 5973 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5974 ; 5975 ; SKX-LABEL: test_psubq: 5976 ; SKX: # %bb.0: 5977 ; SKX-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] 5978 ; SKX-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50] 5979 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 5980 ; SKX-NEXT: retq # sched: [7:1.00] 5981 ; 5982 ; BTVER2-LABEL: test_psubq: 5983 ; BTVER2: # %bb.0: 5984 ; BTVER2-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] 5985 ; BTVER2-NEXT: psubq (%rdi), %mm0 # sched: [6:1.00] 5986 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 5987 ; BTVER2-NEXT: retq # sched: [4:1.00] 5988 ; 5989 ; ZNVER1-LABEL: test_psubq: 5990 ; ZNVER1: # %bb.0: 5991 ; ZNVER1-NEXT: psubq %mm1, %mm0 # sched: [1:0.25] 5992 ; ZNVER1-NEXT: psubq (%rdi), %mm0 # sched: [8:0.50] 5993 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 5994 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5995 %1 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a0, x86_mmx %a1) 5996 %2 = load x86_mmx, x86_mmx *%a2, align 8 5997 %3 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %1, x86_mmx %2) 5998 %4 = bitcast x86_mmx %3 to i64 5999 ret i64 %4 6000 } 6001 declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone 6002 6003 define i64 @test_psubsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6004 ; GENERIC-LABEL: test_psubsb: 6005 ; GENERIC: # %bb.0: 6006 ; GENERIC-NEXT: psubsb %mm1, %mm0 # sched: 
[3:1.00] 6007 ; GENERIC-NEXT: psubsb (%rdi), %mm0 # sched: [8:1.00] 6008 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 6009 ; GENERIC-NEXT: retq # sched: [1:1.00] 6010 ; 6011 ; ATOM-LABEL: test_psubsb: 6012 ; ATOM: # %bb.0: 6013 ; ATOM-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] 6014 ; ATOM-NEXT: psubsb (%rdi), %mm0 # sched: [1:1.00] 6015 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6016 ; ATOM-NEXT: retq # sched: [79:39.50] 6017 ; 6018 ; SLM-LABEL: test_psubsb: 6019 ; SLM: # %bb.0: 6020 ; SLM-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] 6021 ; SLM-NEXT: psubsb (%rdi), %mm0 # sched: [4:1.00] 6022 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6023 ; SLM-NEXT: retq # sched: [4:1.00] 6024 ; 6025 ; SANDY-LABEL: test_psubsb: 6026 ; SANDY: # %bb.0: 6027 ; SANDY-NEXT: psubsb %mm1, %mm0 # sched: [3:1.00] 6028 ; SANDY-NEXT: psubsb (%rdi), %mm0 # sched: [8:1.00] 6029 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6030 ; SANDY-NEXT: retq # sched: [1:1.00] 6031 ; 6032 ; HASWELL-LABEL: test_psubsb: 6033 ; HASWELL: # %bb.0: 6034 ; HASWELL-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] 6035 ; HASWELL-NEXT: psubsb (%rdi), %mm0 # sched: [6:0.50] 6036 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6037 ; HASWELL-NEXT: retq # sched: [7:1.00] 6038 ; 6039 ; BROADWELL-LABEL: test_psubsb: 6040 ; BROADWELL: # %bb.0: 6041 ; BROADWELL-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] 6042 ; BROADWELL-NEXT: psubsb (%rdi), %mm0 # sched: [6:0.50] 6043 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6044 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6045 ; 6046 ; SKYLAKE-LABEL: test_psubsb: 6047 ; SKYLAKE: # %bb.0: 6048 ; SKYLAKE-NEXT: psubsb %mm1, %mm0 # sched: [1:1.00] 6049 ; SKYLAKE-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00] 6050 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 6051 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6052 ; 6053 ; SKX-LABEL: test_psubsb: 6054 ; SKX: # %bb.0: 6055 ; SKX-NEXT: psubsb %mm1, %mm0 # sched: [1:1.00] 6056 ; SKX-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00] 6057 ; 
SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 6058 ; SKX-NEXT: retq # sched: [7:1.00] 6059 ; 6060 ; BTVER2-LABEL: test_psubsb: 6061 ; BTVER2: # %bb.0: 6062 ; BTVER2-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] 6063 ; BTVER2-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00] 6064 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 6065 ; BTVER2-NEXT: retq # sched: [4:1.00] 6066 ; 6067 ; ZNVER1-LABEL: test_psubsb: 6068 ; ZNVER1: # %bb.0: 6069 ; ZNVER1-NEXT: psubsb %mm1, %mm0 # sched: [1:0.25] 6070 ; ZNVER1-NEXT: psubsb (%rdi), %mm0 # sched: [8:0.50] 6071 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 6072 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6073 %1 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a0, x86_mmx %a1) 6074 %2 = load x86_mmx, x86_mmx *%a2, align 8 6075 %3 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %1, x86_mmx %2) 6076 %4 = bitcast x86_mmx %3 to i64 6077 ret i64 %4 6078 } 6079 declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone 6080 6081 define i64 @test_psubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6082 ; GENERIC-LABEL: test_psubsw: 6083 ; GENERIC: # %bb.0: 6084 ; GENERIC-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00] 6085 ; GENERIC-NEXT: psubsw (%rdi), %mm0 # sched: [8:1.00] 6086 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 6087 ; GENERIC-NEXT: retq # sched: [1:1.00] 6088 ; 6089 ; ATOM-LABEL: test_psubsw: 6090 ; ATOM: # %bb.0: 6091 ; ATOM-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] 6092 ; ATOM-NEXT: psubsw (%rdi), %mm0 # sched: [1:1.00] 6093 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6094 ; ATOM-NEXT: retq # sched: [79:39.50] 6095 ; 6096 ; SLM-LABEL: test_psubsw: 6097 ; SLM: # %bb.0: 6098 ; SLM-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] 6099 ; SLM-NEXT: psubsw (%rdi), %mm0 # sched: [4:1.00] 6100 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6101 ; SLM-NEXT: retq # sched: [4:1.00] 6102 ; 6103 ; SANDY-LABEL: test_psubsw: 6104 ; SANDY: # %bb.0: 6105 ; SANDY-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00] 6106 ; SANDY-NEXT: psubsw (%rdi), %mm0 
# sched: [8:1.00] 6107 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6108 ; SANDY-NEXT: retq # sched: [1:1.00] 6109 ; 6110 ; HASWELL-LABEL: test_psubsw: 6111 ; HASWELL: # %bb.0: 6112 ; HASWELL-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] 6113 ; HASWELL-NEXT: psubsw (%rdi), %mm0 # sched: [6:0.50] 6114 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6115 ; HASWELL-NEXT: retq # sched: [7:1.00] 6116 ; 6117 ; BROADWELL-LABEL: test_psubsw: 6118 ; BROADWELL: # %bb.0: 6119 ; BROADWELL-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] 6120 ; BROADWELL-NEXT: psubsw (%rdi), %mm0 # sched: [6:0.50] 6121 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6122 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6123 ; 6124 ; SKYLAKE-LABEL: test_psubsw: 6125 ; SKYLAKE: # %bb.0: 6126 ; SKYLAKE-NEXT: psubsw %mm1, %mm0 # sched: [1:1.00] 6127 ; SKYLAKE-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00] 6128 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 6129 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6130 ; 6131 ; SKX-LABEL: test_psubsw: 6132 ; SKX: # %bb.0: 6133 ; SKX-NEXT: psubsw %mm1, %mm0 # sched: [1:1.00] 6134 ; SKX-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00] 6135 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 6136 ; SKX-NEXT: retq # sched: [7:1.00] 6137 ; 6138 ; BTVER2-LABEL: test_psubsw: 6139 ; BTVER2: # %bb.0: 6140 ; BTVER2-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] 6141 ; BTVER2-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00] 6142 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 6143 ; BTVER2-NEXT: retq # sched: [4:1.00] 6144 ; 6145 ; ZNVER1-LABEL: test_psubsw: 6146 ; ZNVER1: # %bb.0: 6147 ; ZNVER1-NEXT: psubsw %mm1, %mm0 # sched: [1:0.25] 6148 ; ZNVER1-NEXT: psubsw (%rdi), %mm0 # sched: [8:0.50] 6149 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 6150 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6151 %1 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a0, x86_mmx %a1) 6152 %2 = load x86_mmx, x86_mmx *%a2, align 8 6153 %3 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %1, x86_mmx %2) 6154 %4 = bitcast x86_mmx %3 to 
i64 6155 ret i64 %4 6156 } 6157 declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone 6158 6159 define i64 @test_psubusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6160 ; GENERIC-LABEL: test_psubusb: 6161 ; GENERIC: # %bb.0: 6162 ; GENERIC-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00] 6163 ; GENERIC-NEXT: psubusb (%rdi), %mm0 # sched: [8:1.00] 6164 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 6165 ; GENERIC-NEXT: retq # sched: [1:1.00] 6166 ; 6167 ; ATOM-LABEL: test_psubusb: 6168 ; ATOM: # %bb.0: 6169 ; ATOM-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] 6170 ; ATOM-NEXT: psubusb (%rdi), %mm0 # sched: [1:1.00] 6171 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6172 ; ATOM-NEXT: retq # sched: [79:39.50] 6173 ; 6174 ; SLM-LABEL: test_psubusb: 6175 ; SLM: # %bb.0: 6176 ; SLM-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] 6177 ; SLM-NEXT: psubusb (%rdi), %mm0 # sched: [4:1.00] 6178 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6179 ; SLM-NEXT: retq # sched: [4:1.00] 6180 ; 6181 ; SANDY-LABEL: test_psubusb: 6182 ; SANDY: # %bb.0: 6183 ; SANDY-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00] 6184 ; SANDY-NEXT: psubusb (%rdi), %mm0 # sched: [8:1.00] 6185 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6186 ; SANDY-NEXT: retq # sched: [1:1.00] 6187 ; 6188 ; HASWELL-LABEL: test_psubusb: 6189 ; HASWELL: # %bb.0: 6190 ; HASWELL-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] 6191 ; HASWELL-NEXT: psubusb (%rdi), %mm0 # sched: [6:0.50] 6192 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6193 ; HASWELL-NEXT: retq # sched: [7:1.00] 6194 ; 6195 ; BROADWELL-LABEL: test_psubusb: 6196 ; BROADWELL: # %bb.0: 6197 ; BROADWELL-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] 6198 ; BROADWELL-NEXT: psubusb (%rdi), %mm0 # sched: [6:0.50] 6199 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6200 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6201 ; 6202 ; SKYLAKE-LABEL: test_psubusb: 6203 ; SKYLAKE: # %bb.0: 6204 ; SKYLAKE-NEXT: psubusb %mm1, %mm0 # sched: [1:1.00] 6205 ; 
SKYLAKE-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00] 6206 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 6207 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6208 ; 6209 ; SKX-LABEL: test_psubusb: 6210 ; SKX: # %bb.0: 6211 ; SKX-NEXT: psubusb %mm1, %mm0 # sched: [1:1.00] 6212 ; SKX-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00] 6213 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 6214 ; SKX-NEXT: retq # sched: [7:1.00] 6215 ; 6216 ; BTVER2-LABEL: test_psubusb: 6217 ; BTVER2: # %bb.0: 6218 ; BTVER2-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] 6219 ; BTVER2-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00] 6220 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 6221 ; BTVER2-NEXT: retq # sched: [4:1.00] 6222 ; 6223 ; ZNVER1-LABEL: test_psubusb: 6224 ; ZNVER1: # %bb.0: 6225 ; ZNVER1-NEXT: psubusb %mm1, %mm0 # sched: [1:0.25] 6226 ; ZNVER1-NEXT: psubusb (%rdi), %mm0 # sched: [8:0.50] 6227 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 6228 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6229 %1 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %a0, x86_mmx %a1) 6230 %2 = load x86_mmx, x86_mmx *%a2, align 8 6231 %3 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %1, x86_mmx %2) 6232 %4 = bitcast x86_mmx %3 to i64 6233 ret i64 %4 6234 } 6235 declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone 6236 6237 define i64 @test_psubusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6238 ; GENERIC-LABEL: test_psubusw: 6239 ; GENERIC: # %bb.0: 6240 ; GENERIC-NEXT: psubusw %mm1, %mm0 # sched: [3:1.00] 6241 ; GENERIC-NEXT: psubusw (%rdi), %mm0 # sched: [8:1.00] 6242 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 6243 ; GENERIC-NEXT: retq # sched: [1:1.00] 6244 ; 6245 ; ATOM-LABEL: test_psubusw: 6246 ; ATOM: # %bb.0: 6247 ; ATOM-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] 6248 ; ATOM-NEXT: psubusw (%rdi), %mm0 # sched: [1:1.00] 6249 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6250 ; ATOM-NEXT: retq # sched: [79:39.50] 6251 ; 6252 ; SLM-LABEL: test_psubusw: 6253 ; SLM: # %bb.0: 6254 ; 
SLM-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] 6255 ; SLM-NEXT: psubusw (%rdi), %mm0 # sched: [4:1.00] 6256 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6257 ; SLM-NEXT: retq # sched: [4:1.00] 6258 ; 6259 ; SANDY-LABEL: test_psubusw: 6260 ; SANDY: # %bb.0: 6261 ; SANDY-NEXT: psubusw %mm1, %mm0 # sched: [3:1.00] 6262 ; SANDY-NEXT: psubusw (%rdi), %mm0 # sched: [8:1.00] 6263 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6264 ; SANDY-NEXT: retq # sched: [1:1.00] 6265 ; 6266 ; HASWELL-LABEL: test_psubusw: 6267 ; HASWELL: # %bb.0: 6268 ; HASWELL-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] 6269 ; HASWELL-NEXT: psubusw (%rdi), %mm0 # sched: [6:0.50] 6270 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6271 ; HASWELL-NEXT: retq # sched: [7:1.00] 6272 ; 6273 ; BROADWELL-LABEL: test_psubusw: 6274 ; BROADWELL: # %bb.0: 6275 ; BROADWELL-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] 6276 ; BROADWELL-NEXT: psubusw (%rdi), %mm0 # sched: [6:0.50] 6277 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6278 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6279 ; 6280 ; SKYLAKE-LABEL: test_psubusw: 6281 ; SKYLAKE: # %bb.0: 6282 ; SKYLAKE-NEXT: psubusw %mm1, %mm0 # sched: [1:1.00] 6283 ; SKYLAKE-NEXT: psubusw (%rdi), %mm0 # sched: [6:1.00] 6284 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 6285 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6286 ; 6287 ; SKX-LABEL: test_psubusw: 6288 ; SKX: # %bb.0: 6289 ; SKX-NEXT: psubusw %mm1, %mm0 # sched: [1:1.00] 6290 ; SKX-NEXT: psubusw (%rdi), %mm0 # sched: [6:1.00] 6291 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 6292 ; SKX-NEXT: retq # sched: [7:1.00] 6293 ; 6294 ; BTVER2-LABEL: test_psubusw: 6295 ; BTVER2: # %bb.0: 6296 ; BTVER2-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] 6297 ; BTVER2-NEXT: psubusw (%rdi), %mm0 # sched: [6:1.00] 6298 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 6299 ; BTVER2-NEXT: retq # sched: [4:1.00] 6300 ; 6301 ; ZNVER1-LABEL: test_psubusw: 6302 ; ZNVER1: # %bb.0: 6303 ; ZNVER1-NEXT: psubusw %mm1, %mm0 # sched: [1:0.25] 
6304 ; ZNVER1-NEXT: psubusw (%rdi), %mm0 # sched: [8:0.50] 6305 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 6306 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6307 %1 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a0, x86_mmx %a1) 6308 %2 = load x86_mmx, x86_mmx *%a2, align 8 6309 %3 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %1, x86_mmx %2) 6310 %4 = bitcast x86_mmx %3 to i64 6311 ret i64 %4 6312 } 6313 declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone 6314 6315 define i64 @test_psubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6316 ; GENERIC-LABEL: test_psubw: 6317 ; GENERIC: # %bb.0: 6318 ; GENERIC-NEXT: psubw %mm1, %mm0 # sched: [3:1.00] 6319 ; GENERIC-NEXT: psubw (%rdi), %mm0 # sched: [8:1.00] 6320 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 6321 ; GENERIC-NEXT: retq # sched: [1:1.00] 6322 ; 6323 ; ATOM-LABEL: test_psubw: 6324 ; ATOM: # %bb.0: 6325 ; ATOM-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] 6326 ; ATOM-NEXT: psubw (%rdi), %mm0 # sched: [1:1.00] 6327 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6328 ; ATOM-NEXT: retq # sched: [79:39.50] 6329 ; 6330 ; SLM-LABEL: test_psubw: 6331 ; SLM: # %bb.0: 6332 ; SLM-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] 6333 ; SLM-NEXT: psubw (%rdi), %mm0 # sched: [4:1.00] 6334 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6335 ; SLM-NEXT: retq # sched: [4:1.00] 6336 ; 6337 ; SANDY-LABEL: test_psubw: 6338 ; SANDY: # %bb.0: 6339 ; SANDY-NEXT: psubw %mm1, %mm0 # sched: [3:1.00] 6340 ; SANDY-NEXT: psubw (%rdi), %mm0 # sched: [8:1.00] 6341 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6342 ; SANDY-NEXT: retq # sched: [1:1.00] 6343 ; 6344 ; HASWELL-LABEL: test_psubw: 6345 ; HASWELL: # %bb.0: 6346 ; HASWELL-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] 6347 ; HASWELL-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50] 6348 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6349 ; HASWELL-NEXT: retq # sched: [7:1.00] 6350 ; 6351 ; BROADWELL-LABEL: test_psubw: 6352 ; BROADWELL: # %bb.0: 6353 ; BROADWELL-NEXT: 
psubw %mm1, %mm0 # sched: [1:0.50] 6354 ; BROADWELL-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50] 6355 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6356 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6357 ; 6358 ; SKYLAKE-LABEL: test_psubw: 6359 ; SKYLAKE: # %bb.0: 6360 ; SKYLAKE-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] 6361 ; SKYLAKE-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50] 6362 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 6363 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6364 ; 6365 ; SKX-LABEL: test_psubw: 6366 ; SKX: # %bb.0: 6367 ; SKX-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] 6368 ; SKX-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50] 6369 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 6370 ; SKX-NEXT: retq # sched: [7:1.00] 6371 ; 6372 ; BTVER2-LABEL: test_psubw: 6373 ; BTVER2: # %bb.0: 6374 ; BTVER2-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] 6375 ; BTVER2-NEXT: psubw (%rdi), %mm0 # sched: [6:1.00] 6376 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 6377 ; BTVER2-NEXT: retq # sched: [4:1.00] 6378 ; 6379 ; ZNVER1-LABEL: test_psubw: 6380 ; ZNVER1: # %bb.0: 6381 ; ZNVER1-NEXT: psubw %mm1, %mm0 # sched: [1:0.25] 6382 ; ZNVER1-NEXT: psubw (%rdi), %mm0 # sched: [8:0.50] 6383 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 6384 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6385 %1 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a0, x86_mmx %a1) 6386 %2 = load x86_mmx, x86_mmx *%a2, align 8 6387 %3 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %1, x86_mmx %2) 6388 %4 = bitcast x86_mmx %3 to i64 6389 ret i64 %4 6390 } 6391 declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone 6392 6393 define i64 @test_punpckhbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6394 ; GENERIC-LABEL: test_punpckhbw: 6395 ; GENERIC: # %bb.0: 6396 ; GENERIC-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] 6397 ; GENERIC-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] 6398 
; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 6399 ; GENERIC-NEXT: retq # sched: [1:1.00] 6400 ; 6401 ; ATOM-LABEL: test_punpckhbw: 6402 ; ATOM: # %bb.0: 6403 ; ATOM-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50] 6404 ; ATOM-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [1:1.00] 6405 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6406 ; ATOM-NEXT: retq # sched: [79:39.50] 6407 ; 6408 ; SLM-LABEL: test_punpckhbw: 6409 ; SLM: # %bb.0: 6410 ; SLM-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] 6411 ; SLM-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [4:1.00] 6412 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6413 ; SLM-NEXT: retq # sched: [4:1.00] 6414 ; 6415 ; SANDY-LABEL: test_punpckhbw: 6416 ; SANDY: # %bb.0: 6417 ; SANDY-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] 6418 ; SANDY-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] 6419 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6420 ; SANDY-NEXT: retq # sched: [1:1.00] 6421 ; 6422 ; HASWELL-LABEL: test_punpckhbw: 6423 ; HASWELL: # %bb.0: 6424 ; HASWELL-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] 6425 ; HASWELL-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] 6426 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6427 ; HASWELL-NEXT: retq # sched: [7:1.00] 6428 ; 6429 ; BROADWELL-LABEL: test_punpckhbw: 6430 ; BROADWELL: # %bb.0: 6431 ; BROADWELL-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] 6432 ; BROADWELL-NEXT: punpckhbw (%rdi), %mm0 # mm0 = 
mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] 6433 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6434 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6435 ; 6436 ; SKYLAKE-LABEL: test_punpckhbw: 6437 ; SKYLAKE: # %bb.0: 6438 ; SKYLAKE-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] 6439 ; SKYLAKE-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] 6440 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 6441 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6442 ; 6443 ; SKX-LABEL: test_punpckhbw: 6444 ; SKX: # %bb.0: 6445 ; SKX-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] 6446 ; SKX-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] 6447 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 6448 ; SKX-NEXT: retq # sched: [7:1.00] 6449 ; 6450 ; BTVER2-LABEL: test_punpckhbw: 6451 ; BTVER2: # %bb.0: 6452 ; BTVER2-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50] 6453 ; BTVER2-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] 6454 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 6455 ; BTVER2-NEXT: retq # sched: [4:1.00] 6456 ; 6457 ; ZNVER1-LABEL: test_punpckhbw: 6458 ; ZNVER1: # %bb.0: 6459 ; ZNVER1-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.25] 6460 ; ZNVER1-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [8:0.50] 6461 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 6462 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6463 %1 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a0, x86_mmx %a1) 6464 %2 = load x86_mmx, x86_mmx *%a2, align 8 6465 %3 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %1, x86_mmx %2) 6466 %4 = 
bitcast x86_mmx %3 to i64 6467 ret i64 %4 6468 } 6469 declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone 6470 6471 define i64 @test_punpckhdq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6472 ; GENERIC-LABEL: test_punpckhdq: 6473 ; GENERIC: # %bb.0: 6474 ; GENERIC-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] 6475 ; GENERIC-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] 6476 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 6477 ; GENERIC-NEXT: retq # sched: [1:1.00] 6478 ; 6479 ; ATOM-LABEL: test_punpckhdq: 6480 ; ATOM: # %bb.0: 6481 ; ATOM-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50] 6482 ; ATOM-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [1:1.00] 6483 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6484 ; ATOM-NEXT: retq # sched: [79:39.50] 6485 ; 6486 ; SLM-LABEL: test_punpckhdq: 6487 ; SLM: # %bb.0: 6488 ; SLM-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] 6489 ; SLM-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [4:1.00] 6490 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6491 ; SLM-NEXT: retq # sched: [4:1.00] 6492 ; 6493 ; SANDY-LABEL: test_punpckhdq: 6494 ; SANDY: # %bb.0: 6495 ; SANDY-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] 6496 ; SANDY-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] 6497 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6498 ; SANDY-NEXT: retq # sched: [1:1.00] 6499 ; 6500 ; HASWELL-LABEL: test_punpckhdq: 6501 ; HASWELL: # %bb.0: 6502 ; HASWELL-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] 6503 ; HASWELL-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] 6504 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6505 ; HASWELL-NEXT: retq # sched: [7:1.00] 6506 ; 6507 ; BROADWELL-LABEL: test_punpckhdq: 6508 ; BROADWELL: # %bb.0: 6509 ; BROADWELL-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] 6510 ; BROADWELL-NEXT: 
punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] 6511 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6512 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6513 ; 6514 ; SKYLAKE-LABEL: test_punpckhdq: 6515 ; SKYLAKE: # %bb.0: 6516 ; SKYLAKE-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] 6517 ; SKYLAKE-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] 6518 ; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00] 6519 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6520 ; 6521 ; SKX-LABEL: test_punpckhdq: 6522 ; SKX: # %bb.0: 6523 ; SKX-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] 6524 ; SKX-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] 6525 ; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00] 6526 ; SKX-NEXT: retq # sched: [7:1.00] 6527 ; 6528 ; BTVER2-LABEL: test_punpckhdq: 6529 ; BTVER2: # %bb.0: 6530 ; BTVER2-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50] 6531 ; BTVER2-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] 6532 ; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] 6533 ; BTVER2-NEXT: retq # sched: [4:1.00] 6534 ; 6535 ; ZNVER1-LABEL: test_punpckhdq: 6536 ; ZNVER1: # %bb.0: 6537 ; ZNVER1-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.25] 6538 ; ZNVER1-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [8:0.50] 6539 ; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00] 6540 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6541 %1 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a0, x86_mmx %a1) 6542 %2 = load x86_mmx, x86_mmx *%a2, align 8 6543 %3 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %1, x86_mmx %2) 6544 %4 = bitcast x86_mmx %3 to i64 6545 ret i64 %4 6546 } 6547 declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone 6548 6549 define i64 @test_punpckhwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { 6550 ; GENERIC-LABEL: test_punpckhwd: 6551 ; GENERIC: # %bb.0: 6552 ; GENERIC-NEXT: punpckhwd %mm1, %mm0 # mm0 = 
mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6553 ; GENERIC-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6554 ; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] 6555 ; GENERIC-NEXT: retq # sched: [1:1.00] 6556 ; 6557 ; ATOM-LABEL: test_punpckhwd: 6558 ; ATOM: # %bb.0: 6559 ; ATOM-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50] 6560 ; ATOM-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00] 6561 ; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00] 6562 ; ATOM-NEXT: retq # sched: [79:39.50] 6563 ; 6564 ; SLM-LABEL: test_punpckhwd: 6565 ; SLM: # %bb.0: 6566 ; SLM-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6567 ; SLM-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [4:1.00] 6568 ; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50] 6569 ; SLM-NEXT: retq # sched: [4:1.00] 6570 ; 6571 ; SANDY-LABEL: test_punpckhwd: 6572 ; SANDY: # %bb.0: 6573 ; SANDY-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6574 ; SANDY-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6575 ; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] 6576 ; SANDY-NEXT: retq # sched: [1:1.00] 6577 ; 6578 ; HASWELL-LABEL: test_punpckhwd: 6579 ; HASWELL: # %bb.0: 6580 ; HASWELL-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6581 ; HASWELL-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6582 ; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6583 ; HASWELL-NEXT: retq # sched: [7:1.00] 6584 ; 6585 ; BROADWELL-LABEL: test_punpckhwd: 6586 ; BROADWELL: # %bb.0: 6587 ; BROADWELL-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] 6588 ; BROADWELL-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] 6589 ; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00] 6590 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6591 
;
; SKYLAKE-LABEL: test_punpckhwd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
; SKYLAKE-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_punpckhwd:
; SKX: # %bb.0:
; SKX-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
; SKX-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_punpckhwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
; BTVER2-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_punpckhwd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.25]
; ZNVER1-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [8:0.50]
; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a0, x86_mmx %a1)
  %2 = load x86_mmx, x86_mmx *%a2, align 8
  %3 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %1, x86_mmx %2)
  %4 = bitcast x86_mmx %3 to i64
  ret i64 %4
}
declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone

; punpcklbw scheduling test. The intrinsic is applied first to the two register
; arguments and then to that result together with a value loaded from %a2, and
; the second result is returned as an i64. The expected output below therefore
; shows a register-register form followed by a (%rdi) memory form for every
; assertion prefix.
define i64 @test_punpcklbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; GENERIC-LABEL: test_punpcklbw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
; GENERIC-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_punpcklbw:
; ATOM: # %bb.0:
; ATOM-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
; ATOM-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00]
; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_punpcklbw:
; SLM: # %bb.0:
; SLM-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
; SLM-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [4:1.00]
; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_punpcklbw:
; SANDY: # %bb.0:
; SANDY-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
; SANDY-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_punpcklbw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
; HASWELL-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_punpcklbw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
; BROADWELL-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_punpcklbw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
; SKYLAKE-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_punpcklbw:
; SKX: # %bb.0:
; SKX-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
; SKX-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_punpcklbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
; BTVER2-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_punpcklbw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.25]
; ZNVER1-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [8:0.50]
; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; First use takes both operands from the function arguments.
  %from.regs = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a0, x86_mmx %a1)
  ; Second use takes its right-hand operand from memory at %a2.
  %from.mem = load x86_mmx, x86_mmx* %a2, align 8
  %combined = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %from.regs, x86_mmx %from.mem)
  %as.i64 = bitcast x86_mmx %combined to i64
  ret i64 %as.i64
}
declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone

; punpckldq scheduling test. The intrinsic is applied first to the two register
; arguments and then to that result together with a value loaded from %a2, and
; the second result is returned as an i64. The expected output below therefore
; shows a register-register form followed by a (%rdi) memory form for every
; assertion prefix.
define i64 @test_punpckldq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; GENERIC-LABEL: test_punpckldq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
; GENERIC-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_punpckldq:
; ATOM: # %bb.0:
; ATOM-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
; ATOM-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [1:1.00]
; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_punpckldq:
; SLM: # %bb.0:
; SLM-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
; SLM-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [4:1.00]
; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_punpckldq:
; SANDY: # %bb.0:
; SANDY-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
; SANDY-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_punpckldq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
; HASWELL-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_punpckldq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
; BROADWELL-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_punpckldq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
; SKYLAKE-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_punpckldq:
; SKX: # %bb.0:
; SKX-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
; SKX-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_punpckldq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.50]
; BTVER2-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_punpckldq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.25]
; ZNVER1-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [8:0.50]
; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; First use takes both operands from the function arguments.
  %from.regs = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a0, x86_mmx %a1)
  ; Second use takes its right-hand operand from memory at %a2.
  %from.mem = load x86_mmx, x86_mmx* %a2, align 8
  %combined = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %from.regs, x86_mmx %from.mem)
  %as.i64 = bitcast x86_mmx %combined to i64
  ret i64 %as.i64
}
declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone

; punpcklwd scheduling test. The intrinsic is applied first to the two register
; arguments and then to that result together with a value loaded from %a2, and
; the second result is returned as an i64. The expected output below therefore
; shows a register-register form followed by a (%rdi) memory form for every
; assertion prefix.
define i64 @test_punpcklwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; GENERIC-LABEL: test_punpcklwd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
; GENERIC-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_punpcklwd:
; ATOM: # %bb.0:
; ATOM-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
; ATOM-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [1:1.00]
; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_punpcklwd:
; SLM: # %bb.0:
; SLM-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
; SLM-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [4:1.00]
; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_punpcklwd:
; SANDY: # %bb.0:
; SANDY-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
; SANDY-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_punpcklwd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
; HASWELL-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_punpcklwd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
; BROADWELL-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_punpcklwd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
; SKYLAKE-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_punpcklwd:
; SKX: # %bb.0:
; SKX-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
; SKX-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_punpcklwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.50]
; BTVER2-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_punpcklwd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.25]
; ZNVER1-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [8:0.50]
; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; First use takes both operands from the function arguments.
  %from.regs = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a0, x86_mmx %a1)
  ; Second use takes its right-hand operand from memory at %a2.
  %from.mem = load x86_mmx, x86_mmx* %a2, align 8
  %combined = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %from.regs, x86_mmx %from.mem)
  %as.i64 = bitcast x86_mmx %combined to i64
  ret i64 %as.i64
}
declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone

; pxor scheduling test. The intrinsic is applied first to the two register
; arguments and then to that result together with a value loaded from %a2, and
; the second result is returned as an i64. The expected output below therefore
; shows a register-register form followed by a (%rdi) memory form for every
; assertion prefix.
define i64 @test_pxor(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
; GENERIC-LABEL: test_pxor:
; GENERIC: # %bb.0:
; GENERIC-NEXT: pxor %mm1, %mm0 # sched: [1:0.33]
; GENERIC-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_pxor:
; ATOM: # %bb.0:
; ATOM-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
; ATOM-NEXT: pxor (%rdi), %mm0 # sched: [1:1.00]
; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_pxor:
; SLM: # %bb.0:
; SLM-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
; SLM-NEXT: pxor (%rdi), %mm0 # sched: [4:1.00]
; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: test_pxor:
; SANDY: # %bb.0:
; SANDY-NEXT: pxor %mm1, %mm0 # sched: [1:0.33]
; SANDY-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pxor:
; HASWELL: # %bb.0:
; HASWELL-NEXT: pxor %mm1, %mm0 # sched: [1:0.33]
; HASWELL-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pxor:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: pxor %mm1, %mm0 # sched: [1:0.33]
; BROADWELL-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pxor:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
; SKYLAKE-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pxor:
; SKX: # %bb.0:
; SKX-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
; SKX-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_pxor:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
; BTVER2-NEXT: pxor (%rdi), %mm0 # sched: [6:1.00]
; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_pxor:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: pxor %mm1, %mm0 # sched: [1:0.25]
; ZNVER1-NEXT: pxor (%rdi), %mm0 # sched: [8:0.50]
; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  ; First use takes both operands from the function arguments.
  %from.regs = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a0, x86_mmx %a1)
  ; Second use takes its right-hand operand from memory at %a2.
  %from.mem = load x86_mmx, x86_mmx* %a2, align 8
  %combined = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %from.regs, x86_mmx %from.mem)
  %as.i64 = bitcast x86_mmx %combined to i64
  ret i64 %as.i64
}
declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone