1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,GENERIC 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefixes=CHECK,ATOM 4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SLM 5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE 6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY 7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE 8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY 9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE 10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL 11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE 12 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL 13 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE 14 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE 15 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx 
-mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKX-SSE 16 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX 17 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE 18 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 19 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE 20 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 21 22 define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) { 23 ; GENERIC-LABEL: test_pabsb: 24 ; GENERIC: # %bb.0: 25 ; GENERIC-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] 26 ; GENERIC-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] 27 ; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 28 ; GENERIC-NEXT: retq # sched: [1:1.00] 29 ; 30 ; ATOM-LABEL: test_pabsb: 31 ; ATOM: # %bb.0: 32 ; ATOM-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] 33 ; ATOM-NEXT: pabsb (%rdi), %xmm0 # sched: [1:1.00] 34 ; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 35 ; ATOM-NEXT: nop # sched: [1:0.50] 36 ; ATOM-NEXT: nop # sched: [1:0.50] 37 ; ATOM-NEXT: retq # sched: [79:39.50] 38 ; 39 ; SLM-LABEL: test_pabsb: 40 ; SLM: # %bb.0: 41 ; SLM-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] 42 ; SLM-NEXT: pabsb (%rdi), %xmm0 # sched: [4:1.00] 43 ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 44 ; SLM-NEXT: retq # sched: [4:1.00] 45 ; 46 ; SANDY-SSE-LABEL: test_pabsb: 47 ; SANDY-SSE: # %bb.0: 48 ; SANDY-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] 49 ; SANDY-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] 50 ; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 51 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 52 ; 53 ; SANDY-LABEL: test_pabsb: 54 ; SANDY: # %bb.0: 
55 ; SANDY-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] 56 ; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] 57 ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 58 ; SANDY-NEXT: retq # sched: [1:1.00] 59 ; 60 ; HASWELL-SSE-LABEL: test_pabsb: 61 ; HASWELL-SSE: # %bb.0: 62 ; HASWELL-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] 63 ; HASWELL-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] 64 ; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 65 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 66 ; 67 ; HASWELL-LABEL: test_pabsb: 68 ; HASWELL: # %bb.0: 69 ; HASWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] 70 ; HASWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] 71 ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 72 ; HASWELL-NEXT: retq # sched: [7:1.00] 73 ; 74 ; BROADWELL-SSE-LABEL: test_pabsb: 75 ; BROADWELL-SSE: # %bb.0: 76 ; BROADWELL-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] 77 ; BROADWELL-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [6:0.50] 78 ; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 79 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 80 ; 81 ; BROADWELL-LABEL: test_pabsb: 82 ; BROADWELL: # %bb.0: 83 ; BROADWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] 84 ; BROADWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [6:0.50] 85 ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 86 ; BROADWELL-NEXT: retq # sched: [7:1.00] 87 ; 88 ; SKYLAKE-SSE-LABEL: test_pabsb: 89 ; SKYLAKE-SSE: # %bb.0: 90 ; SKYLAKE-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] 91 ; SKYLAKE-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] 92 ; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 93 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 94 ; 95 ; SKYLAKE-LABEL: test_pabsb: 96 ; SKYLAKE: # %bb.0: 97 ; SKYLAKE-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] 98 ; SKYLAKE-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] 99 ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 100 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 101 ; 102 ; SKX-SSE-LABEL: test_pabsb: 103 ; 
SKX-SSE: # %bb.0: 104 ; SKX-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] 105 ; SKX-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] 106 ; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 107 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 108 ; 109 ; SKX-LABEL: test_pabsb: 110 ; SKX: # %bb.0: 111 ; SKX-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] 112 ; SKX-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] 113 ; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 114 ; SKX-NEXT: retq # sched: [7:1.00] 115 ; 116 ; BTVER2-SSE-LABEL: test_pabsb: 117 ; BTVER2-SSE: # %bb.0: 118 ; BTVER2-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] 119 ; BTVER2-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [6:1.00] 120 ; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 121 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 122 ; 123 ; BTVER2-LABEL: test_pabsb: 124 ; BTVER2: # %bb.0: 125 ; BTVER2-NEXT: vpabsb (%rdi), %xmm1 # sched: [6:1.00] 126 ; BTVER2-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] 127 ; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 128 ; BTVER2-NEXT: retq # sched: [4:1.00] 129 ; 130 ; ZNVER1-SSE-LABEL: test_pabsb: 131 ; ZNVER1-SSE: # %bb.0: 132 ; ZNVER1-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.25] 133 ; ZNVER1-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [8:0.50] 134 ; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] 135 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 136 ; 137 ; ZNVER1-LABEL: test_pabsb: 138 ; ZNVER1: # %bb.0: 139 ; ZNVER1-NEXT: vpabsb (%rdi), %xmm1 # sched: [8:0.50] 140 ; ZNVER1-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.25] 141 ; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 142 ; ZNVER1-NEXT: retq # sched: [1:0.50] 143 %1 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) 144 %2 = load <16 x i8>, <16 x i8> *%a1, align 16 145 %3 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %2) 146 %4 = or <16 x i8> %1, %3 147 ret <16 x i8> %4 148 } 149 declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone 150 151 define <4 x i32> @test_pabsd(<4 x 
i32> %a0, <4 x i32> *%a1) { 152 ; GENERIC-LABEL: test_pabsd: 153 ; GENERIC: # %bb.0: 154 ; GENERIC-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] 155 ; GENERIC-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] 156 ; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 157 ; GENERIC-NEXT: retq # sched: [1:1.00] 158 ; 159 ; ATOM-LABEL: test_pabsd: 160 ; ATOM: # %bb.0: 161 ; ATOM-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] 162 ; ATOM-NEXT: pabsd (%rdi), %xmm0 # sched: [1:1.00] 163 ; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 164 ; ATOM-NEXT: nop # sched: [1:0.50] 165 ; ATOM-NEXT: nop # sched: [1:0.50] 166 ; ATOM-NEXT: retq # sched: [79:39.50] 167 ; 168 ; SLM-LABEL: test_pabsd: 169 ; SLM: # %bb.0: 170 ; SLM-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] 171 ; SLM-NEXT: pabsd (%rdi), %xmm0 # sched: [4:1.00] 172 ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 173 ; SLM-NEXT: retq # sched: [4:1.00] 174 ; 175 ; SANDY-SSE-LABEL: test_pabsd: 176 ; SANDY-SSE: # %bb.0: 177 ; SANDY-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] 178 ; SANDY-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] 179 ; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 180 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 181 ; 182 ; SANDY-LABEL: test_pabsd: 183 ; SANDY: # %bb.0: 184 ; SANDY-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] 185 ; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] 186 ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 187 ; SANDY-NEXT: retq # sched: [1:1.00] 188 ; 189 ; HASWELL-SSE-LABEL: test_pabsd: 190 ; HASWELL-SSE: # %bb.0: 191 ; HASWELL-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] 192 ; HASWELL-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] 193 ; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 194 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 195 ; 196 ; HASWELL-LABEL: test_pabsd: 197 ; HASWELL: # %bb.0: 198 ; HASWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] 199 ; HASWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] 200 ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 201 ; 
HASWELL-NEXT: retq # sched: [7:1.00] 202 ; 203 ; BROADWELL-SSE-LABEL: test_pabsd: 204 ; BROADWELL-SSE: # %bb.0: 205 ; BROADWELL-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] 206 ; BROADWELL-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [6:0.50] 207 ; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 208 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 209 ; 210 ; BROADWELL-LABEL: test_pabsd: 211 ; BROADWELL: # %bb.0: 212 ; BROADWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] 213 ; BROADWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [6:0.50] 214 ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 215 ; BROADWELL-NEXT: retq # sched: [7:1.00] 216 ; 217 ; SKYLAKE-SSE-LABEL: test_pabsd: 218 ; SKYLAKE-SSE: # %bb.0: 219 ; SKYLAKE-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] 220 ; SKYLAKE-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] 221 ; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 222 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 223 ; 224 ; SKYLAKE-LABEL: test_pabsd: 225 ; SKYLAKE: # %bb.0: 226 ; SKYLAKE-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] 227 ; SKYLAKE-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] 228 ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 229 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 230 ; 231 ; SKX-SSE-LABEL: test_pabsd: 232 ; SKX-SSE: # %bb.0: 233 ; SKX-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] 234 ; SKX-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] 235 ; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 236 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 237 ; 238 ; SKX-LABEL: test_pabsd: 239 ; SKX: # %bb.0: 240 ; SKX-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] 241 ; SKX-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] 242 ; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 243 ; SKX-NEXT: retq # sched: [7:1.00] 244 ; 245 ; BTVER2-SSE-LABEL: test_pabsd: 246 ; BTVER2-SSE: # %bb.0: 247 ; BTVER2-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] 248 ; BTVER2-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [6:1.00] 249 ; BTVER2-SSE-NEXT: por %xmm1, 
%xmm0 # sched: [1:0.50] 250 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 251 ; 252 ; BTVER2-LABEL: test_pabsd: 253 ; BTVER2: # %bb.0: 254 ; BTVER2-NEXT: vpabsd (%rdi), %xmm1 # sched: [6:1.00] 255 ; BTVER2-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] 256 ; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 257 ; BTVER2-NEXT: retq # sched: [4:1.00] 258 ; 259 ; ZNVER1-SSE-LABEL: test_pabsd: 260 ; ZNVER1-SSE: # %bb.0: 261 ; ZNVER1-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.25] 262 ; ZNVER1-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [8:0.50] 263 ; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] 264 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 265 ; 266 ; ZNVER1-LABEL: test_pabsd: 267 ; ZNVER1: # %bb.0: 268 ; ZNVER1-NEXT: vpabsd (%rdi), %xmm1 # sched: [8:0.50] 269 ; ZNVER1-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.25] 270 ; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 271 ; ZNVER1-NEXT: retq # sched: [1:0.50] 272 %1 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) 273 %2 = load <4 x i32>, <4 x i32> *%a1, align 16 274 %3 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %2) 275 %4 = or <4 x i32> %1, %3 276 ret <4 x i32> %4 277 } 278 declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone 279 280 define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) { 281 ; GENERIC-LABEL: test_pabsw: 282 ; GENERIC: # %bb.0: 283 ; GENERIC-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] 284 ; GENERIC-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] 285 ; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 286 ; GENERIC-NEXT: retq # sched: [1:1.00] 287 ; 288 ; ATOM-LABEL: test_pabsw: 289 ; ATOM: # %bb.0: 290 ; ATOM-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] 291 ; ATOM-NEXT: pabsw (%rdi), %xmm0 # sched: [1:1.00] 292 ; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 293 ; ATOM-NEXT: nop # sched: [1:0.50] 294 ; ATOM-NEXT: nop # sched: [1:0.50] 295 ; ATOM-NEXT: retq # sched: [79:39.50] 296 ; 297 ; SLM-LABEL: test_pabsw: 298 ; SLM: # %bb.0: 299 ; SLM-NEXT: pabsw 
%xmm0, %xmm1 # sched: [1:0.50] 300 ; SLM-NEXT: pabsw (%rdi), %xmm0 # sched: [4:1.00] 301 ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 302 ; SLM-NEXT: retq # sched: [4:1.00] 303 ; 304 ; SANDY-SSE-LABEL: test_pabsw: 305 ; SANDY-SSE: # %bb.0: 306 ; SANDY-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] 307 ; SANDY-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] 308 ; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 309 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 310 ; 311 ; SANDY-LABEL: test_pabsw: 312 ; SANDY: # %bb.0: 313 ; SANDY-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] 314 ; SANDY-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50] 315 ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 316 ; SANDY-NEXT: retq # sched: [1:1.00] 317 ; 318 ; HASWELL-SSE-LABEL: test_pabsw: 319 ; HASWELL-SSE: # %bb.0: 320 ; HASWELL-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] 321 ; HASWELL-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] 322 ; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 323 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 324 ; 325 ; HASWELL-LABEL: test_pabsw: 326 ; HASWELL: # %bb.0: 327 ; HASWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] 328 ; HASWELL-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50] 329 ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 330 ; HASWELL-NEXT: retq # sched: [7:1.00] 331 ; 332 ; BROADWELL-SSE-LABEL: test_pabsw: 333 ; BROADWELL-SSE: # %bb.0: 334 ; BROADWELL-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] 335 ; BROADWELL-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [6:0.50] 336 ; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 337 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 338 ; 339 ; BROADWELL-LABEL: test_pabsw: 340 ; BROADWELL: # %bb.0: 341 ; BROADWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] 342 ; BROADWELL-NEXT: vpabsw (%rdi), %xmm1 # sched: [6:0.50] 343 ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 344 ; BROADWELL-NEXT: retq # sched: [7:1.00] 345 ; 346 ; SKYLAKE-SSE-LABEL: test_pabsw: 347 ; SKYLAKE-SSE: # 
%bb.0: 348 ; SKYLAKE-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] 349 ; SKYLAKE-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] 350 ; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 351 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 352 ; 353 ; SKYLAKE-LABEL: test_pabsw: 354 ; SKYLAKE: # %bb.0: 355 ; SKYLAKE-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] 356 ; SKYLAKE-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50] 357 ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 358 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 359 ; 360 ; SKX-SSE-LABEL: test_pabsw: 361 ; SKX-SSE: # %bb.0: 362 ; SKX-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] 363 ; SKX-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] 364 ; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] 365 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 366 ; 367 ; SKX-LABEL: test_pabsw: 368 ; SKX: # %bb.0: 369 ; SKX-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] 370 ; SKX-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50] 371 ; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 372 ; SKX-NEXT: retq # sched: [7:1.00] 373 ; 374 ; BTVER2-SSE-LABEL: test_pabsw: 375 ; BTVER2-SSE: # %bb.0: 376 ; BTVER2-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] 377 ; BTVER2-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [6:1.00] 378 ; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] 379 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 380 ; 381 ; BTVER2-LABEL: test_pabsw: 382 ; BTVER2: # %bb.0: 383 ; BTVER2-NEXT: vpabsw (%rdi), %xmm1 # sched: [6:1.00] 384 ; BTVER2-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] 385 ; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 386 ; BTVER2-NEXT: retq # sched: [4:1.00] 387 ; 388 ; ZNVER1-SSE-LABEL: test_pabsw: 389 ; ZNVER1-SSE: # %bb.0: 390 ; ZNVER1-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.25] 391 ; ZNVER1-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [8:0.50] 392 ; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] 393 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 394 ; 395 ; ZNVER1-LABEL: test_pabsw: 396 ; ZNVER1: # %bb.0: 397 ; 
ZNVER1-NEXT: vpabsw (%rdi), %xmm1 # sched: [8:0.50] 398 ; ZNVER1-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.25] 399 ; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 400 ; ZNVER1-NEXT: retq # sched: [1:0.50] 401 %1 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) 402 %2 = load <8 x i16>, <8 x i16> *%a1, align 16 403 %3 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %2) 404 %4 = or <8 x i16> %1, %3 405 ret <8 x i16> %4 406 } 407 declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone 408 409 define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 410 ; GENERIC-LABEL: test_palignr: 411 ; GENERIC: # %bb.0: 412 ; GENERIC-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] 413 ; GENERIC-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] 414 ; GENERIC-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] 415 ; GENERIC-NEXT: retq # sched: [1:1.00] 416 ; 417 ; ATOM-LABEL: test_palignr: 418 ; ATOM: # %bb.0: 419 ; ATOM-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] 420 ; ATOM-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00] 421 ; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 422 ; ATOM-NEXT: nop # sched: [1:0.50] 423 ; ATOM-NEXT: nop # sched: [1:0.50] 424 ; ATOM-NEXT: retq # sched: [79:39.50] 425 ; 426 ; SLM-LABEL: test_palignr: 427 ; SLM: # %bb.0: 428 ; SLM-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] 429 ; SLM-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [4:1.00] 430 ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 431 ; SLM-NEXT: retq # sched: [4:1.00] 432 ; 433 ; SANDY-SSE-LABEL: test_palignr: 434 ; SANDY-SSE: # %bb.0: 435 ; SANDY-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] 436 ; 
SANDY-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] 437 ; SANDY-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] 438 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 439 ; 440 ; SANDY-LABEL: test_palignr: 441 ; SANDY: # %bb.0: 442 ; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] 443 ; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] 444 ; SANDY-NEXT: retq # sched: [1:1.00] 445 ; 446 ; HASWELL-SSE-LABEL: test_palignr: 447 ; HASWELL-SSE: # %bb.0: 448 ; HASWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] 449 ; HASWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] 450 ; HASWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] 451 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 452 ; 453 ; HASWELL-LABEL: test_palignr: 454 ; HASWELL: # %bb.0: 455 ; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] 456 ; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] 457 ; HASWELL-NEXT: retq # sched: [7:1.00] 458 ; 459 ; BROADWELL-SSE-LABEL: test_palignr: 460 ; BROADWELL-SSE: # %bb.0: 461 ; BROADWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] 462 ; BROADWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] 463 ; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] 464 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 465 ; 466 ; BROADWELL-LABEL: test_palignr: 467 ; BROADWELL: # %bb.0: 468 ; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] 469 ; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] 470 ; 
BROADWELL-NEXT: retq # sched: [7:1.00] 471 ; 472 ; SKYLAKE-SSE-LABEL: test_palignr: 473 ; SKYLAKE-SSE: # %bb.0: 474 ; SKYLAKE-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] 475 ; SKYLAKE-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] 476 ; SKYLAKE-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] 477 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 478 ; 479 ; SKYLAKE-LABEL: test_palignr: 480 ; SKYLAKE: # %bb.0: 481 ; SKYLAKE-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] 482 ; SKYLAKE-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] 483 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 484 ; 485 ; SKX-SSE-LABEL: test_palignr: 486 ; SKX-SSE: # %bb.0: 487 ; SKX-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] 488 ; SKX-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] 489 ; SKX-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] 490 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 491 ; 492 ; SKX-LABEL: test_palignr: 493 ; SKX: # %bb.0: 494 ; SKX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] 495 ; SKX-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] 496 ; SKX-NEXT: retq # sched: [7:1.00] 497 ; 498 ; BTVER2-SSE-LABEL: test_palignr: 499 ; BTVER2-SSE: # %bb.0: 500 ; BTVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] 501 ; BTVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] 502 ; BTVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] 503 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 504 ; 505 ; BTVER2-LABEL: test_palignr: 506 ; BTVER2: # %bb.0: 507 ; BTVER2-NEXT: vpalignr {{.*#+}} xmm0 = 
xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] 508 ; BTVER2-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] 509 ; BTVER2-NEXT: retq # sched: [4:1.00] 510 ; 511 ; ZNVER1-SSE-LABEL: test_palignr: 512 ; ZNVER1-SSE: # %bb.0: 513 ; ZNVER1-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.25] 514 ; ZNVER1-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [8:0.50] 515 ; ZNVER1-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.25] 516 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 517 ; 518 ; ZNVER1-LABEL: test_palignr: 519 ; ZNVER1: # %bb.0: 520 ; ZNVER1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.25] 521 ; ZNVER1-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [8:0.50] 522 ; ZNVER1-NEXT: retq # sched: [1:0.50] 523 %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10> 524 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 525 %3 = shufflevector <8 x i16> %2, <8 x i16> %1, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> 526 ret <8 x i16> %3 527 } 528 529 define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 530 ; GENERIC-LABEL: test_phaddd: 531 ; GENERIC: # %bb.0: 532 ; GENERIC-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50] 533 ; GENERIC-NEXT: phaddd (%rdi), %xmm0 # sched: [9:1.50] 534 ; GENERIC-NEXT: retq # sched: [1:1.00] 535 ; 536 ; ATOM-LABEL: test_phaddd: 537 ; ATOM: # %bb.0: 538 ; ATOM-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50] 539 ; ATOM-NEXT: phaddd (%rdi), %xmm0 # sched: [4:2.00] 540 ; ATOM-NEXT: retq # sched: [79:39.50] 541 ; 542 ; SLM-LABEL: test_phaddd: 543 ; SLM: # %bb.0: 544 ; SLM-NEXT: phaddd %xmm1, %xmm0 # sched: [1:0.50] 545 ; SLM-NEXT: phaddd (%rdi), %xmm0 # sched: [4:1.00] 546 ; SLM-NEXT: retq # sched: [4:1.00] 547 ; 548 ; 
SANDY-SSE-LABEL: test_phaddd: 549 ; SANDY-SSE: # %bb.0: 550 ; SANDY-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50] 551 ; SANDY-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:1.50] 552 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 553 ; 554 ; SANDY-LABEL: test_phaddd: 555 ; SANDY: # %bb.0: 556 ; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50] 557 ; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] 558 ; SANDY-NEXT: retq # sched: [1:1.00] 559 ; 560 ; HASWELL-SSE-LABEL: test_phaddd: 561 ; HASWELL-SSE: # %bb.0: 562 ; HASWELL-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00] 563 ; HASWELL-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:2.00] 564 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 565 ; 566 ; HASWELL-LABEL: test_phaddd: 567 ; HASWELL: # %bb.0: 568 ; HASWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 569 ; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] 570 ; HASWELL-NEXT: retq # sched: [7:1.00] 571 ; 572 ; BROADWELL-SSE-LABEL: test_phaddd: 573 ; BROADWELL-SSE: # %bb.0: 574 ; BROADWELL-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00] 575 ; BROADWELL-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [8:2.00] 576 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 577 ; 578 ; BROADWELL-LABEL: test_phaddd: 579 ; BROADWELL: # %bb.0: 580 ; BROADWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 581 ; BROADWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] 582 ; BROADWELL-NEXT: retq # sched: [7:1.00] 583 ; 584 ; SKYLAKE-SSE-LABEL: test_phaddd: 585 ; SKYLAKE-SSE: # %bb.0: 586 ; SKYLAKE-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00] 587 ; SKYLAKE-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:2.00] 588 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 589 ; 590 ; SKYLAKE-LABEL: test_phaddd: 591 ; SKYLAKE: # %bb.0: 592 ; SKYLAKE-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 593 ; SKYLAKE-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] 594 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 595 ; 596 ; SKX-SSE-LABEL: test_phaddd: 597 ; SKX-SSE: # %bb.0: 598 ; 
SKX-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00] 599 ; SKX-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:2.00] 600 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 601 ; 602 ; SKX-LABEL: test_phaddd: 603 ; SKX: # %bb.0: 604 ; SKX-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 605 ; SKX-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] 606 ; SKX-NEXT: retq # sched: [7:1.00] 607 ; 608 ; BTVER2-SSE-LABEL: test_phaddd: 609 ; BTVER2-SSE: # %bb.0: 610 ; BTVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [1:0.50] 611 ; BTVER2-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [6:1.00] 612 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 613 ; 614 ; BTVER2-LABEL: test_phaddd: 615 ; BTVER2: # %bb.0: 616 ; BTVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 617 ; BTVER2-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 618 ; BTVER2-NEXT: retq # sched: [4:1.00] 619 ; 620 ; ZNVER1-SSE-LABEL: test_phaddd: 621 ; ZNVER1-SSE: # %bb.0: 622 ; ZNVER1-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [100:0.25] 623 ; ZNVER1-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [100:0.25] 624 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 625 ; 626 ; ZNVER1-LABEL: test_phaddd: 627 ; ZNVER1: # %bb.0: 628 ; ZNVER1-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [100:0.25] 629 ; ZNVER1-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [100:0.25] 630 ; ZNVER1-NEXT: retq # sched: [1:0.50] 631 %1 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) 632 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 633 %3 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %1, <4 x i32> %2) 634 ret <4 x i32> %3 635 } 636 declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone 637 638 define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 639 ; GENERIC-LABEL: test_phaddsw: 640 ; GENERIC: # %bb.0: 641 ; GENERIC-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:1.50] 642 ; GENERIC-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:1.50] 643 ; GENERIC-NEXT: retq # sched: [1:1.00] 644 ; 645 ; ATOM-LABEL: 
test_phaddsw: 646 ; ATOM: # %bb.0: 647 ; ATOM-NEXT: phaddsw %xmm1, %xmm0 # sched: [7:3.50] 648 ; ATOM-NEXT: phaddsw (%rdi), %xmm0 # sched: [8:4.00] 649 ; ATOM-NEXT: retq # sched: [79:39.50] 650 ; 651 ; SLM-LABEL: test_phaddsw: 652 ; SLM: # %bb.0: 653 ; SLM-NEXT: phaddsw %xmm1, %xmm0 # sched: [1:0.50] 654 ; SLM-NEXT: phaddsw (%rdi), %xmm0 # sched: [4:1.00] 655 ; SLM-NEXT: retq # sched: [4:1.00] 656 ; 657 ; SANDY-SSE-LABEL: test_phaddsw: 658 ; SANDY-SSE: # %bb.0: 659 ; SANDY-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:1.50] 660 ; SANDY-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:1.50] 661 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 662 ; 663 ; SANDY-LABEL: test_phaddsw: 664 ; SANDY: # %bb.0: 665 ; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] 666 ; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] 667 ; SANDY-NEXT: retq # sched: [1:1.00] 668 ; 669 ; HASWELL-SSE-LABEL: test_phaddsw: 670 ; HASWELL-SSE: # %bb.0: 671 ; HASWELL-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00] 672 ; HASWELL-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:2.00] 673 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 674 ; 675 ; HASWELL-LABEL: test_phaddsw: 676 ; HASWELL: # %bb.0: 677 ; HASWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 678 ; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] 679 ; HASWELL-NEXT: retq # sched: [7:1.00] 680 ; 681 ; BROADWELL-SSE-LABEL: test_phaddsw: 682 ; BROADWELL-SSE: # %bb.0: 683 ; BROADWELL-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00] 684 ; BROADWELL-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [8:2.00] 685 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 686 ; 687 ; BROADWELL-LABEL: test_phaddsw: 688 ; BROADWELL: # %bb.0: 689 ; BROADWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 690 ; BROADWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] 691 ; BROADWELL-NEXT: retq # sched: [7:1.00] 692 ; 693 ; SKYLAKE-SSE-LABEL: test_phaddsw: 694 ; SKYLAKE-SSE: # %bb.0: 695 ; SKYLAKE-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: 
[3:2.00] 696 ; SKYLAKE-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:2.00] 697 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 698 ; 699 ; SKYLAKE-LABEL: test_phaddsw: 700 ; SKYLAKE: # %bb.0: 701 ; SKYLAKE-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 702 ; SKYLAKE-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] 703 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 704 ; 705 ; SKX-SSE-LABEL: test_phaddsw: 706 ; SKX-SSE: # %bb.0: 707 ; SKX-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00] 708 ; SKX-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:2.00] 709 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 710 ; 711 ; SKX-LABEL: test_phaddsw: 712 ; SKX: # %bb.0: 713 ; SKX-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 714 ; SKX-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] 715 ; SKX-NEXT: retq # sched: [7:1.00] 716 ; 717 ; BTVER2-SSE-LABEL: test_phaddsw: 718 ; BTVER2-SSE: # %bb.0: 719 ; BTVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [1:0.50] 720 ; BTVER2-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [6:1.00] 721 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 722 ; 723 ; BTVER2-LABEL: test_phaddsw: 724 ; BTVER2: # %bb.0: 725 ; BTVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 726 ; BTVER2-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 727 ; BTVER2-NEXT: retq # sched: [4:1.00] 728 ; 729 ; ZNVER1-SSE-LABEL: test_phaddsw: 730 ; ZNVER1-SSE: # %bb.0: 731 ; ZNVER1-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [100:0.25] 732 ; ZNVER1-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [100:0.25] 733 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 734 ; 735 ; ZNVER1-LABEL: test_phaddsw: 736 ; ZNVER1: # %bb.0: 737 ; ZNVER1-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [100:0.25] 738 ; ZNVER1-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [100:0.25] 739 ; ZNVER1-NEXT: retq # sched: [1:0.50] 740 %1 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) 741 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 742 %3 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %1, <8 x i16> %2) 743 
ret <8 x i16> %3
}
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

; Schedule-annotation test for phaddw / vphaddw.
; The intrinsic is applied to %a0 and %a1, then to that result and a vector
; loaded from %a2, so the checked assembly contains both the register-register
; and the register-memory form of the instruction.
define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; GENERIC-LABEL: test_phaddw:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:1.50]
; GENERIC-NEXT:    phaddw (%rdi), %xmm0 # sched: [9:1.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_phaddw:
; ATOM:       # %bb.0:
; ATOM-NEXT:    phaddw %xmm1, %xmm0 # sched: [7:3.50]
; ATOM-NEXT:    phaddw (%rdi), %xmm0 # sched: [8:4.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_phaddw:
; SLM:       # %bb.0:
; SLM-NEXT:    phaddw %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    phaddw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_phaddw:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:1.50]
; SANDY-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [9:1.50]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_phaddw:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_phaddw:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:2.00]
; HASWELL-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [9:2.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_phaddw:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_phaddw:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:2.00]
; BROADWELL-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [8:2.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_phaddw:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; BROADWELL-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_phaddw:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:2.00]
; SKYLAKE-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [9:2.00]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_phaddw:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; SKYLAKE-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_phaddw:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [3:2.00]
; SKX-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [9:2.00]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_phaddw:
; SKX:       # %bb.0:
; SKX-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; SKX-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_phaddw:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_phaddw:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_phaddw:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    phaddw %xmm1, %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    phaddw (%rdi), %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_phaddw:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    vphaddw (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
  %3 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %1, <8 x i16> %2)
  ret <8 x i16> %3
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone

; Schedule-annotation test for phsubd / vphsubd.
; The intrinsic is applied to %a0 and %a1, then to that result and a vector
; loaded from %a2, so the checked assembly contains both the register-register
; and the register-memory form of the instruction.
define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_phsubd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:1.50]
; GENERIC-NEXT:    phsubd (%rdi), %xmm0 # sched: [9:1.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_phsubd:
; ATOM:       # %bb.0:
; ATOM-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:1.50]
; ATOM-NEXT:    phsubd (%rdi), %xmm0 # sched: [4:2.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_phsubd:
; SLM:       # %bb.0:
; SLM-NEXT:    phsubd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    phsubd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_phsubd:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:1.50]
; SANDY-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [9:1.50]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_phsubd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_phsubd:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:2.00]
; HASWELL-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [9:2.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_phsubd:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_phsubd:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:2.00]
; BROADWELL-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [8:2.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_phsubd:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; BROADWELL-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_phsubd:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:2.00]
; SKYLAKE-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [9:2.00]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_phsubd:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; SKYLAKE-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_phsubd:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [3:2.00]
; SKX-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [9:2.00]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_phsubd:
; SKX:       # %bb.0:
; SKX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; SKX-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_phsubd:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_phsubd:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_phsubd:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    phsubd %xmm1, %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    phsubd (%rdi), %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_phsubd:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    vphsubd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
  %2 = load <4 x i32>, <4 x i32> *%a2, align 16
  %3 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %1, <4 x i32> %2)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone

; Schedule-annotation test for phsubsw / vphsubsw.
; The intrinsic is applied to %a0 and %a1, then to that result and a vector
; loaded from %a2, so the checked assembly contains both the register-register
; and the register-memory form of the instruction.
define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; GENERIC-LABEL: test_phsubsw:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:1.50]
; GENERIC-NEXT:    phsubsw (%rdi), %xmm0 # sched: [9:1.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_phsubsw:
; ATOM:       # %bb.0:
; ATOM-NEXT:    phsubsw %xmm1, %xmm0 # sched: [7:3.50]
; ATOM-NEXT:    phsubsw (%rdi), %xmm0 # sched: [8:4.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_phsubsw:
; SLM:       # %bb.0:
; SLM-NEXT:    phsubsw %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    phsubsw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_phsubsw:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:1.50]
; SANDY-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [9:1.50]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_phsubsw:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_phsubsw:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:2.00]
; HASWELL-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [9:2.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_phsubsw:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_phsubsw:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:2.00]
; BROADWELL-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [8:2.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_phsubsw:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; BROADWELL-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_phsubsw:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:2.00]
; SKYLAKE-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [9:2.00]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_phsubsw:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; SKYLAKE-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_phsubsw:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [3:2.00]
; SKX-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [9:2.00]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_phsubsw:
; SKX:       # %bb.0:
; SKX-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; SKX-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_phsubsw:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_phsubsw:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_phsubsw:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    phsubsw %xmm1, %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    phsubsw (%rdi), %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_phsubsw:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    vphsubsw (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
  %3 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %1, <8 x i16> %2)
  ret <8 x i16> %3
}
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

; Schedule-annotation test for phsubw / vphsubw.
; The intrinsic is applied to %a0 and %a1, then to that result and a vector
; loaded from %a2, so the checked assembly contains both the register-register
; and the register-memory form of the instruction.
define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; GENERIC-LABEL: test_phsubw:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:1.50]
; GENERIC-NEXT:    phsubw (%rdi), %xmm0 # sched: [9:1.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_phsubw:
; ATOM:       # %bb.0:
; ATOM-NEXT:    phsubw %xmm1, %xmm0 # sched: [7:3.50]
; ATOM-NEXT:    phsubw (%rdi), %xmm0 # sched: [8:4.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_phsubw:
; SLM:       # %bb.0:
; SLM-NEXT:    phsubw %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    phsubw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_phsubw:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:1.50]
; SANDY-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [9:1.50]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_phsubw:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; SANDY-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_phsubw:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:2.00]
; HASWELL-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [9:2.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_phsubw:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; HASWELL-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_phsubw:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:2.00]
; BROADWELL-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [8:2.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_phsubw:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; BROADWELL-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_phsubw:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:2.00]
; SKYLAKE-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [9:2.00]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_phsubw:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; SKYLAKE-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_phsubw:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [3:2.00]
; SKX-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [9:2.00]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_phsubw:
; SKX:       # %bb.0:
; SKX-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; SKX-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_phsubw:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_phsubw:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_phsubw:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    phsubw %xmm1, %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    phsubw (%rdi), %xmm0 # sched: [100:0.25]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_phsubw:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    vphsubw (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
  %3 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %1, <8 x i16> %2)
  ret <8 x i16> %3
}
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone

; Schedule-annotation test for pmaddubsw / vpmaddubsw.
; The intrinsic is applied to %a0 and %a1; its <8 x i16> result is bitcast back
; to <16 x i8> and fed to a second call together with a vector loaded from %a2,
; so the checked assembly contains both the register-register and the
; register-memory form of the instruction.
define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; GENERIC-LABEL: test_pmaddubsw:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_pmaddubsw:
; ATOM:       # %bb.0:
; ATOM-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_pmaddubsw:
; SLM:       # %bb.0:
; SLM-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [4:1.00]
; SLM-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_pmaddubsw:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_pmaddubsw:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_pmaddubsw:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
; HASWELL-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_pmaddubsw:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_pmaddubsw:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
; BROADWELL-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [10:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pmaddubsw:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_pmaddubsw:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [10:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pmaddubsw:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_pmaddubsw:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmaddubsw:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_pmaddubsw:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [2:1.00]
; BTVER2-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [7:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_pmaddubsw:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_pmaddubsw:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    pmaddubsw %xmm1, %xmm0 # sched: [4:1.00]
; ZNVER1-SSE-NEXT:    pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_pmaddubsw:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
; ZNVER1-NEXT:    vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1)
  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
  %3 = bitcast <8 x i16> %1 to <16 x i8>
  %4 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %3, <16 x i8> %2)
  ret <8 x i16> %4
}
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone

; Schedule-annotation test for pmulhrsw / vpmulhrsw.
; The intrinsic is applied to %a0 and %a1, then to that result and a vector
; loaded from %a2, so the checked assembly contains both the register-register
; and the register-memory form of the instruction.
define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; GENERIC-LABEL: test_pmulhrsw:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_pmulhrsw:
; ATOM:       # %bb.0:
; ATOM-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [5:5.00]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_pmulhrsw:
; SLM:       # %bb.0:
; SLM-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [4:1.00]
; SLM-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_pmulhrsw:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_pmulhrsw:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_pmulhrsw:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
; HASWELL-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_pmulhrsw:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_pmulhrsw:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
; BROADWELL-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [10:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pmulhrsw:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_pmulhrsw:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [10:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pmulhrsw:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_pmulhrsw:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [4:0.50]
; SKX-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [10:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pmulhrsw:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_pmulhrsw:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [2:1.00]
; BTVER2-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [7:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_pmulhrsw:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_pmulhrsw:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    pmulhrsw %xmm1, %xmm0 # sched: [4:1.00]
; ZNVER1-SSE-NEXT:    pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_pmulhrsw:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
; ZNVER1-NEXT:    vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  %2 = load <8 x i16>, <8 x i16> *%a2, align 16
  %3 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %1, <8 x i16> %2)
  ret <8 x i16> %3
}
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

; Schedule-annotation test for pshufb / vpshufb.
; The intrinsic is applied to %a0 and %a1, then to that result and a vector
; loaded from %a2, so the checked assembly contains both the register-register
; and the register-memory form of the instruction.
define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; GENERIC-LABEL: test_pshufb:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_pshufb:
; ATOM:       # %bb.0:
; ATOM-NEXT:    pshufb %xmm1, %xmm0 # sched: [4:2.00]
; ATOM-NEXT:    pshufb (%rdi), %xmm0 # sched: [5:2.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_pshufb:
; SLM:       # %bb.0:
; SLM-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:1.00]
; SLM-NEXT:    pshufb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_pshufb:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:0.50]
; SANDY-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:0.50]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_pshufb:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_pshufb:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:1.00]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_pshufb:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_pshufb:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [6:1.00]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pshufb:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_pshufb:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:1.00]
; SKYLAKE-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:1.00]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pshufb:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_pshufb:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:1.00]
; SKX-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:1.00]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_pshufb:
; SKX:       # %bb.0:
; SKX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKX-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_pshufb:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [2:2.00]
; BTVER2-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [7:2.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_pshufb:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BTVER2-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_pshufb:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    pshufb %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    pshufb (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_pshufb:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vpshufb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
  %3 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> %2)
  ret <16 x i8> %3
}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone

; Schedule-annotation test for psignb / vpsignb.
; The intrinsic is applied to %a0 and %a1, then to that result and a vector
; loaded from %a2, so the checked assembly contains both the register-register
; and the register-memory form of the instruction.
define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; GENERIC-LABEL: test_psignb:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT:    psignb (%rdi), %xmm0 # sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_psignb:
; ATOM:       # %bb.0:
; ATOM-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT:    psignb (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_psignb:
; SLM:       # %bb.0:
; SLM-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    psignb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_psignb:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
; SANDY-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [7:0.50]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_psignb:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_psignb:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
; HASWELL-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [7:0.50]
; HASWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_psignb:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_psignb:
; BROADWELL-SSE:       # %bb.0:
; BROADWELL-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
; BROADWELL-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [6:0.50]
; BROADWELL-SSE-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_psignb:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_psignb:
; SKYLAKE-SSE:       # %bb.0:
; SKYLAKE-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
; SKYLAKE-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [7:0.50]
; SKYLAKE-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_psignb:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_psignb:
; SKX-SSE:       # %bb.0:
; SKX-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
; SKX-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [7:0.50]
; SKX-SSE-NEXT:    retq # sched: [7:1.00]
;
; SKX-LABEL: test_psignb:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_psignb:
; BTVER2-SSE:       # %bb.0:
; BTVER2-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-SSE-NEXT:    retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_psignb:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_psignb:
; ZNVER1-SSE:       # %bb.0:
; ZNVER1-SSE-NEXT:    psignb %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT:    psignb (%rdi), %xmm0 # sched: [8:0.50]
; ZNVER1-SSE-NEXT:    retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_psignb:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT:    vpsignb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1)
  %2 = load <16 x i8>, <16 x i8> *%a2, align 16
  %3 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %1, <16 x i8> %2)
  ret <16 x i8> %3
}
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; GENERIC-LABEL: test_psignd:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT:    psignd (%rdi), %xmm0 # sched: [7:0.50]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; ATOM-LABEL: test_psignd:
; ATOM:       # %bb.0:
; ATOM-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
; ATOM-NEXT:    psignd (%rdi), %xmm0 # sched: [1:1.00]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    nop # sched: [1:0.50]
; ATOM-NEXT:    retq # sched: [79:39.50]
;
; SLM-LABEL: test_psignd:
; SLM:       # %bb.0:
; SLM-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT:    psignd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT:    retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_psignd:
; SANDY-SSE:       # %bb.0:
; SANDY-SSE-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
; SANDY-SSE-NEXT:    psignd (%rdi), %xmm0 # sched: [7:0.50]
; SANDY-SSE-NEXT:    retq # sched: [1:1.00]
;
; SANDY-LABEL: test_psignd:
; SANDY:       # %bb.0:
; SANDY-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT:    vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_psignd:
; HASWELL-SSE:       # %bb.0:
; HASWELL-SSE-NEXT:    psignd %xmm1, %xmm0 # sched: [1:0.50]
; HASWELL-SSE-NEXT:    psignd (%rdi), %xmm0 # sched: [7:0.50]
;
HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1664 ; 1665 ; HASWELL-LABEL: test_psignd: 1666 ; HASWELL: # %bb.0: 1667 ; HASWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1668 ; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 1669 ; HASWELL-NEXT: retq # sched: [7:1.00] 1670 ; 1671 ; BROADWELL-SSE-LABEL: test_psignd: 1672 ; BROADWELL-SSE: # %bb.0: 1673 ; BROADWELL-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] 1674 ; BROADWELL-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [6:0.50] 1675 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1676 ; 1677 ; BROADWELL-LABEL: test_psignd: 1678 ; BROADWELL: # %bb.0: 1679 ; BROADWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1680 ; BROADWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 1681 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1682 ; 1683 ; SKYLAKE-SSE-LABEL: test_psignd: 1684 ; SKYLAKE-SSE: # %bb.0: 1685 ; SKYLAKE-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] 1686 ; SKYLAKE-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] 1687 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1688 ; 1689 ; SKYLAKE-LABEL: test_psignd: 1690 ; SKYLAKE: # %bb.0: 1691 ; SKYLAKE-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1692 ; SKYLAKE-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 1693 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1694 ; 1695 ; SKX-SSE-LABEL: test_psignd: 1696 ; SKX-SSE: # %bb.0: 1697 ; SKX-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] 1698 ; SKX-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] 1699 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1700 ; 1701 ; SKX-LABEL: test_psignd: 1702 ; SKX: # %bb.0: 1703 ; SKX-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1704 ; SKX-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 1705 ; SKX-NEXT: retq # sched: [7:1.00] 1706 ; 1707 ; BTVER2-SSE-LABEL: test_psignd: 1708 ; BTVER2-SSE: # %bb.0: 1709 ; BTVER2-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] 1710 ; BTVER2-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [6:1.00] 1711 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 
1712 ; 1713 ; BTVER2-LABEL: test_psignd: 1714 ; BTVER2: # %bb.0: 1715 ; BTVER2-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1716 ; BTVER2-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 1717 ; BTVER2-NEXT: retq # sched: [4:1.00] 1718 ; 1719 ; ZNVER1-SSE-LABEL: test_psignd: 1720 ; ZNVER1-SSE: # %bb.0: 1721 ; ZNVER1-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.25] 1722 ; ZNVER1-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [8:0.50] 1723 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1724 ; 1725 ; ZNVER1-LABEL: test_psignd: 1726 ; ZNVER1: # %bb.0: 1727 ; ZNVER1-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 1728 ; ZNVER1-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 1729 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1730 %1 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) 1731 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 1732 %3 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %1, <4 x i32> %2) 1733 ret <4 x i32> %3 1734 } 1735 declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone 1736 1737 define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 1738 ; GENERIC-LABEL: test_psignw: 1739 ; GENERIC: # %bb.0: 1740 ; GENERIC-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] 1741 ; GENERIC-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] 1742 ; GENERIC-NEXT: retq # sched: [1:1.00] 1743 ; 1744 ; ATOM-LABEL: test_psignw: 1745 ; ATOM: # %bb.0: 1746 ; ATOM-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] 1747 ; ATOM-NEXT: psignw (%rdi), %xmm0 # sched: [1:1.00] 1748 ; ATOM-NEXT: nop # sched: [1:0.50] 1749 ; ATOM-NEXT: nop # sched: [1:0.50] 1750 ; ATOM-NEXT: nop # sched: [1:0.50] 1751 ; ATOM-NEXT: nop # sched: [1:0.50] 1752 ; ATOM-NEXT: retq # sched: [79:39.50] 1753 ; 1754 ; SLM-LABEL: test_psignw: 1755 ; SLM: # %bb.0: 1756 ; SLM-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] 1757 ; SLM-NEXT: psignw (%rdi), %xmm0 # sched: [4:1.00] 1758 ; SLM-NEXT: retq # sched: [4:1.00] 1759 ; 1760 ; SANDY-SSE-LABEL: 
test_psignw: 1761 ; SANDY-SSE: # %bb.0: 1762 ; SANDY-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] 1763 ; SANDY-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] 1764 ; SANDY-SSE-NEXT: retq # sched: [1:1.00] 1765 ; 1766 ; SANDY-LABEL: test_psignw: 1767 ; SANDY: # %bb.0: 1768 ; SANDY-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1769 ; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 1770 ; SANDY-NEXT: retq # sched: [1:1.00] 1771 ; 1772 ; HASWELL-SSE-LABEL: test_psignw: 1773 ; HASWELL-SSE: # %bb.0: 1774 ; HASWELL-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] 1775 ; HASWELL-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] 1776 ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] 1777 ; 1778 ; HASWELL-LABEL: test_psignw: 1779 ; HASWELL: # %bb.0: 1780 ; HASWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1781 ; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 1782 ; HASWELL-NEXT: retq # sched: [7:1.00] 1783 ; 1784 ; BROADWELL-SSE-LABEL: test_psignw: 1785 ; BROADWELL-SSE: # %bb.0: 1786 ; BROADWELL-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] 1787 ; BROADWELL-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [6:0.50] 1788 ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] 1789 ; 1790 ; BROADWELL-LABEL: test_psignw: 1791 ; BROADWELL: # %bb.0: 1792 ; BROADWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1793 ; BROADWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] 1794 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1795 ; 1796 ; SKYLAKE-SSE-LABEL: test_psignw: 1797 ; SKYLAKE-SSE: # %bb.0: 1798 ; SKYLAKE-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] 1799 ; SKYLAKE-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] 1800 ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] 1801 ; 1802 ; SKYLAKE-LABEL: test_psignw: 1803 ; SKYLAKE: # %bb.0: 1804 ; SKYLAKE-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1805 ; SKYLAKE-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 1806 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1807 ; 1808 ; SKX-SSE-LABEL: test_psignw: 1809 
; SKX-SSE: # %bb.0: 1810 ; SKX-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] 1811 ; SKX-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] 1812 ; SKX-SSE-NEXT: retq # sched: [7:1.00] 1813 ; 1814 ; SKX-LABEL: test_psignw: 1815 ; SKX: # %bb.0: 1816 ; SKX-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1817 ; SKX-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 1818 ; SKX-NEXT: retq # sched: [7:1.00] 1819 ; 1820 ; BTVER2-SSE-LABEL: test_psignw: 1821 ; BTVER2-SSE: # %bb.0: 1822 ; BTVER2-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] 1823 ; BTVER2-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [6:1.00] 1824 ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] 1825 ; 1826 ; BTVER2-LABEL: test_psignw: 1827 ; BTVER2: # %bb.0: 1828 ; BTVER2-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1829 ; BTVER2-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 1830 ; BTVER2-NEXT: retq # sched: [4:1.00] 1831 ; 1832 ; ZNVER1-SSE-LABEL: test_psignw: 1833 ; ZNVER1-SSE: # %bb.0: 1834 ; ZNVER1-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.25] 1835 ; ZNVER1-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [8:0.50] 1836 ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] 1837 ; 1838 ; ZNVER1-LABEL: test_psignw: 1839 ; ZNVER1: # %bb.0: 1840 ; ZNVER1-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 1841 ; ZNVER1-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 1842 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1843 %1 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) 1844 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 1845 %3 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %1, <8 x i16> %2) 1846 ret <8 x i16> %3 1847 } 1848 declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone 1849