1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL 4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL 5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE 6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=SKX 7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 8 9 define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) { 10 ; GENERIC-LABEL: test_broadcasti128: 11 ; GENERIC: # %bb.0: 12 ; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:1.00] 13 ; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 14 ; GENERIC-NEXT: retq # sched: [1:1.00] 15 ; 16 ; HASWELL-LABEL: test_broadcasti128: 17 ; HASWELL: # %bb.0: 18 ; HASWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50] 19 ; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 20 ; HASWELL-NEXT: retq # sched: [7:1.00] 21 ; 22 ; BROADWELL-LABEL: test_broadcasti128: 23 ; BROADWELL: # %bb.0: 24 ; BROADWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:0.50] 25 ; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 26 ; BROADWELL-NEXT: retq # sched: [7:1.00] 27 ; 28 ; SKYLAKE-LABEL: test_broadcasti128: 29 ; SKYLAKE: # %bb.0: 30 ; SKYLAKE-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50] 31 ; 
SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 32 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 33 ; 34 ; SKX-LABEL: test_broadcasti128: 35 ; SKX: # %bb.0: 36 ; SKX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50] 37 ; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 38 ; SKX-NEXT: retq # sched: [7:1.00] 39 ; 40 ; ZNVER1-LABEL: test_broadcasti128: 41 ; ZNVER1: # %bb.0: 42 ; ZNVER1-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [8:0.50] 43 ; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25] 44 ; ZNVER1-NEXT: retq # sched: [1:0.50] 45 %1 = load <4 x i32>, <4 x i32> *%a1, align 16 46 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 47 %3 = add <8 x i32> %2, %a0 48 ret <8 x i32> %3 49 } 50 51 define <4 x double> @test_broadcastsd_ymm(<2 x double> %a0) { 52 ; GENERIC-LABEL: test_broadcastsd_ymm: 53 ; GENERIC: # %bb.0: 54 ; GENERIC-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [1:1.00] 55 ; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 56 ; GENERIC-NEXT: retq # sched: [1:1.00] 57 ; 58 ; HASWELL-LABEL: test_broadcastsd_ymm: 59 ; HASWELL: # %bb.0: 60 ; HASWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] 61 ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 62 ; HASWELL-NEXT: retq # sched: [7:1.00] 63 ; 64 ; BROADWELL-LABEL: test_broadcastsd_ymm: 65 ; BROADWELL: # %bb.0: 66 ; BROADWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] 67 ; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 68 ; BROADWELL-NEXT: retq # sched: [7:1.00] 69 ; 70 ; SKYLAKE-LABEL: test_broadcastsd_ymm: 71 ; SKYLAKE: # %bb.0: 72 ; SKYLAKE-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] 73 ; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 74 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 75 ; 76 ; SKX-LABEL: test_broadcastsd_ymm: 77 ; SKX: # %bb.0: 78 ; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] 79 ; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # 
sched: [4:0.50] 80 ; SKX-NEXT: retq # sched: [7:1.00] 81 ; 82 ; ZNVER1-LABEL: test_broadcastsd_ymm: 83 ; ZNVER1: # %bb.0: 84 ; ZNVER1-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [100:0.25] 85 ; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 86 ; ZNVER1-NEXT: retq # sched: [1:0.50] 87 %1 = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer 88 %2 = fadd <4 x double> %1, %1 89 ret <4 x double> %2 90 } 91 92 define <4 x float> @test_broadcastss(<4 x float> %a0) { 93 ; GENERIC-LABEL: test_broadcastss: 94 ; GENERIC: # %bb.0: 95 ; GENERIC-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] 96 ; GENERIC-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 97 ; GENERIC-NEXT: retq # sched: [1:1.00] 98 ; 99 ; HASWELL-LABEL: test_broadcastss: 100 ; HASWELL: # %bb.0: 101 ; HASWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] 102 ; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 103 ; HASWELL-NEXT: retq # sched: [7:1.00] 104 ; 105 ; BROADWELL-LABEL: test_broadcastss: 106 ; BROADWELL: # %bb.0: 107 ; BROADWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] 108 ; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 109 ; BROADWELL-NEXT: retq # sched: [7:1.00] 110 ; 111 ; SKYLAKE-LABEL: test_broadcastss: 112 ; SKYLAKE: # %bb.0: 113 ; SKYLAKE-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] 114 ; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 115 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 116 ; 117 ; SKX-LABEL: test_broadcastss: 118 ; SKX: # %bb.0: 119 ; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] 120 ; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] 121 ; SKX-NEXT: retq # sched: [7:1.00] 122 ; 123 ; ZNVER1-LABEL: test_broadcastss: 124 ; ZNVER1: # %bb.0: 125 ; ZNVER1-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:0.50] 126 ; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] 127 ; ZNVER1-NEXT: retq # sched: [1:0.50] 128 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> 
zeroinitializer 129 %2 = fadd <4 x float> %1, %1 130 ret <4 x float> %2 131 } 132 133 define <8 x float> @test_broadcastss_ymm(<4 x float> %a0) { 134 ; GENERIC-LABEL: test_broadcastss_ymm: 135 ; GENERIC: # %bb.0: 136 ; GENERIC-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [1:1.00] 137 ; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 138 ; GENERIC-NEXT: retq # sched: [1:1.00] 139 ; 140 ; HASWELL-LABEL: test_broadcastss_ymm: 141 ; HASWELL: # %bb.0: 142 ; HASWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] 143 ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 144 ; HASWELL-NEXT: retq # sched: [7:1.00] 145 ; 146 ; BROADWELL-LABEL: test_broadcastss_ymm: 147 ; BROADWELL: # %bb.0: 148 ; BROADWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] 149 ; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 150 ; BROADWELL-NEXT: retq # sched: [7:1.00] 151 ; 152 ; SKYLAKE-LABEL: test_broadcastss_ymm: 153 ; SKYLAKE: # %bb.0: 154 ; SKYLAKE-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] 155 ; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 156 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 157 ; 158 ; SKX-LABEL: test_broadcastss_ymm: 159 ; SKX: # %bb.0: 160 ; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] 161 ; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] 162 ; SKX-NEXT: retq # sched: [7:1.00] 163 ; 164 ; ZNVER1-LABEL: test_broadcastss_ymm: 165 ; ZNVER1: # %bb.0: 166 ; ZNVER1-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [100:0.25] 167 ; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] 168 ; ZNVER1-NEXT: retq # sched: [1:0.50] 169 %1 = shufflevector <4 x float> %a0, <4 x float> undef, <8 x i32> zeroinitializer 170 %2 = fadd <8 x float> %1, %1 171 ret <8 x float> %2 172 } 173 174 define <4 x i32> @test_extracti128(<8 x i32> %a0, <8 x i32> %a1, <4 x i32> *%a2) { 175 ; GENERIC-LABEL: test_extracti128: 176 ; GENERIC: # %bb.0: 177 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50] 178 ; GENERIC-NEXT: vpsubd %ymm1, 
%ymm0, %ymm0 # sched: [1:0.50] 179 ; GENERIC-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [1:1.00] 180 ; GENERIC-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00] 181 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 182 ; GENERIC-NEXT: retq # sched: [1:1.00] 183 ; 184 ; HASWELL-LABEL: test_extracti128: 185 ; HASWELL: # %bb.0: 186 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50] 187 ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 188 ; HASWELL-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00] 189 ; HASWELL-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00] 190 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 191 ; HASWELL-NEXT: retq # sched: [7:1.00] 192 ; 193 ; BROADWELL-LABEL: test_extracti128: 194 ; BROADWELL: # %bb.0: 195 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.50] 196 ; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 197 ; BROADWELL-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00] 198 ; BROADWELL-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00] 199 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 200 ; BROADWELL-NEXT: retq # sched: [7:1.00] 201 ; 202 ; SKYLAKE-LABEL: test_extracti128: 203 ; SKYLAKE: # %bb.0: 204 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33] 205 ; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 206 ; SKYLAKE-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00] 207 ; SKYLAKE-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00] 208 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 209 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 210 ; 211 ; SKX-LABEL: test_extracti128: 212 ; SKX: # %bb.0: 213 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.33] 214 ; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 215 ; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [3:1.00] 216 ; SKX-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:1.00] 217 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 218 ; SKX-NEXT: retq # sched: [7:1.00] 219 ; 220 ; ZNVER1-LABEL: 
test_extracti128: 221 ; ZNVER1: # %bb.0: 222 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm2 # sched: [1:0.25] 223 ; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 224 ; ZNVER1-NEXT: vextracti128 $1, %ymm0, %xmm0 # sched: [2:0.25] 225 ; ZNVER1-NEXT: vextracti128 $1, %ymm2, (%rdi) # sched: [1:0.50] 226 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 227 ; ZNVER1-NEXT: retq # sched: [1:0.50] 228 %1 = add <8 x i32> %a0, %a1 229 %2 = sub <8 x i32> %a0, %a1 230 %3 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 231 %4 = shufflevector <8 x i32> %2, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 232 store <4 x i32> %3, <4 x i32> *%a2 233 ret <4 x i32> %4 234 } 235 236 define <2 x double> @test_gatherdpd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3) { 237 ; GENERIC-LABEL: test_gatherdpd: 238 ; GENERIC: # %bb.0: 239 ; GENERIC-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] 240 ; GENERIC-NEXT: retq # sched: [1:1.00] 241 ; 242 ; HASWELL-LABEL: test_gatherdpd: 243 ; HASWELL: # %bb.0: 244 ; HASWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67] 245 ; HASWELL-NEXT: retq # sched: [7:1.00] 246 ; 247 ; BROADWELL-LABEL: test_gatherdpd: 248 ; BROADWELL: # %bb.0: 249 ; BROADWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00] 250 ; BROADWELL-NEXT: retq # sched: [7:1.00] 251 ; 252 ; SKYLAKE-LABEL: test_gatherdpd: 253 ; SKYLAKE: # %bb.0: 254 ; SKYLAKE-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 255 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 256 ; 257 ; SKX-LABEL: test_gatherdpd: 258 ; SKX: # %bb.0: 259 ; SKX-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 260 ; SKX-NEXT: retq # sched: [7:1.00] 261 ; 262 ; ZNVER1-LABEL: test_gatherdpd: 263 ; ZNVER1: # %bb.0: 264 ; ZNVER1-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] 265 ; ZNVER1-NEXT: retq # sched: [1:0.50] 266 %1 = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x 
double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3, i8 2) 267 ret <2 x double> %1 268 } 269 declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8) nounwind readonly 270 271 define <4 x double> @test_gatherdpd_ymm(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3) { 272 ; GENERIC-LABEL: test_gatherdpd_ymm: 273 ; GENERIC: # %bb.0: 274 ; GENERIC-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [5:0.50] 275 ; GENERIC-NEXT: retq # sched: [1:1.00] 276 ; 277 ; HASWELL-LABEL: test_gatherdpd_ymm: 278 ; HASWELL: # %bb.0: 279 ; HASWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [27:4.00] 280 ; HASWELL-NEXT: retq # sched: [7:1.00] 281 ; 282 ; BROADWELL-LABEL: test_gatherdpd_ymm: 283 ; BROADWELL: # %bb.0: 284 ; BROADWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [26:5.00] 285 ; BROADWELL-NEXT: retq # sched: [7:1.00] 286 ; 287 ; SKYLAKE-LABEL: test_gatherdpd_ymm: 288 ; SKYLAKE: # %bb.0: 289 ; SKYLAKE-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00] 290 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 291 ; 292 ; SKX-LABEL: test_gatherdpd_ymm: 293 ; SKX: # %bb.0: 294 ; SKX-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00] 295 ; SKX-NEXT: retq # sched: [7:1.00] 296 ; 297 ; ZNVER1-LABEL: test_gatherdpd_ymm: 298 ; ZNVER1: # %bb.0: 299 ; ZNVER1-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [100:0.25] 300 ; ZNVER1-NEXT: retq # sched: [1:0.50] 301 %1 = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3, i8 8) 302 ret <4 x double> %1 303 } 304 declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8) nounwind readonly 305 306 define <4 x float> @test_gatherdps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3) { 307 ; GENERIC-LABEL: test_gatherdps: 308 ; GENERIC: # %bb.0: 309 ; GENERIC-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] 310 ; GENERIC-NEXT: 
retq # sched: [1:1.00] 311 ; 312 ; HASWELL-LABEL: test_gatherdps: 313 ; HASWELL: # %bb.0: 314 ; HASWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.67] 315 ; HASWELL-NEXT: retq # sched: [7:1.00] 316 ; 317 ; BROADWELL-LABEL: test_gatherdps: 318 ; BROADWELL: # %bb.0: 319 ; BROADWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00] 320 ; BROADWELL-NEXT: retq # sched: [7:1.00] 321 ; 322 ; SKYLAKE-LABEL: test_gatherdps: 323 ; SKYLAKE: # %bb.0: 324 ; SKYLAKE-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 325 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 326 ; 327 ; SKX-LABEL: test_gatherdps: 328 ; SKX: # %bb.0: 329 ; SKX-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 330 ; SKX-NEXT: retq # sched: [7:1.00] 331 ; 332 ; ZNVER1-LABEL: test_gatherdps: 333 ; ZNVER1: # %bb.0: 334 ; ZNVER1-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] 335 ; ZNVER1-NEXT: retq # sched: [1:0.50] 336 %1 = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3, i8 2) 337 ret <4 x float> %1 338 } 339 declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8) nounwind readonly 340 341 define <8 x float> @test_gatherdps_ymm(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3) { 342 ; GENERIC-LABEL: test_gatherdps_ymm: 343 ; GENERIC: # %bb.0: 344 ; GENERIC-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [5:0.50] 345 ; GENERIC-NEXT: retq # sched: [1:1.00] 346 ; 347 ; HASWELL-LABEL: test_gatherdps_ymm: 348 ; HASWELL: # %bb.0: 349 ; HASWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [27:6.50] 350 ; HASWELL-NEXT: retq # sched: [7:1.00] 351 ; 352 ; BROADWELL-LABEL: test_gatherdps_ymm: 353 ; BROADWELL: # %bb.0: 354 ; BROADWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [26:4.00] 355 ; BROADWELL-NEXT: retq # sched: [7:1.00] 356 ; 357 ; SKYLAKE-LABEL: test_gatherdps_ymm: 358 ; SKYLAKE: # %bb.0: 359 ; SKYLAKE-NEXT: 
vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00] 360 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 361 ; 362 ; SKX-LABEL: test_gatherdps_ymm: 363 ; SKX: # %bb.0: 364 ; SKX-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00] 365 ; SKX-NEXT: retq # sched: [7:1.00] 366 ; 367 ; ZNVER1-LABEL: test_gatherdps_ymm: 368 ; ZNVER1: # %bb.0: 369 ; ZNVER1-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [100:0.25] 370 ; ZNVER1-NEXT: retq # sched: [1:0.50] 371 %1 = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3, i8 4) 372 ret <8 x float> %1 373 } 374 declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) nounwind readonly 375 376 define <2 x double> @test_gatherqpd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3) { 377 ; GENERIC-LABEL: test_gatherqpd: 378 ; GENERIC: # %bb.0: 379 ; GENERIC-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50] 380 ; GENERIC-NEXT: retq # sched: [1:1.00] 381 ; 382 ; HASWELL-LABEL: test_gatherqpd: 383 ; HASWELL: # %bb.0: 384 ; HASWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [23:3.33] 385 ; HASWELL-NEXT: retq # sched: [7:1.00] 386 ; 387 ; BROADWELL-LABEL: test_gatherqpd: 388 ; BROADWELL: # %bb.0: 389 ; BROADWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:3.00] 390 ; BROADWELL-NEXT: retq # sched: [7:1.00] 391 ; 392 ; SKYLAKE-LABEL: test_gatherqpd: 393 ; SKYLAKE: # %bb.0: 394 ; SKYLAKE-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 395 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 396 ; 397 ; SKX-LABEL: test_gatherqpd: 398 ; SKX: # %bb.0: 399 ; SKX-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 400 ; SKX-NEXT: retq # sched: [7:1.00] 401 ; 402 ; ZNVER1-LABEL: test_gatherqpd: 403 ; ZNVER1: # %bb.0: 404 ; ZNVER1-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] 405 ; ZNVER1-NEXT: retq # sched: [1:0.50] 406 %1 = call <2 x double> 
@llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3, i8 2) 407 ret <2 x double> %1 408 } 409 declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8) nounwind readonly 410 411 define <4 x double> @test_gatherqpd_ymm(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3) { 412 ; GENERIC-LABEL: test_gatherqpd_ymm: 413 ; GENERIC: # %bb.0: 414 ; GENERIC-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [5:0.50] 415 ; GENERIC-NEXT: retq # sched: [1:1.00] 416 ; 417 ; HASWELL-LABEL: test_gatherqpd_ymm: 418 ; HASWELL: # %bb.0: 419 ; HASWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [24:5.00] 420 ; HASWELL-NEXT: retq # sched: [7:1.00] 421 ; 422 ; BROADWELL-LABEL: test_gatherqpd_ymm: 423 ; BROADWELL: # %bb.0: 424 ; BROADWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [23:3.00] 425 ; BROADWELL-NEXT: retq # sched: [7:1.00] 426 ; 427 ; SKYLAKE-LABEL: test_gatherqpd_ymm: 428 ; SKYLAKE: # %bb.0: 429 ; SKYLAKE-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00] 430 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 431 ; 432 ; SKX-LABEL: test_gatherqpd_ymm: 433 ; SKX: # %bb.0: 434 ; SKX-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00] 435 ; SKX-NEXT: retq # sched: [7:1.00] 436 ; 437 ; ZNVER1-LABEL: test_gatherqpd_ymm: 438 ; ZNVER1: # %bb.0: 439 ; ZNVER1-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [100:0.25] 440 ; ZNVER1-NEXT: retq # sched: [1:0.50] 441 %1 = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3, i8 8) 442 ret <4 x double> %1 443 } 444 declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8) nounwind readonly 445 446 define <4 x float> @test_gatherqps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3) { 447 ; GENERIC-LABEL: test_gatherqps: 448 ; GENERIC: # %bb.0: 449 ; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: 
[5:0.50] 450 ; GENERIC-NEXT: retq # sched: [1:1.00] 451 ; 452 ; HASWELL-LABEL: test_gatherqps: 453 ; HASWELL: # %bb.0: 454 ; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.67] 455 ; HASWELL-NEXT: retq # sched: [7:1.00] 456 ; 457 ; BROADWELL-LABEL: test_gatherqps: 458 ; BROADWELL: # %bb.0: 459 ; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [27:5.00] 460 ; BROADWELL-NEXT: retq # sched: [7:1.00] 461 ; 462 ; SKYLAKE-LABEL: test_gatherqps: 463 ; SKYLAKE: # %bb.0: 464 ; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 465 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 466 ; 467 ; SKX-LABEL: test_gatherqps: 468 ; SKX: # %bb.0: 469 ; SKX-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00] 470 ; SKX-NEXT: retq # sched: [7:1.00] 471 ; 472 ; ZNVER1-LABEL: test_gatherqps: 473 ; ZNVER1: # %bb.0: 474 ; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25] 475 ; ZNVER1-NEXT: retq # sched: [1:0.50] 476 %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3, i8 2) 477 ret <4 x float> %1 478 } 479 declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8) nounwind readonly 480 481 define <4 x float> @test_gatherqps_ymm(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3) { 482 ; GENERIC-LABEL: test_gatherqps_ymm: 483 ; GENERIC: # %bb.0: 484 ; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [5:0.50] 485 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 486 ; GENERIC-NEXT: retq # sched: [1:1.00] 487 ; 488 ; HASWELL-LABEL: test_gatherqps_ymm: 489 ; HASWELL: # %bb.0: 490 ; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [28:3.67] 491 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 492 ; HASWELL-NEXT: retq # sched: [7:1.00] 493 ; 494 ; BROADWELL-LABEL: test_gatherqps_ymm: 495 ; BROADWELL: # %bb.0: 496 ; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [24:5.00] 497 ; 
BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 498 ; BROADWELL-NEXT: retq # sched: [7:1.00] 499 ; 500 ; SKYLAKE-LABEL: test_gatherqps_ymm: 501 ; SKYLAKE: # %bb.0: 502 ; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00] 503 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 504 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 505 ; 506 ; SKX-LABEL: test_gatherqps_ymm: 507 ; SKX: # %bb.0: 508 ; SKX-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00] 509 ; SKX-NEXT: vzeroupper # sched: [4:1.00] 510 ; SKX-NEXT: retq # sched: [7:1.00] 511 ; 512 ; ZNVER1-LABEL: test_gatherqps_ymm: 513 ; ZNVER1: # %bb.0: 514 ; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [100:0.25] 515 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 516 ; ZNVER1-NEXT: retq # sched: [1:0.50] 517 %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3, i8 4) 518 ret <4 x float> %1 519 } 520 declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8) nounwind readonly 521 522 define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 523 ; GENERIC-LABEL: test_inserti128: 524 ; GENERIC: # %bb.0: 525 ; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] 526 ; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 527 ; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 528 ; GENERIC-NEXT: retq # sched: [1:1.00] 529 ; 530 ; HASWELL-LABEL: test_inserti128: 531 ; HASWELL: # %bb.0: 532 ; HASWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] 533 ; HASWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 534 ; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 535 ; HASWELL-NEXT: retq # sched: [7:1.00] 536 ; 537 ; BROADWELL-LABEL: test_inserti128: 538 ; BROADWELL: # %bb.0: 539 ; BROADWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] 540 ; BROADWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, 
%ymm0 # sched: [6:0.50] 541 ; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 542 ; BROADWELL-NEXT: retq # sched: [7:1.00] 543 ; 544 ; SKYLAKE-LABEL: test_inserti128: 545 ; SKYLAKE: # %bb.0: 546 ; SKYLAKE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] 547 ; SKYLAKE-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 548 ; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 549 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 550 ; 551 ; SKX-LABEL: test_inserti128: 552 ; SKX: # %bb.0: 553 ; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] 554 ; SKX-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 555 ; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 556 ; SKX-NEXT: retq # sched: [7:1.00] 557 ; 558 ; ZNVER1-LABEL: test_inserti128: 559 ; ZNVER1: # %bb.0: 560 ; ZNVER1-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.25] 561 ; ZNVER1-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50] 562 ; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25] 563 ; ZNVER1-NEXT: retq # sched: [1:0.50] 564 %1 = shufflevector <4 x i32> %a1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 565 %2 = shufflevector <8 x i32> %a0, <8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 566 %3 = load <4 x i32>, <4 x i32> *%a2, align 16 567 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> 568 %5 = shufflevector <8 x i32> %a0, <8 x i32> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 569 %6 = add <8 x i32> %2, %5 570 ret <8 x i32> %6 571 } 572 573 define <4 x i64> @test_movntdqa(i8* %a0) { 574 ; GENERIC-LABEL: test_movntdqa: 575 ; GENERIC: # %bb.0: 576 ; GENERIC-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50] 577 ; GENERIC-NEXT: retq # sched: [1:1.00] 578 ; 579 ; HASWELL-LABEL: test_movntdqa: 580 ; HASWELL: # %bb.0: 581 ; 
HASWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50] 582 ; HASWELL-NEXT: retq # sched: [7:1.00] 583 ; 584 ; BROADWELL-LABEL: test_movntdqa: 585 ; BROADWELL: # %bb.0: 586 ; BROADWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [6:0.50] 587 ; BROADWELL-NEXT: retq # sched: [7:1.00] 588 ; 589 ; SKYLAKE-LABEL: test_movntdqa: 590 ; SKYLAKE: # %bb.0: 591 ; SKYLAKE-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50] 592 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 593 ; 594 ; SKX-LABEL: test_movntdqa: 595 ; SKX: # %bb.0: 596 ; SKX-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50] 597 ; SKX-NEXT: retq # sched: [7:1.00] 598 ; 599 ; ZNVER1-LABEL: test_movntdqa: 600 ; ZNVER1: # %bb.0: 601 ; ZNVER1-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [8:0.50] 602 ; ZNVER1-NEXT: retq # sched: [1:0.50] 603 %1 = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0) 604 ret <4 x i64> %1 605 } 606 declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly 607 608 define <16 x i16> @test_mpsadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 609 ; GENERIC-LABEL: test_mpsadbw: 610 ; GENERIC: # %bb.0: 611 ; GENERIC-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:1.00] 612 ; GENERIC-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [14:1.00] 613 ; GENERIC-NEXT: retq # sched: [1:1.00] 614 ; 615 ; HASWELL-LABEL: test_mpsadbw: 616 ; HASWELL: # %bb.0: 617 ; HASWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00] 618 ; HASWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [14:2.00] 619 ; HASWELL-NEXT: retq # sched: [7:1.00] 620 ; 621 ; BROADWELL-LABEL: test_mpsadbw: 622 ; BROADWELL: # %bb.0: 623 ; BROADWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00] 624 ; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [13:2.00] 625 ; BROADWELL-NEXT: retq # sched: [7:1.00] 626 ; 627 ; SKYLAKE-LABEL: test_mpsadbw: 628 ; SKYLAKE: # %bb.0: 629 ; SKYLAKE-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00] 630 ; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00] 631 ; 
SKYLAKE-NEXT: retq # sched: [7:1.00] 632 ; 633 ; SKX-LABEL: test_mpsadbw: 634 ; SKX: # %bb.0: 635 ; SKX-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00] 636 ; SKX-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00] 637 ; SKX-NEXT: retq # sched: [7:1.00] 638 ; 639 ; ZNVER1-LABEL: test_mpsadbw: 640 ; ZNVER1: # %bb.0: 641 ; ZNVER1-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 642 ; ZNVER1-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 643 ; ZNVER1-NEXT: retq # sched: [1:0.50] 644 %1 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) 645 %2 = bitcast <16 x i16> %1 to <32 x i8> 646 %3 = load <32 x i8>, <32 x i8> *%a2, align 32 647 %4 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %2, <32 x i8> %3, i8 7) 648 ret <16 x i16> %4 649 } 650 declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone 651 652 define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) { 653 ; GENERIC-LABEL: test_pabsb: 654 ; GENERIC: # %bb.0: 655 ; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] 656 ; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] 657 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 658 ; GENERIC-NEXT: retq # sched: [1:1.00] 659 ; 660 ; HASWELL-LABEL: test_pabsb: 661 ; HASWELL: # %bb.0: 662 ; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] 663 ; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] 664 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 665 ; HASWELL-NEXT: retq # sched: [7:1.00] 666 ; 667 ; BROADWELL-LABEL: test_pabsb: 668 ; BROADWELL: # %bb.0: 669 ; BROADWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] 670 ; BROADWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [7:0.50] 671 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 672 ; BROADWELL-NEXT: retq # sched: [7:1.00] 673 ; 674 ; SKYLAKE-LABEL: test_pabsb: 675 ; SKYLAKE: # %bb.0: 676 ; SKYLAKE-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] 677 ; SKYLAKE-NEXT: vpabsb (%rdi), %ymm1 # 
sched: [8:0.50] 678 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 679 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 680 ; 681 ; SKX-LABEL: test_pabsb: 682 ; SKX: # %bb.0: 683 ; SKX-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] 684 ; SKX-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] 685 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 686 ; SKX-NEXT: retq # sched: [7:1.00] 687 ; 688 ; ZNVER1-LABEL: test_pabsb: 689 ; ZNVER1: # %bb.0: 690 ; ZNVER1-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50] 691 ; ZNVER1-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.25] 692 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 693 ; ZNVER1-NEXT: retq # sched: [1:0.50] 694 %1 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) 695 %2 = load <32 x i8>, <32 x i8> *%a1, align 32 696 %3 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %2) 697 %4 = or <32 x i8> %1, %3 698 ret <32 x i8> %4 699 } 700 declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone 701 702 define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) { 703 ; GENERIC-LABEL: test_pabsd: 704 ; GENERIC: # %bb.0: 705 ; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] 706 ; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] 707 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 708 ; GENERIC-NEXT: retq # sched: [1:1.00] 709 ; 710 ; HASWELL-LABEL: test_pabsd: 711 ; HASWELL: # %bb.0: 712 ; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] 713 ; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] 714 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 715 ; HASWELL-NEXT: retq # sched: [7:1.00] 716 ; 717 ; BROADWELL-LABEL: test_pabsd: 718 ; BROADWELL: # %bb.0: 719 ; BROADWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] 720 ; BROADWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:0.50] 721 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 722 ; BROADWELL-NEXT: retq # sched: [7:1.00] 723 ; 724 ; SKYLAKE-LABEL: test_pabsd: 725 ; SKYLAKE: # %bb.0: 726 ; SKYLAKE-NEXT: vpabsd %ymm0, 
%ymm0 # sched: [1:0.50] 727 ; SKYLAKE-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] 728 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 729 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 730 ; 731 ; SKX-LABEL: test_pabsd: 732 ; SKX: # %bb.0: 733 ; SKX-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] 734 ; SKX-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] 735 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 736 ; SKX-NEXT: retq # sched: [7:1.00] 737 ; 738 ; ZNVER1-LABEL: test_pabsd: 739 ; ZNVER1: # %bb.0: 740 ; ZNVER1-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50] 741 ; ZNVER1-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.25] 742 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 743 ; ZNVER1-NEXT: retq # sched: [1:0.50] 744 %1 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) 745 %2 = load <8 x i32>, <8 x i32> *%a1, align 32 746 %3 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %2) 747 %4 = or <8 x i32> %1, %3 748 ret <8 x i32> %4 749 } 750 declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone 751 752 define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) { 753 ; GENERIC-LABEL: test_pabsw: 754 ; GENERIC: # %bb.0: 755 ; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] 756 ; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] 757 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 758 ; GENERIC-NEXT: retq # sched: [1:1.00] 759 ; 760 ; HASWELL-LABEL: test_pabsw: 761 ; HASWELL: # %bb.0: 762 ; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] 763 ; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] 764 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 765 ; HASWELL-NEXT: retq # sched: [7:1.00] 766 ; 767 ; BROADWELL-LABEL: test_pabsw: 768 ; BROADWELL: # %bb.0: 769 ; BROADWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] 770 ; BROADWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:0.50] 771 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 772 ; BROADWELL-NEXT: retq # sched: [7:1.00] 773 ; 774 ; SKYLAKE-LABEL: 
test_pabsw: 775 ; SKYLAKE: # %bb.0: 776 ; SKYLAKE-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] 777 ; SKYLAKE-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] 778 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 779 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 780 ; 781 ; SKX-LABEL: test_pabsw: 782 ; SKX: # %bb.0: 783 ; SKX-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] 784 ; SKX-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] 785 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 786 ; SKX-NEXT: retq # sched: [7:1.00] 787 ; 788 ; ZNVER1-LABEL: test_pabsw: 789 ; ZNVER1: # %bb.0: 790 ; ZNVER1-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50] 791 ; ZNVER1-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.25] 792 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 793 ; ZNVER1-NEXT: retq # sched: [1:0.50] 794 %1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) 795 %2 = load <16 x i16>, <16 x i16> *%a1, align 32 796 %3 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %2) 797 %4 = or <16 x i16> %1, %3 798 ret <16 x i16> %4 799 } 800 declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone 801 802 define <16 x i16> @test_packssdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 803 ; GENERIC-LABEL: test_packssdw: 804 ; GENERIC: # %bb.0: 805 ; GENERIC-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 806 ; GENERIC-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 807 ; GENERIC-NEXT: retq # sched: [1:1.00] 808 ; 809 ; HASWELL-LABEL: test_packssdw: 810 ; HASWELL: # %bb.0: 811 ; HASWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 812 ; HASWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 813 ; HASWELL-NEXT: retq # sched: [7:1.00] 814 ; 815 ; BROADWELL-LABEL: test_packssdw: 816 ; BROADWELL: # %bb.0: 817 ; BROADWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 818 ; BROADWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 819 ; BROADWELL-NEXT: retq # sched: [7:1.00] 820 ; 821 ; SKYLAKE-LABEL: test_packssdw: 822 ; SKYLAKE: 
# %bb.0: 823 ; SKYLAKE-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 824 ; SKYLAKE-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 825 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 826 ; 827 ; SKX-LABEL: test_packssdw: 828 ; SKX: # %bb.0: 829 ; SKX-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 830 ; SKX-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 831 ; SKX-NEXT: retq # sched: [7:1.00] 832 ; 833 ; ZNVER1-LABEL: test_packssdw: 834 ; ZNVER1: # %bb.0: 835 ; ZNVER1-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 836 ; ZNVER1-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 837 ; ZNVER1-NEXT: retq # sched: [1:0.50] 838 %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) 839 %2 = bitcast <16 x i16> %1 to <8 x i32> 840 %3 = load <8 x i32>, <8 x i32> *%a2, align 32 841 %4 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %2, <8 x i32> %3) 842 ret <16 x i16> %4 843 } 844 declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone 845 846 define <32 x i8> @test_packsswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 847 ; GENERIC-LABEL: test_packsswb: 848 ; GENERIC: # %bb.0: 849 ; GENERIC-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 850 ; GENERIC-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 851 ; GENERIC-NEXT: retq # sched: [1:1.00] 852 ; 853 ; HASWELL-LABEL: test_packsswb: 854 ; HASWELL: # %bb.0: 855 ; HASWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 856 ; HASWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 857 ; HASWELL-NEXT: retq # sched: [7:1.00] 858 ; 859 ; BROADWELL-LABEL: test_packsswb: 860 ; BROADWELL: # %bb.0: 861 ; BROADWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 862 ; BROADWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 863 ; BROADWELL-NEXT: retq # sched: [7:1.00] 864 ; 865 ; SKYLAKE-LABEL: test_packsswb: 866 ; SKYLAKE: # %bb.0: 867 ; SKYLAKE-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 868 ; 
SKYLAKE-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 869 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 870 ; 871 ; SKX-LABEL: test_packsswb: 872 ; SKX: # %bb.0: 873 ; SKX-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 874 ; SKX-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 875 ; SKX-NEXT: retq # sched: [7:1.00] 876 ; 877 ; ZNVER1-LABEL: test_packsswb: 878 ; ZNVER1: # %bb.0: 879 ; ZNVER1-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 880 ; ZNVER1-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 881 ; ZNVER1-NEXT: retq # sched: [1:0.50] 882 %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) 883 %2 = bitcast <32 x i8> %1 to <16 x i16> 884 %3 = load <16 x i16>, <16 x i16> *%a2, align 32 885 %4 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %2, <16 x i16> %3) 886 ret <32 x i8> %4 887 } 888 declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone 889 890 define <16 x i16> @test_packusdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 891 ; GENERIC-LABEL: test_packusdw: 892 ; GENERIC: # %bb.0: 893 ; GENERIC-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 894 ; GENERIC-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 895 ; GENERIC-NEXT: retq # sched: [1:1.00] 896 ; 897 ; HASWELL-LABEL: test_packusdw: 898 ; HASWELL: # %bb.0: 899 ; HASWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 900 ; HASWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 901 ; HASWELL-NEXT: retq # sched: [7:1.00] 902 ; 903 ; BROADWELL-LABEL: test_packusdw: 904 ; BROADWELL: # %bb.0: 905 ; BROADWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 906 ; BROADWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 907 ; BROADWELL-NEXT: retq # sched: [7:1.00] 908 ; 909 ; SKYLAKE-LABEL: test_packusdw: 910 ; SKYLAKE: # %bb.0: 911 ; SKYLAKE-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 912 ; SKYLAKE-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 913 ; SKYLAKE-NEXT: 
retq # sched: [7:1.00] 914 ; 915 ; SKX-LABEL: test_packusdw: 916 ; SKX: # %bb.0: 917 ; SKX-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 918 ; SKX-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 919 ; SKX-NEXT: retq # sched: [7:1.00] 920 ; 921 ; ZNVER1-LABEL: test_packusdw: 922 ; ZNVER1: # %bb.0: 923 ; ZNVER1-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 924 ; ZNVER1-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 925 ; ZNVER1-NEXT: retq # sched: [1:0.50] 926 %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) 927 %2 = bitcast <16 x i16> %1 to <8 x i32> 928 %3 = load <8 x i32>, <8 x i32> *%a2, align 32 929 %4 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %2, <8 x i32> %3) 930 ret <16 x i16> %4 931 } 932 declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone 933 934 define <32 x i8> @test_packuswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 935 ; GENERIC-LABEL: test_packuswb: 936 ; GENERIC: # %bb.0: 937 ; GENERIC-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 938 ; GENERIC-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 939 ; GENERIC-NEXT: retq # sched: [1:1.00] 940 ; 941 ; HASWELL-LABEL: test_packuswb: 942 ; HASWELL: # %bb.0: 943 ; HASWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 944 ; HASWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 945 ; HASWELL-NEXT: retq # sched: [7:1.00] 946 ; 947 ; BROADWELL-LABEL: test_packuswb: 948 ; BROADWELL: # %bb.0: 949 ; BROADWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 950 ; BROADWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 951 ; BROADWELL-NEXT: retq # sched: [7:1.00] 952 ; 953 ; SKYLAKE-LABEL: test_packuswb: 954 ; SKYLAKE: # %bb.0: 955 ; SKYLAKE-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 956 ; SKYLAKE-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 957 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 958 ; 959 ; SKX-LABEL: test_packuswb: 960 ; SKX: # %bb.0: 961 ; 
SKX-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 962 ; SKX-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 963 ; SKX-NEXT: retq # sched: [7:1.00] 964 ; 965 ; ZNVER1-LABEL: test_packuswb: 966 ; ZNVER1: # %bb.0: 967 ; ZNVER1-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 968 ; ZNVER1-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 969 ; ZNVER1-NEXT: retq # sched: [1:0.50] 970 %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) 971 %2 = bitcast <32 x i8> %1 to <16 x i16> 972 %3 = load <16 x i16>, <16 x i16> *%a2, align 32 973 %4 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %2, <16 x i16> %3) 974 ret <32 x i8> %4 975 } 976 declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone 977 978 define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 979 ; GENERIC-LABEL: test_paddb: 980 ; GENERIC: # %bb.0: 981 ; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 982 ; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 983 ; GENERIC-NEXT: retq # sched: [1:1.00] 984 ; 985 ; HASWELL-LABEL: test_paddb: 986 ; HASWELL: # %bb.0: 987 ; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 988 ; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 989 ; HASWELL-NEXT: retq # sched: [7:1.00] 990 ; 991 ; BROADWELL-LABEL: test_paddb: 992 ; BROADWELL: # %bb.0: 993 ; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 994 ; BROADWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 995 ; BROADWELL-NEXT: retq # sched: [7:1.00] 996 ; 997 ; SKYLAKE-LABEL: test_paddb: 998 ; SKYLAKE: # %bb.0: 999 ; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1000 ; SKYLAKE-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1001 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1002 ; 1003 ; SKX-LABEL: test_paddb: 1004 ; SKX: # %bb.0: 1005 ; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1006 ; SKX-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1007 
; SKX-NEXT: retq # sched: [7:1.00] 1008 ; 1009 ; ZNVER1-LABEL: test_paddb: 1010 ; ZNVER1: # %bb.0: 1011 ; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1012 ; ZNVER1-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1013 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1014 %1 = add <32 x i8> %a0, %a1 1015 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 1016 %3 = add <32 x i8> %1, %2 1017 ret <32 x i8> %3 1018 } 1019 1020 define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 1021 ; GENERIC-LABEL: test_paddd: 1022 ; GENERIC: # %bb.0: 1023 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1024 ; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1025 ; GENERIC-NEXT: retq # sched: [1:1.00] 1026 ; 1027 ; HASWELL-LABEL: test_paddd: 1028 ; HASWELL: # %bb.0: 1029 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1030 ; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1031 ; HASWELL-NEXT: retq # sched: [7:1.00] 1032 ; 1033 ; BROADWELL-LABEL: test_paddd: 1034 ; BROADWELL: # %bb.0: 1035 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1036 ; BROADWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1037 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1038 ; 1039 ; SKYLAKE-LABEL: test_paddd: 1040 ; SKYLAKE: # %bb.0: 1041 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1042 ; SKYLAKE-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1043 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1044 ; 1045 ; SKX-LABEL: test_paddd: 1046 ; SKX: # %bb.0: 1047 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1048 ; SKX-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1049 ; SKX-NEXT: retq # sched: [7:1.00] 1050 ; 1051 ; ZNVER1-LABEL: test_paddd: 1052 ; ZNVER1: # %bb.0: 1053 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1054 ; ZNVER1-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1055 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1056 %1 = add <8 x i32> %a0, %a1 1057 %2 = load <8 x i32>, 
<8 x i32> *%a2, align 32 1058 %3 = add <8 x i32> %1, %2 1059 ret <8 x i32> %3 1060 } 1061 1062 define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 1063 ; GENERIC-LABEL: test_paddq: 1064 ; GENERIC: # %bb.0: 1065 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1066 ; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1067 ; GENERIC-NEXT: retq # sched: [1:1.00] 1068 ; 1069 ; HASWELL-LABEL: test_paddq: 1070 ; HASWELL: # %bb.0: 1071 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1072 ; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1073 ; HASWELL-NEXT: retq # sched: [7:1.00] 1074 ; 1075 ; BROADWELL-LABEL: test_paddq: 1076 ; BROADWELL: # %bb.0: 1077 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1078 ; BROADWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1079 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1080 ; 1081 ; SKYLAKE-LABEL: test_paddq: 1082 ; SKYLAKE: # %bb.0: 1083 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1084 ; SKYLAKE-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1085 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1086 ; 1087 ; SKX-LABEL: test_paddq: 1088 ; SKX: # %bb.0: 1089 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1090 ; SKX-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1091 ; SKX-NEXT: retq # sched: [7:1.00] 1092 ; 1093 ; ZNVER1-LABEL: test_paddq: 1094 ; ZNVER1: # %bb.0: 1095 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1096 ; ZNVER1-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1097 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1098 %1 = add <4 x i64> %a0, %a1 1099 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 1100 %3 = add <4 x i64> %1, %2 1101 ret <4 x i64> %3 1102 } 1103 1104 define <32 x i8> @test_paddsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 1105 ; GENERIC-LABEL: test_paddsb: 1106 ; GENERIC: # %bb.0: 1107 ; GENERIC-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1108 ; GENERIC-NEXT: vpaddsb 
(%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1109 ; GENERIC-NEXT: retq # sched: [1:1.00] 1110 ; 1111 ; HASWELL-LABEL: test_paddsb: 1112 ; HASWELL: # %bb.0: 1113 ; HASWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1114 ; HASWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1115 ; HASWELL-NEXT: retq # sched: [7:1.00] 1116 ; 1117 ; BROADWELL-LABEL: test_paddsb: 1118 ; BROADWELL: # %bb.0: 1119 ; BROADWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1120 ; BROADWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1121 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1122 ; 1123 ; SKYLAKE-LABEL: test_paddsb: 1124 ; SKYLAKE: # %bb.0: 1125 ; SKYLAKE-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1126 ; SKYLAKE-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1127 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1128 ; 1129 ; SKX-LABEL: test_paddsb: 1130 ; SKX: # %bb.0: 1131 ; SKX-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1132 ; SKX-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1133 ; SKX-NEXT: retq # sched: [7:1.00] 1134 ; 1135 ; ZNVER1-LABEL: test_paddsb: 1136 ; ZNVER1: # %bb.0: 1137 ; ZNVER1-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1138 ; ZNVER1-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1139 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1140 %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1) 1141 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 1142 %3 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %1, <32 x i8> %2) 1143 ret <32 x i8> %3 1144 } 1145 declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone 1146 1147 define <16 x i16> @test_paddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 1148 ; GENERIC-LABEL: test_paddsw: 1149 ; GENERIC: # %bb.0: 1150 ; GENERIC-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1151 ; GENERIC-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1152 ; GENERIC-NEXT: retq # sched: [1:1.00] 1153 ; 1154 ; HASWELL-LABEL: test_paddsw: 1155 ; 
HASWELL: # %bb.0: 1156 ; HASWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1157 ; HASWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1158 ; HASWELL-NEXT: retq # sched: [7:1.00] 1159 ; 1160 ; BROADWELL-LABEL: test_paddsw: 1161 ; BROADWELL: # %bb.0: 1162 ; BROADWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1163 ; BROADWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1164 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1165 ; 1166 ; SKYLAKE-LABEL: test_paddsw: 1167 ; SKYLAKE: # %bb.0: 1168 ; SKYLAKE-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1169 ; SKYLAKE-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1170 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1171 ; 1172 ; SKX-LABEL: test_paddsw: 1173 ; SKX: # %bb.0: 1174 ; SKX-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1175 ; SKX-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1176 ; SKX-NEXT: retq # sched: [7:1.00] 1177 ; 1178 ; ZNVER1-LABEL: test_paddsw: 1179 ; ZNVER1: # %bb.0: 1180 ; ZNVER1-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1181 ; ZNVER1-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1182 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1183 %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1) 1184 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 1185 %3 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %1, <16 x i16> %2) 1186 ret <16 x i16> %3 1187 } 1188 declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone 1189 1190 define <32 x i8> @test_paddusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 1191 ; GENERIC-LABEL: test_paddusb: 1192 ; GENERIC: # %bb.0: 1193 ; GENERIC-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1194 ; GENERIC-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1195 ; GENERIC-NEXT: retq # sched: [1:1.00] 1196 ; 1197 ; HASWELL-LABEL: test_paddusb: 1198 ; HASWELL: # %bb.0: 1199 ; HASWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1200 ; HASWELL-NEXT: vpaddusb 
(%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1201 ; HASWELL-NEXT: retq # sched: [7:1.00] 1202 ; 1203 ; BROADWELL-LABEL: test_paddusb: 1204 ; BROADWELL: # %bb.0: 1205 ; BROADWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1206 ; BROADWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1207 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1208 ; 1209 ; SKYLAKE-LABEL: test_paddusb: 1210 ; SKYLAKE: # %bb.0: 1211 ; SKYLAKE-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1212 ; SKYLAKE-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1213 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1214 ; 1215 ; SKX-LABEL: test_paddusb: 1216 ; SKX: # %bb.0: 1217 ; SKX-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1218 ; SKX-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1219 ; SKX-NEXT: retq # sched: [7:1.00] 1220 ; 1221 ; ZNVER1-LABEL: test_paddusb: 1222 ; ZNVER1: # %bb.0: 1223 ; ZNVER1-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1224 ; ZNVER1-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1225 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1226 %1 = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1) 1227 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 1228 %3 = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %1, <32 x i8> %2) 1229 ret <32 x i8> %3 1230 } 1231 declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone 1232 1233 define <16 x i16> @test_paddusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 1234 ; GENERIC-LABEL: test_paddusw: 1235 ; GENERIC: # %bb.0: 1236 ; GENERIC-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1237 ; GENERIC-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1238 ; GENERIC-NEXT: retq # sched: [1:1.00] 1239 ; 1240 ; HASWELL-LABEL: test_paddusw: 1241 ; HASWELL: # %bb.0: 1242 ; HASWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1243 ; HASWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1244 ; HASWELL-NEXT: retq # sched: [7:1.00] 1245 ; 1246 ; 
BROADWELL-LABEL: test_paddusw: 1247 ; BROADWELL: # %bb.0: 1248 ; BROADWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1249 ; BROADWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1250 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1251 ; 1252 ; SKYLAKE-LABEL: test_paddusw: 1253 ; SKYLAKE: # %bb.0: 1254 ; SKYLAKE-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1255 ; SKYLAKE-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1256 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1257 ; 1258 ; SKX-LABEL: test_paddusw: 1259 ; SKX: # %bb.0: 1260 ; SKX-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1261 ; SKX-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1262 ; SKX-NEXT: retq # sched: [7:1.00] 1263 ; 1264 ; ZNVER1-LABEL: test_paddusw: 1265 ; ZNVER1: # %bb.0: 1266 ; ZNVER1-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1267 ; ZNVER1-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1268 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1269 %1 = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1) 1270 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 1271 %3 = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %1, <16 x i16> %2) 1272 ret <16 x i16> %3 1273 } 1274 declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone 1275 1276 define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 1277 ; GENERIC-LABEL: test_paddw: 1278 ; GENERIC: # %bb.0: 1279 ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1280 ; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1281 ; GENERIC-NEXT: retq # sched: [1:1.00] 1282 ; 1283 ; HASWELL-LABEL: test_paddw: 1284 ; HASWELL: # %bb.0: 1285 ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1286 ; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1287 ; HASWELL-NEXT: retq # sched: [7:1.00] 1288 ; 1289 ; BROADWELL-LABEL: test_paddw: 1290 ; BROADWELL: # %bb.0: 1291 ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # 
sched: [1:0.50] 1292 ; BROADWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1293 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1294 ; 1295 ; SKYLAKE-LABEL: test_paddw: 1296 ; SKYLAKE: # %bb.0: 1297 ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1298 ; SKYLAKE-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1299 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1300 ; 1301 ; SKX-LABEL: test_paddw: 1302 ; SKX: # %bb.0: 1303 ; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1304 ; SKX-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1305 ; SKX-NEXT: retq # sched: [7:1.00] 1306 ; 1307 ; ZNVER1-LABEL: test_paddw: 1308 ; ZNVER1: # %bb.0: 1309 ; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1310 ; ZNVER1-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1311 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1312 %1 = add <16 x i16> %a0, %a1 1313 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 1314 %3 = add <16 x i16> %1, %2 1315 ret <16 x i16> %3 1316 } 1317 1318 define <32 x i8> @test_palignr(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 1319 ; GENERIC-LABEL: test_palignr: 1320 ; GENERIC: # %bb.0: 1321 ; GENERIC-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] 1322 ; GENERIC-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00] 1323 ; GENERIC-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 1324 ; GENERIC-NEXT: retq # sched: [1:1.00] 1325 ; 1326 ; HASWELL-LABEL: test_palignr: 1327 ; HASWELL: # %bb.0: 1328 ; HASWELL-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] 1329 ; HASWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00] 
1330 ; HASWELL-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 1331 ; HASWELL-NEXT: retq # sched: [7:1.00] 1332 ; 1333 ; BROADWELL-LABEL: test_palignr: 1334 ; BROADWELL: # %bb.0: 1335 ; BROADWELL-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] 1336 ; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00] 1337 ; BROADWELL-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 1338 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1339 ; 1340 ; SKYLAKE-LABEL: test_palignr: 1341 ; SKYLAKE: # %bb.0: 1342 ; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] 1343 ; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00] 1344 ; SKYLAKE-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 1345 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1346 ; 1347 ; SKX-LABEL: test_palignr: 1348 ; SKX: # %bb.0: 1349 ; SKX-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00] 1350 ; SKX-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00] 1351 ; SKX-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 1352 ; SKX-NEXT: retq # sched: [7:1.00] 1353 ; 1354 ; ZNVER1-LABEL: test_palignr: 1355 ; ZNVER1: # %bb.0: 1356 ; ZNVER1-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:0.25] 1357 ; ZNVER1-NEXT: vpalignr {{.*#+}} ymm0 = 
ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:0.25] 1358 ; ZNVER1-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.25] 1359 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1360 %1 = shufflevector <32 x i8> %a1, <32 x i8> %a0, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48> 1361 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 1362 %3 = shufflevector <32 x i8> %a0, <32 x i8> %1, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48> 1363 %4 = add <32 x i8> %1, %3 1364 ret <32 x i8> %4 1365 } 1366 1367 define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 1368 ; GENERIC-LABEL: test_pand: 1369 ; GENERIC: # %bb.0: 1370 ; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1371 ; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1372 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1373 ; GENERIC-NEXT: retq # sched: [1:1.00] 1374 ; 1375 ; HASWELL-LABEL: test_pand: 1376 ; HASWELL: # %bb.0: 1377 ; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1378 ; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1379 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1380 ; HASWELL-NEXT: retq # sched: [7:1.00] 1381 ; 1382 ; BROADWELL-LABEL: test_pand: 1383 ; BROADWELL: # %bb.0: 1384 ; BROADWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1385 ; BROADWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1386 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1387 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1388 ; 1389 ; 
SKYLAKE-LABEL: test_pand: 1390 ; SKYLAKE: # %bb.0: 1391 ; SKYLAKE-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1392 ; SKYLAKE-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1393 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1394 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1395 ; 1396 ; SKX-LABEL: test_pand: 1397 ; SKX: # %bb.0: 1398 ; SKX-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1399 ; SKX-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1400 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1401 ; SKX-NEXT: retq # sched: [7:1.00] 1402 ; 1403 ; ZNVER1-LABEL: test_pand: 1404 ; ZNVER1: # %bb.0: 1405 ; ZNVER1-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1406 ; ZNVER1-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1407 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1408 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1409 %1 = and <4 x i64> %a0, %a1 1410 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 1411 %3 = and <4 x i64> %1, %2 1412 %4 = add <4 x i64> %3, %a1 1413 ret <4 x i64> %4 1414 } 1415 1416 define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 1417 ; GENERIC-LABEL: test_pandn: 1418 ; GENERIC: # %bb.0: 1419 ; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1420 ; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] 1421 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1422 ; GENERIC-NEXT: retq # sched: [1:1.00] 1423 ; 1424 ; HASWELL-LABEL: test_pandn: 1425 ; HASWELL: # %bb.0: 1426 ; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1427 ; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] 1428 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1429 ; HASWELL-NEXT: retq # sched: [7:1.00] 1430 ; 1431 ; BROADWELL-LABEL: test_pandn: 1432 ; BROADWELL: # %bb.0: 1433 ; BROADWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1434 ; BROADWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [7:0.50] 1435 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, 
%ymm0 # sched: [1:0.50] 1436 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1437 ; 1438 ; SKYLAKE-LABEL: test_pandn: 1439 ; SKYLAKE: # %bb.0: 1440 ; SKYLAKE-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1441 ; SKYLAKE-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] 1442 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1443 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1444 ; 1445 ; SKX-LABEL: test_pandn: 1446 ; SKX: # %bb.0: 1447 ; SKX-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1448 ; SKX-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] 1449 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1450 ; SKX-NEXT: retq # sched: [7:1.00] 1451 ; 1452 ; ZNVER1-LABEL: test_pandn: 1453 ; ZNVER1: # %bb.0: 1454 ; ZNVER1-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1455 ; ZNVER1-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] 1456 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1457 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1458 %1 = xor <4 x i64> %a0, <i64 -1, i64 -1, i64 -1, i64 -1> 1459 %2 = and <4 x i64> %a1, %1 1460 %3 = load <4 x i64>, <4 x i64> *%a2, align 32 1461 %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1> 1462 %5 = and <4 x i64> %3, %4 1463 %6 = add <4 x i64> %2, %5 1464 ret <4 x i64> %6 1465 } 1466 1467 define <32 x i8> @test_pavgb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 1468 ; GENERIC-LABEL: test_pavgb: 1469 ; GENERIC: # %bb.0: 1470 ; GENERIC-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1471 ; GENERIC-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1472 ; GENERIC-NEXT: retq # sched: [1:1.00] 1473 ; 1474 ; HASWELL-LABEL: test_pavgb: 1475 ; HASWELL: # %bb.0: 1476 ; HASWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1477 ; HASWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1478 ; HASWELL-NEXT: retq # sched: [7:1.00] 1479 ; 1480 ; BROADWELL-LABEL: test_pavgb: 1481 ; BROADWELL: # %bb.0: 1482 ; BROADWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1483 ; BROADWELL-NEXT: vpavgb 
(%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1484 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1485 ; 1486 ; SKYLAKE-LABEL: test_pavgb: 1487 ; SKYLAKE: # %bb.0: 1488 ; SKYLAKE-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1489 ; SKYLAKE-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1490 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1491 ; 1492 ; SKX-LABEL: test_pavgb: 1493 ; SKX: # %bb.0: 1494 ; SKX-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1495 ; SKX-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1496 ; SKX-NEXT: retq # sched: [7:1.00] 1497 ; 1498 ; ZNVER1-LABEL: test_pavgb: 1499 ; ZNVER1: # %bb.0: 1500 ; ZNVER1-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1501 ; ZNVER1-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1502 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1503 %1 = zext <32 x i8> %a0 to <32 x i16> 1504 %2 = zext <32 x i8> %a1 to <32 x i16> 1505 %3 = add <32 x i16> %1, %2 1506 %4 = add <32 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1507 %5 = lshr <32 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1508 %6 = trunc <32 x i16> %5 to <32 x i8> 1509 %7 = load <32 x i8>, <32 x i8> *%a2, align 32 1510 %8 = zext <32 x i8> %6 to <32 x i16> 1511 %9 = zext <32 x i8> %7 to <32 x i16> 1512 %10 = add <32 x i16> %8, %9 1513 %11 = add <32 x i16> %10, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1514 %12 = lshr <32 x i16> %11, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, 
i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1515 %13 = trunc <32 x i16> %12 to <32 x i8> 1516 ret <32 x i8> %13 1517 } 1518 1519 define <16 x i16> @test_pavgw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 1520 ; GENERIC-LABEL: test_pavgw: 1521 ; GENERIC: # %bb.0: 1522 ; GENERIC-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1523 ; GENERIC-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1524 ; GENERIC-NEXT: retq # sched: [1:1.00] 1525 ; 1526 ; HASWELL-LABEL: test_pavgw: 1527 ; HASWELL: # %bb.0: 1528 ; HASWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1529 ; HASWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1530 ; HASWELL-NEXT: retq # sched: [7:1.00] 1531 ; 1532 ; BROADWELL-LABEL: test_pavgw: 1533 ; BROADWELL: # %bb.0: 1534 ; BROADWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1535 ; BROADWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 1536 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1537 ; 1538 ; SKYLAKE-LABEL: test_pavgw: 1539 ; SKYLAKE: # %bb.0: 1540 ; SKYLAKE-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1541 ; SKYLAKE-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1542 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1543 ; 1544 ; SKX-LABEL: test_pavgw: 1545 ; SKX: # %bb.0: 1546 ; SKX-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1547 ; SKX-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1548 ; SKX-NEXT: retq # sched: [7:1.00] 1549 ; 1550 ; ZNVER1-LABEL: test_pavgw: 1551 ; ZNVER1: # %bb.0: 1552 ; ZNVER1-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1553 ; ZNVER1-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 1554 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1555 %1 = zext <16 x i16> %a0 to <16 x i32> 1556 %2 = zext <16 x i16> %a1 to <16 x i32> 1557 %3 = add <16 x i32> %1, %2 1558 %4 = add <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, 
i32 1> 1559 %5 = lshr <16 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1560 %6 = trunc <16 x i32> %5 to <16 x i16> 1561 %7 = load <16 x i16>, <16 x i16> *%a2, align 32 1562 %8 = zext <16 x i16> %6 to <16 x i32> 1563 %9 = zext <16 x i16> %7 to <16 x i32> 1564 %10 = add <16 x i32> %8, %9 1565 %11 = add <16 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1566 %12 = lshr <16 x i32> %11, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 1567 %13 = trunc <16 x i32> %12 to <16 x i16> 1568 ret <16 x i16> %13 1569 } 1570 1571 define <4 x i32> @test_pblendd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 1572 ; GENERIC-LABEL: test_pblendd: 1573 ; GENERIC: # %bb.0: 1574 ; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.50] 1575 ; GENERIC-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50] 1576 ; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1577 ; GENERIC-NEXT: retq # sched: [1:1.00] 1578 ; 1579 ; HASWELL-LABEL: test_pblendd: 1580 ; HASWELL: # %bb.0: 1581 ; HASWELL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] 1582 ; HASWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50] 1583 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1584 ; HASWELL-NEXT: retq # sched: [7:1.00] 1585 ; 1586 ; BROADWELL-LABEL: test_pblendd: 1587 ; BROADWELL: # %bb.0: 1588 ; BROADWELL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] 1589 ; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [6:0.50] 1590 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1591 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1592 ; 1593 ; SKYLAKE-LABEL: test_pblendd: 1594 ; SKYLAKE: # %bb.0: 1595 ; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm0 = 
xmm1[0,1,2],xmm0[3] sched: [1:0.33] 1596 ; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50] 1597 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1598 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1599 ; 1600 ; SKX-LABEL: test_pblendd: 1601 ; SKX: # %bb.0: 1602 ; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33] 1603 ; SKX-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50] 1604 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1605 ; SKX-NEXT: retq # sched: [7:1.00] 1606 ; 1607 ; ZNVER1-LABEL: test_pblendd: 1608 ; ZNVER1: # %bb.0: 1609 ; ZNVER1-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.50] 1610 ; ZNVER1-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [8:1.00] 1611 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 1612 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1613 %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 3> 1614 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 1615 %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 4, i32 1, i32 6, i32 3> 1616 %4 = add <4 x i32> %1, %3 1617 ret <4 x i32> %4 1618 } 1619 1620 define <8 x i32> @test_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 1621 ; GENERIC-LABEL: test_pblendd_ymm: 1622 ; GENERIC: # %bb.0: 1623 ; GENERIC-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50] 1624 ; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50] 1625 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1626 ; GENERIC-NEXT: retq # sched: [1:1.00] 1627 ; 1628 ; HASWELL-LABEL: test_pblendd_ymm: 1629 ; HASWELL: # %bb.0: 1630 ; HASWELL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] 1631 ; HASWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50] 1632 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 
1633 ; HASWELL-NEXT: retq # sched: [7:1.00] 1634 ; 1635 ; BROADWELL-LABEL: test_pblendd_ymm: 1636 ; BROADWELL: # %bb.0: 1637 ; BROADWELL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] 1638 ; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [7:0.50] 1639 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1640 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1641 ; 1642 ; SKYLAKE-LABEL: test_pblendd_ymm: 1643 ; SKYLAKE: # %bb.0: 1644 ; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] 1645 ; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50] 1646 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1647 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1648 ; 1649 ; SKX-LABEL: test_pblendd_ymm: 1650 ; SKX: # %bb.0: 1651 ; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33] 1652 ; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50] 1653 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1654 ; SKX-NEXT: retq # sched: [7:1.00] 1655 ; 1656 ; ZNVER1-LABEL: test_pblendd_ymm: 1657 ; ZNVER1: # %bb.0: 1658 ; ZNVER1-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50] 1659 ; ZNVER1-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [9:1.50] 1660 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1661 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1662 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 15> 1663 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 1664 %3 = shufflevector <8 x i32> %a1, <8 x i32> %2, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7> 1665 %4 = add <8 x i32> %1, %3 1666 ret <8 x i32> %4 1667 } 1668 1669 define <32 x i8> @test_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2, <32 x i8> *%a3, <32 x i8> %a4) { 1670 ; 
GENERIC-LABEL: test_pblendvb: 1671 ; GENERIC: # %bb.0: 1672 ; GENERIC-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] 1673 ; GENERIC-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 1674 ; GENERIC-NEXT: retq # sched: [1:1.00] 1675 ; 1676 ; HASWELL-LABEL: test_pblendvb: 1677 ; HASWELL: # %bb.0: 1678 ; HASWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] 1679 ; HASWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 1680 ; HASWELL-NEXT: retq # sched: [7:1.00] 1681 ; 1682 ; BROADWELL-LABEL: test_pblendvb: 1683 ; BROADWELL: # %bb.0: 1684 ; BROADWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] 1685 ; BROADWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:2.00] 1686 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1687 ; 1688 ; SKYLAKE-LABEL: test_pblendvb: 1689 ; SKYLAKE: # %bb.0: 1690 ; SKYLAKE-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] 1691 ; SKYLAKE-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67] 1692 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1693 ; 1694 ; SKX-LABEL: test_pblendvb: 1695 ; SKX: # %bb.0: 1696 ; SKX-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67] 1697 ; SKX-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67] 1698 ; SKX-NEXT: retq # sched: [7:1.00] 1699 ; 1700 ; ZNVER1-LABEL: test_pblendvb: 1701 ; ZNVER1: # %bb.0: 1702 ; ZNVER1-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 1703 ; ZNVER1-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 1704 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1705 %1 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) 1706 %2 = load <32 x i8>, <32 x i8> *%a3, align 32 1707 %3 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %1, <32 x i8> %2, <32 x i8> %a4) 1708 ret <32 x i8> %3 1709 } 1710 declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone 1711 1712 define <16 x i16> @test_pblendw(<16 x i16> %a0, <16 x i16> 
%a1, <16 x i16> *%a2) { 1713 ; GENERIC-LABEL: test_pblendw: 1714 ; GENERIC: # %bb.0: 1715 ; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:0.50] 1716 ; GENERIC-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:0.50] 1717 ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1718 ; GENERIC-NEXT: retq # sched: [1:1.00] 1719 ; 1720 ; HASWELL-LABEL: test_pblendw: 1721 ; HASWELL: # %bb.0: 1722 ; HASWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] 1723 ; HASWELL-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00] 1724 ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1725 ; HASWELL-NEXT: retq # sched: [7:1.00] 1726 ; 1727 ; BROADWELL-LABEL: test_pblendw: 1728 ; BROADWELL: # %bb.0: 1729 ; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] 1730 ; BROADWELL-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [7:1.00] 1731 ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1732 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1733 ; 1734 ; SKYLAKE-LABEL: test_pblendw: 1735 ; SKYLAKE: # %bb.0: 1736 ; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] 1737 ; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00] 1738 ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1739 ; SKYLAKE-NEXT: retq # 
sched: [7:1.00] 1740 ; 1741 ; SKX-LABEL: test_pblendw: 1742 ; SKX: # %bb.0: 1743 ; SKX-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00] 1744 ; SKX-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00] 1745 ; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1746 ; SKX-NEXT: retq # sched: [7:1.00] 1747 ; 1748 ; ZNVER1-LABEL: test_pblendw: 1749 ; ZNVER1: # %bb.0: 1750 ; ZNVER1-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [2:0.33] 1751 ; ZNVER1-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [9:0.50] 1752 ; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1753 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1754 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 5, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 28, i32 13, i32 14, i32 15> 1755 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 1756 %3 = shufflevector <16 x i16> %a1, <16 x i16> %2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15> 1757 %4 = add <16 x i16> %1, %3 1758 ret <16 x i16> %4 1759 } 1760 1761 define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) { 1762 ; GENERIC-LABEL: test_pbroadcastb: 1763 ; GENERIC: # %bb.0: 1764 ; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.50] 1765 ; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:0.50] 1766 ; GENERIC-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1767 ; GENERIC-NEXT: retq # sched: [1:1.00] 1768 ; 1769 ; HASWELL-LABEL: test_pbroadcastb: 1770 ; HASWELL: # %bb.0: 1771 ; HASWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] 1772 ; 
HASWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00] 1773 ; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1774 ; HASWELL-NEXT: retq # sched: [7:1.00] 1775 ; 1776 ; BROADWELL-LABEL: test_pbroadcastb: 1777 ; BROADWELL: # %bb.0: 1778 ; BROADWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00] 1779 ; BROADWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] 1780 ; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1781 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1782 ; 1783 ; SKYLAKE-LABEL: test_pbroadcastb: 1784 ; SKYLAKE: # %bb.0: 1785 ; SKYLAKE-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] 1786 ; SKYLAKE-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00] 1787 ; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1788 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1789 ; 1790 ; SKX-LABEL: test_pbroadcastb: 1791 ; SKX: # %bb.0: 1792 ; SKX-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00] 1793 ; SKX-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00] 1794 ; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1795 ; SKX-NEXT: retq # sched: [7:1.00] 1796 ; 1797 ; ZNVER1-LABEL: test_pbroadcastb: 1798 ; ZNVER1: # %bb.0: 1799 ; ZNVER1-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [8:1.00] 1800 ; ZNVER1-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.25] 1801 ; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 1802 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1803 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer 1804 %2 = load <16 x i8>, <16 x i8> *%a1, align 16 1805 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer 1806 %4 = add <16 x i8> %1, %3 1807 ret <16 x i8> %4 1808 } 1809 1810 define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) { 1811 ; GENERIC-LABEL: test_pbroadcastb_ymm: 1812 ; GENERIC: # %bb.0: 1813 ; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00] 1814 ; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [7:0.50] 1815 ; GENERIC-NEXT: vpaddb %ymm1, 
%ymm0, %ymm0 # sched: [1:0.50] 1816 ; GENERIC-NEXT: retq # sched: [1:1.00] 1817 ; 1818 ; HASWELL-LABEL: test_pbroadcastb_ymm: 1819 ; HASWELL: # %bb.0: 1820 ; HASWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] 1821 ; HASWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00] 1822 ; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1823 ; HASWELL-NEXT: retq # sched: [7:1.00] 1824 ; 1825 ; BROADWELL-LABEL: test_pbroadcastb_ymm: 1826 ; BROADWELL: # %bb.0: 1827 ; BROADWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00] 1828 ; BROADWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] 1829 ; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1830 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1831 ; 1832 ; SKYLAKE-LABEL: test_pbroadcastb_ymm: 1833 ; SKYLAKE: # %bb.0: 1834 ; SKYLAKE-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] 1835 ; SKYLAKE-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00] 1836 ; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1837 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1838 ; 1839 ; SKX-LABEL: test_pbroadcastb_ymm: 1840 ; SKX: # %bb.0: 1841 ; SKX-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00] 1842 ; SKX-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00] 1843 ; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1844 ; SKX-NEXT: retq # sched: [7:1.00] 1845 ; 1846 ; ZNVER1-LABEL: test_pbroadcastb_ymm: 1847 ; ZNVER1: # %bb.0: 1848 ; ZNVER1-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:2.00] 1849 ; ZNVER1-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [2:0.25] 1850 ; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1851 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1852 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> zeroinitializer 1853 %2 = load <32 x i8>, <32 x i8> *%a1, align 32 1854 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> zeroinitializer 1855 %4 = add <32 x i8> %1, %3 1856 ret <32 x i8> %4 1857 } 1858 1859 define <4 x i32> @test_pbroadcastd(<4 x i32> %a0, <4 x i32> *%a1) { 
1860 ; GENERIC-LABEL: test_pbroadcastd: 1861 ; GENERIC: # %bb.0: 1862 ; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:0.50] 1863 ; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [7:0.50] 1864 ; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1865 ; GENERIC-NEXT: retq # sched: [1:1.00] 1866 ; 1867 ; HASWELL-LABEL: test_pbroadcastd: 1868 ; HASWELL: # %bb.0: 1869 ; HASWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] 1870 ; HASWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50] 1871 ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1872 ; HASWELL-NEXT: retq # sched: [7:1.00] 1873 ; 1874 ; BROADWELL-LABEL: test_pbroadcastd: 1875 ; BROADWELL: # %bb.0: 1876 ; BROADWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] 1877 ; BROADWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [5:0.50] 1878 ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1879 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1880 ; 1881 ; SKYLAKE-LABEL: test_pbroadcastd: 1882 ; SKYLAKE: # %bb.0: 1883 ; SKYLAKE-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] 1884 ; SKYLAKE-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50] 1885 ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1886 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1887 ; 1888 ; SKX-LABEL: test_pbroadcastd: 1889 ; SKX: # %bb.0: 1890 ; SKX-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00] 1891 ; SKX-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50] 1892 ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1893 ; SKX-NEXT: retq # sched: [7:1.00] 1894 ; 1895 ; ZNVER1-LABEL: test_pbroadcastd: 1896 ; ZNVER1: # %bb.0: 1897 ; ZNVER1-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [8:0.50] 1898 ; ZNVER1-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:0.25] 1899 ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 1900 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1901 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> zeroinitializer 1902 %2 = load <4 x i32>, <4 x i32> *%a1, align 16 1903 %3 = 
shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer 1904 %4 = add <4 x i32> %1, %3 1905 ret <4 x i32> %4 1906 } 1907 1908 define <8 x i32> @test_pbroadcastd_ymm(<8 x i32> %a0, <8 x i32> *%a1) { 1909 ; GENERIC-LABEL: test_pbroadcastd_ymm: 1910 ; GENERIC: # %bb.0: 1911 ; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00] 1912 ; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50] 1913 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1914 ; GENERIC-NEXT: retq # sched: [1:1.00] 1915 ; 1916 ; HASWELL-LABEL: test_pbroadcastd_ymm: 1917 ; HASWELL: # %bb.0: 1918 ; HASWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] 1919 ; HASWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50] 1920 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1921 ; HASWELL-NEXT: retq # sched: [7:1.00] 1922 ; 1923 ; BROADWELL-LABEL: test_pbroadcastd_ymm: 1924 ; BROADWELL: # %bb.0: 1925 ; BROADWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] 1926 ; BROADWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [6:0.50] 1927 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 1928 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1929 ; 1930 ; SKYLAKE-LABEL: test_pbroadcastd_ymm: 1931 ; SKYLAKE: # %bb.0: 1932 ; SKYLAKE-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] 1933 ; SKYLAKE-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50] 1934 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1935 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1936 ; 1937 ; SKX-LABEL: test_pbroadcastd_ymm: 1938 ; SKX: # %bb.0: 1939 ; SKX-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00] 1940 ; SKX-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50] 1941 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 1942 ; SKX-NEXT: retq # sched: [7:1.00] 1943 ; 1944 ; ZNVER1-LABEL: test_pbroadcastd_ymm: 1945 ; ZNVER1: # %bb.0: 1946 ; ZNVER1-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [8:0.50] 1947 ; ZNVER1-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [2:0.25] 1948 ; 
ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 1949 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1950 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer 1951 %2 = load <8 x i32>, <8 x i32> *%a1, align 32 1952 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> zeroinitializer 1953 %4 = add <8 x i32> %1, %3 1954 ret <8 x i32> %4 1955 } 1956 1957 define <2 x i64> @test_pbroadcastq(<2 x i64> %a0, <2 x i64> *%a1) { 1958 ; GENERIC-LABEL: test_pbroadcastq: 1959 ; GENERIC: # %bb.0: 1960 ; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.50] 1961 ; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [7:0.50] 1962 ; GENERIC-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1963 ; GENERIC-NEXT: retq # sched: [1:1.00] 1964 ; 1965 ; HASWELL-LABEL: test_pbroadcastq: 1966 ; HASWELL: # %bb.0: 1967 ; HASWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] 1968 ; HASWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50] 1969 ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1970 ; HASWELL-NEXT: retq # sched: [7:1.00] 1971 ; 1972 ; BROADWELL-LABEL: test_pbroadcastq: 1973 ; BROADWELL: # %bb.0: 1974 ; BROADWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] 1975 ; BROADWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [5:0.50] 1976 ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 1977 ; BROADWELL-NEXT: retq # sched: [7:1.00] 1978 ; 1979 ; SKYLAKE-LABEL: test_pbroadcastq: 1980 ; SKYLAKE: # %bb.0: 1981 ; SKYLAKE-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] 1982 ; SKYLAKE-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50] 1983 ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1984 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 1985 ; 1986 ; SKX-LABEL: test_pbroadcastq: 1987 ; SKX: # %bb.0: 1988 ; SKX-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00] 1989 ; SKX-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50] 1990 ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 1991 ; SKX-NEXT: retq # sched: [7:1.00] 
1992 ; 1993 ; ZNVER1-LABEL: test_pbroadcastq: 1994 ; ZNVER1: # %bb.0: 1995 ; ZNVER1-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [8:0.50] 1996 ; ZNVER1-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.25] 1997 ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 1998 ; ZNVER1-NEXT: retq # sched: [1:0.50] 1999 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer 2000 %2 = load <2 x i64>, <2 x i64> *%a1, align 16 2001 %3 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer 2002 %4 = add <2 x i64> %1, %3 2003 ret <2 x i64> %4 2004 } 2005 2006 define <4 x i64> @test_pbroadcastq_ymm(<4 x i64> %a0, <4 x i64> *%a1) { 2007 ; GENERIC-LABEL: test_pbroadcastq_ymm: 2008 ; GENERIC: # %bb.0: 2009 ; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00] 2010 ; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50] 2011 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2012 ; GENERIC-NEXT: retq # sched: [1:1.00] 2013 ; 2014 ; HASWELL-LABEL: test_pbroadcastq_ymm: 2015 ; HASWELL: # %bb.0: 2016 ; HASWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] 2017 ; HASWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50] 2018 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2019 ; HASWELL-NEXT: retq # sched: [7:1.00] 2020 ; 2021 ; BROADWELL-LABEL: test_pbroadcastq_ymm: 2022 ; BROADWELL: # %bb.0: 2023 ; BROADWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] 2024 ; BROADWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [6:0.50] 2025 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2026 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2027 ; 2028 ; SKYLAKE-LABEL: test_pbroadcastq_ymm: 2029 ; SKYLAKE: # %bb.0: 2030 ; SKYLAKE-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] 2031 ; SKYLAKE-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50] 2032 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 2033 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2034 ; 2035 ; SKX-LABEL: test_pbroadcastq_ymm: 2036 ; SKX: # 
%bb.0: 2037 ; SKX-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00] 2038 ; SKX-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50] 2039 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 2040 ; SKX-NEXT: retq # sched: [7:1.00] 2041 ; 2042 ; ZNVER1-LABEL: test_pbroadcastq_ymm: 2043 ; ZNVER1: # %bb.0: 2044 ; ZNVER1-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [8:0.50] 2045 ; ZNVER1-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [2:0.25] 2046 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 2047 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2048 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> zeroinitializer 2049 %2 = load <4 x i64>, <4 x i64> *%a1, align 32 2050 %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> zeroinitializer 2051 %4 = add <4 x i64> %1, %3 2052 ret <4 x i64> %4 2053 } 2054 2055 define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) { 2056 ; GENERIC-LABEL: test_pbroadcastw: 2057 ; GENERIC: # %bb.0: 2058 ; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.50] 2059 ; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:0.50] 2060 ; GENERIC-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2061 ; GENERIC-NEXT: retq # sched: [1:1.00] 2062 ; 2063 ; HASWELL-LABEL: test_pbroadcastw: 2064 ; HASWELL: # %bb.0: 2065 ; HASWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] 2066 ; HASWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00] 2067 ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2068 ; HASWELL-NEXT: retq # sched: [7:1.00] 2069 ; 2070 ; BROADWELL-LABEL: test_pbroadcastw: 2071 ; BROADWELL: # %bb.0: 2072 ; BROADWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00] 2073 ; BROADWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] 2074 ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 2075 ; BROADWELL-NEXT: retq # sched: [7:1.00] 2076 ; 2077 ; SKYLAKE-LABEL: test_pbroadcastw: 2078 ; SKYLAKE: # %bb.0: 2079 ; SKYLAKE-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] 2080 ; SKYLAKE-NEXT: 
vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00] 2081 ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 2082 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 2083 ; 2084 ; SKX-LABEL: test_pbroadcastw: 2085 ; SKX: # %bb.0: 2086 ; SKX-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00] 2087 ; SKX-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00] 2088 ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] 2089 ; SKX-NEXT: retq # sched: [7:1.00] 2090 ; 2091 ; ZNVER1-LABEL: test_pbroadcastw: 2092 ; ZNVER1: # %bb.0: 2093 ; ZNVER1-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [8:1.00] 2094 ; ZNVER1-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.25] 2095 ; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] 2096 ; ZNVER1-NEXT: retq # sched: [1:0.50] 2097 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer 2098 %2 = load <8 x i16>, <8 x i16> *%a1, align 16 2099 %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer 2100 %4 = add <8 x i16> %1, %3 2101 ret <8 x i16> %4 2102 } 2103 2104 define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) { 2105 ; GENERIC-LABEL: test_pbroadcastw_ymm: 2106 ; GENERIC: # %bb.0: 2107 ; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00] 2108 ; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [7:0.50] 2109 ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2110 ; GENERIC-NEXT: retq # sched: [1:1.00] 2111 ; 2112 ; HASWELL-LABEL: test_pbroadcastw_ymm: 2113 ; HASWELL: # %bb.0: 2114 ; HASWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] 2115 ; HASWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00] 2116 ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 2117 ; HASWELL-NEXT: retq # sched: [7:1.00] 2118 ; 2119 ; BROADWELL-LABEL: test_pbroadcastw_ymm: 2120 ; BROADWELL: # %bb.0: 2121 ; BROADWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00] 2122 ; BROADWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00] 2123 ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, 
%ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pbroadcastw_ymm:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00]
; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pbroadcastw_ymm:
; SKX: # %bb.0:
; SKX-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00]
; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pbroadcastw_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:2.00]
; ZNVER1-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [2:0.25]
; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> zeroinitializer
  %2 = load <16 x i16>, <16 x i16> *%a1, align 32
  %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> zeroinitializer
  %4 = add <16 x i16> %1, %3
  ret <16 x i16> %4
}

; VPCMPEQB on 256-bit vectors (byte equality). %a0 is first compared with %a1
; (register form); the sign-extended result is then compared with the vector
; loaded from %a2, which the expected output shows as a (%rdi) memory operand.
define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pcmpeqb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpeqb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pcmpeqb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pcmpeqb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pcmpeqb:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpeqb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = icmp eq <32 x i8> %a0, %a1
  %2 = sext <32 x i1> %1 to <32 x i8>
  %3 = load <32 x i8>, <32 x i8> *%a2, align 32
  %4 = icmp eq <32 x i8> %2, %3
  %5 = sext <32 x i1> %4 to <32 x i8>
  ret <32 x i8> %5
}

; VPCMPEQD on 256-bit vectors (dword equality): register form on %a0/%a1, then
; memory form comparing that result with the vector loaded from %a2.
define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pcmpeqd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpeqd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pcmpeqd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pcmpeqd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pcmpeqd:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpeqd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = icmp eq <8 x i32> %a0, %a1
  %2 = sext <8 x i1> %1 to <8 x i32>
  %3 = load <8 x i32>, <8 x i32> *%a2, align 32
  %4 = icmp eq <8 x i32> %2, %3
  %5 = sext <8 x i1> %4 to <8 x i32>
  ret <8 x i32> %5
}

; VPCMPEQQ on 256-bit vectors (qword equality): register form on %a0/%a1, then
; memory form comparing that result with the vector loaded from %a2.
define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_pcmpeqq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpeqq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pcmpeqq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pcmpeqq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pcmpeqq:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpeqq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = icmp eq <4 x i64> %a0, %a1
  %2 = sext <4 x i1> %1 to <4 x i64>
  %3 = load <4 x i64>, <4 x i64> *%a2, align 32
  %4 = icmp eq <4 x i64> %2, %3
  %5 = sext <4 x i1> %4 to <4 x i64>
  ret <4 x i64> %5
}

; VPCMPEQW on 256-bit vectors (word equality): register form on %a0/%a1, then
; memory form comparing that result with the vector loaded from %a2.
define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pcmpeqw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpeqw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pcmpeqw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pcmpeqw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pcmpeqw:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpeqw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = icmp eq <16 x i16> %a0, %a1
  %2 = sext <16 x i1> %1 to <16 x i16>
  %3 = load <16 x i16>, <16 x i16> *%a2, align 32
  %4 = icmp eq <16 x i16> %2, %3
  %5 = sext <16 x i1> %4 to <16 x i16>
  ret <16 x i16> %5
}

; VPCMPGTB on 256-bit vectors (signed byte greater-than): register form on
; %a0/%a1, then memory form comparing that result with the vector loaded from %a2.
define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
; GENERIC-LABEL: test_pcmpgtb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpgtb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pcmpgtb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pcmpgtb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pcmpgtb:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpgtb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = icmp sgt <32 x i8> %a0, %a1
  %2 = sext <32 x i1> %1 to <32 x i8>
  %3 = load <32 x i8>, <32 x i8> *%a2, align 32
  %4 = icmp sgt <32 x i8> %2, %3
  %5 = sext <32 x i1> %4 to <32 x i8>
  ret <32 x i8> %5
}

; VPCMPGTD on 256-bit vectors (signed dword greater-than): register form on
; %a0/%a1, then memory form comparing that result with the vector loaded from %a2.
define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_pcmpgtd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpgtd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pcmpgtd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pcmpgtd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pcmpgtd:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpgtd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = icmp sgt <8 x i32> %a0, %a1
  %2 = sext <8 x i1> %1 to <8 x i32>
  %3 = load <8 x i32>, <8 x i32> *%a2, align 32
  %4 = icmp sgt <8 x i32> %2, %3
  %5 = sext <8 x i1> %4 to <8 x i32>
  ret <8 x i32> %5
}

; VPCMPGTQ on 256-bit vectors (signed qword greater-than): register form on
; %a0/%a1, then memory form comparing that result with the vector loaded from %a2.
define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_pcmpgtq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpgtq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; HASWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pcmpgtq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pcmpgtq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pcmpgtq:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpgtq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; ZNVER1-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = icmp sgt <4 x i64> %a0, %a1
  %2 = sext <4 x i1> %1 to <4 x i64>
  %3 = load <4 x i64>, <4 x i64> *%a2, align 32
  %4 = icmp sgt <4 x i64> %2, %3
  %5 = sext <4 x i1> %4 to <4 x i64>
  ret <4 x i64> %5
}

; VPCMPGTW on 256-bit vectors (signed word greater-than): register form on
; %a0/%a1, then memory form comparing that result with the vector loaded from %a2.
define <16 x i16> @test_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_pcmpgtw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pcmpgtw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pcmpgtw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pcmpgtw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pcmpgtw:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pcmpgtw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = icmp sgt <16 x i16> %a0, %a1
  %2 = sext <16 x i1> %1 to <16 x i16>
  %3 = load <16 x i16>, <16 x i16> *%a2, align 32
  %4 = icmp sgt <16 x i16> %2, %3
  %5 = sext <16 x i1> %4 to <16 x i16>
  ret <16 x i16> %5
}

; VPERM2I128: shuffle mask <2,3,4,5> selects the upper two qwords of %a0 followed
; by the lower two qwords of the second source. The second source is %a1 for the
; register form and the vector loaded from %a2 for the memory form; the two
; shuffle results are added together.
define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_perm2i128:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_perm2i128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_perm2i128:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:1.00]
; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_perm2i128:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_perm2i128:
; SKX: # %bb.0:
; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_perm2i128:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [2:0.25]
; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:0.50]
; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  %2 = load <4 x i64>, <4 x i64> *%a2, align 32
  %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  %4 = add <4 x i64> %1, %3
  ret <4 x i64> %4
}

; VPERMD via the llvm.x86.avx2.permd intrinsic. %a0 is the second intrinsic
; argument in both calls; the first argument is %a1 (register form) and then the
; vector loaded from %a2 (memory form). The two results are added together.
define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_permd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_permd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
; HASWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_permd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
; BROADWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_permd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
; SKYLAKE-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_permd:
; SKX: # %bb.0:
; SKX-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
; SKX-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_permd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [2:0.25]
; ZNVER1-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a1, <8 x i32> %a0)
  %2 = load <8 x i32>, <8 x i32> *%a2, align 32
  %3 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %2, <8 x i32> %a0)
  %4 = add <8 x i32> %1, %3
  ret <8 x i32> %4
}
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly

; VPERMPD with constant shuffle masks: <3,2,2,3> applied to %a0 (register form)
; and <0,2,2,3> applied to the vector loaded from %a1 (memory form); the two
; results are combined with fadd.
define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) {
; GENERIC-LABEL: test_permpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00]
; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_permpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
; HASWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_permpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
; BROADWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00]
; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_permpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_permpd:
; SKX: # %bb.0:
; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_permpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [107:0.50]
; ZNVER1-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [100:0.25]
; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
  %2 = load <4 x double>, <4 x double> *%a1, align 32
  %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
  %4 = fadd <4 x double> %1, %3
  ret <4 x double> %4
}

; VPERMPS via the llvm.x86.avx2.permps intrinsic. %a0 is the second intrinsic
; argument in both calls; the first argument is %a1 (register form) and then the
; vector loaded from %a2 (memory form). The two results are combined with fadd.
define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2) {
; GENERIC-LABEL: test_permps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_permps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
; HASWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_permps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
; BROADWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_permps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
; SKYLAKE-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_permps:
; SKX: # %bb.0:
; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_permps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [100:0.25]
; ZNVER1-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [107:0.50]
; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x i32> %a0)
  %2 = load <8 x float>, <8 x float> *%a2, align 32
  %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> %a0)
  %4 = fadd <8 x float> %1, %3
  ret <8 x float> %4
}
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly

; VPERMQ with constant shuffle masks: <3,2,2,3> applied to %a0 (register form)
; and <0,2,2,3> applied to the vector loaded from %a1 (memory form); the two
; results are added together.
define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) {
; GENERIC-LABEL: test_permq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_permq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
; HASWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_permq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
; BROADWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00]
; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_permq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
; SKYLAKE-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_permq:
; SKX: # %bb.0:
; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
; SKX-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_permq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:0.50]
; ZNVER1-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [2:0.25]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
  %2 = load <4 x i64>, <4 x i64> *%a1, align 32
  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
  %4 = add <4 x i64> %1, %3
  ret <4 x i64> %4
}

; VPGATHERDD, 128-bit: one call to llvm.x86.avx2.gather.d.d. The trailing i8 2
; argument appears as the scale in the expected (%rdi,%xmm1,2) address.
define <4 x i32> @test_pgatherdd(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3) {
; GENERIC-LABEL: test_pgatherdd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pgatherdd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pgatherdd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pgatherdd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pgatherdd:
; SKX: # %bb.0:
; SKX-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pgatherdd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3, i8 2)
  ret <4 x i32> %1
}
declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8) nounwind readonly

; VPGATHERDD, 256-bit: one call to llvm.x86.avx2.gather.d.d.256 with scale 2.
define <8 x i32> @test_pgatherdd_ymm(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3) {
; GENERIC-LABEL: test_pgatherdd_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pgatherdd_ymm:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [27:6.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pgatherdd_ymm:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pgatherdd_ymm:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pgatherdd_ymm:
; SKX: # %bb.0:
; SKX-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pgatherdd_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3, i8 2)
  ret <8 x i32> %1
}
declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8) nounwind readonly

; VPGATHERDQ, 128-bit: one call to llvm.x86.avx2.gather.d.q with scale 2.
define <2 x i64> @test_pgatherdq(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3) {
; GENERIC-LABEL: test_pgatherdq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pgatherdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pgatherdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pgatherdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pgatherdq:
; SKX: # %bb.0:
; SKX-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pgatherdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3, i8 2)
  ret <2 x i64> %1
}
declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8) nounwind readonly

; VPGATHERDQ, 256-bit result with a 128-bit index vector: one call to
; llvm.x86.avx2.gather.d.q.256 with scale 2.
define <4 x i64> @test_pgatherdq_ymm(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3) {
; GENERIC-LABEL: test_pgatherdq_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [5:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pgatherdq_ymm:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [27:4.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pgatherdq_ymm:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pgatherdq_ymm:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pgatherdq_ymm:
; SKX: # %bb.0:
; SKX-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pgatherdq_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3, i8 2)
  ret <4 x i64> %1
}
declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8) nounwind readonly

; VPGATHERQD, 128-bit: one call to llvm.x86.avx2.gather.q.d with scale 2.
define <4 x i32> @test_pgatherqd(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3) {
; GENERIC-LABEL: test_pgatherqd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pgatherqd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:5.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pgatherqd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pgatherqd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pgatherqd:
; SKX: # %bb.0:
; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pgatherqd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3, i8 2)
  ret <4 x i32> %1
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8) nounwind readonly

; VPGATHERQD with a 256-bit index vector and a 128-bit result: one call to
; llvm.x86.avx2.gather.q.d.256 with scale 2. The expected output for every
; target also contains a vzeroupper before the return.
define <4 x i32> @test_pgatherqd_ymm(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3) {
; GENERIC-LABEL: test_pgatherqd_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50]
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pgatherqd_ymm:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [28:5.00]
; HASWELL-NEXT: vzeroupper # sched: [4:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pgatherqd_ymm:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pgatherqd_ymm:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pgatherqd_ymm:
; SKX: # %bb.0:
; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pgatherqd_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3, i8 2)
  ret <4 x i32> %1
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8) nounwind readonly

; VPGATHERQQ, 128-bit: one call to llvm.x86.avx2.gather.q.q with scale 2.
define <2 x i64> @test_pgatherqq(<2 x i64> %a0, i8 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
; GENERIC-LABEL: test_pgatherqq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pgatherqq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [23:3.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pgatherqq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pgatherqq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pgatherqq:
; SKX: # %bb.0:
; SKX-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pgatherqq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %a1, <2 x i64> %a2, <2 x i64> %a3, i8 2)
  ret <2 x i64> %1
}
declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8) nounwind readonly

; VPGATHERQQ, 256-bit: one call to llvm.x86.avx2.gather.q.q.256 with scale 2.
define <4 x i64> @test_pgatherqq_ymm(<4 x i64> %a0, i8 *%a1, <4 x i64> %a2, <4 x i64> %a3) {
; GENERIC-LABEL: test_pgatherqq_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pgatherqq_ymm:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [24:5.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pgatherqq_ymm:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pgatherqq_ymm:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pgatherqq_ymm:
; SKX: # %bb.0:
; SKX-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pgatherqq_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %a1, <4 x i64> %a2, <4 x i64> %a3, i8 2)
  ret <4 x i64> %1
}
declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8) nounwind readonly

; VPHADDD via the llvm.x86.avx2.phadd.d intrinsic: register form on %a0/%a1,
; then memory form combining that result with the vector loaded from %a2.
define <8 x i32> @test_phaddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
; GENERIC-LABEL: test_phaddd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
; GENERIC-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_phaddd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; HASWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_phaddd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; BROADWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_phaddd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKYLAKE-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_phaddd:
; SKX: # %bb.0:
; SKX-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
; SKX-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_phaddd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
; ZNVER1-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1)
  %2 = load <8 x i32>, <8 x i32> *%a2, align 32
  %3 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %1, <8 x i32> %2)
  ret <8 x i32> %3
}
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone

define <16 x i16> @test_phaddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_phaddsw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
; GENERIC-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
; GENERIC-NEXT: 
retq # sched: [1:1.00] 3087 ; 3088 ; HASWELL-LABEL: test_phaddsw: 3089 ; HASWELL: # %bb.0: 3090 ; HASWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3091 ; HASWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3092 ; HASWELL-NEXT: retq # sched: [7:1.00] 3093 ; 3094 ; BROADWELL-LABEL: test_phaddsw: 3095 ; BROADWELL: # %bb.0: 3096 ; BROADWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3097 ; BROADWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 3098 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3099 ; 3100 ; SKYLAKE-LABEL: test_phaddsw: 3101 ; SKYLAKE: # %bb.0: 3102 ; SKYLAKE-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3103 ; SKYLAKE-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3104 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3105 ; 3106 ; SKX-LABEL: test_phaddsw: 3107 ; SKX: # %bb.0: 3108 ; SKX-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3109 ; SKX-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3110 ; SKX-NEXT: retq # sched: [7:1.00] 3111 ; 3112 ; ZNVER1-LABEL: test_phaddsw: 3113 ; ZNVER1: # %bb.0: 3114 ; ZNVER1-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 3115 ; ZNVER1-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 3116 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3117 %1 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) 3118 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 3119 %3 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %1, <16 x i16> %2) 3120 ret <16 x i16> %3 3121 } 3122 declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone 3123 3124 define <16 x i16> @test_phaddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 3125 ; GENERIC-LABEL: test_phaddw: 3126 ; GENERIC: # %bb.0: 3127 ; GENERIC-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.50] 3128 ; GENERIC-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:1.50] 3129 ; GENERIC-NEXT: retq # sched: [1:1.00] 3130 ; 3131 ; HASWELL-LABEL: test_phaddw: 3132 ; HASWELL: # %bb.0: 3133 
; HASWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3134 ; HASWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3135 ; HASWELL-NEXT: retq # sched: [7:1.00] 3136 ; 3137 ; BROADWELL-LABEL: test_phaddw: 3138 ; BROADWELL: # %bb.0: 3139 ; BROADWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3140 ; BROADWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 3141 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3142 ; 3143 ; SKYLAKE-LABEL: test_phaddw: 3144 ; SKYLAKE: # %bb.0: 3145 ; SKYLAKE-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3146 ; SKYLAKE-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3147 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3148 ; 3149 ; SKX-LABEL: test_phaddw: 3150 ; SKX: # %bb.0: 3151 ; SKX-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3152 ; SKX-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3153 ; SKX-NEXT: retq # sched: [7:1.00] 3154 ; 3155 ; ZNVER1-LABEL: test_phaddw: 3156 ; ZNVER1: # %bb.0: 3157 ; ZNVER1-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 3158 ; ZNVER1-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 3159 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3160 %1 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) 3161 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 3162 %3 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %1, <16 x i16> %2) 3163 ret <16 x i16> %3 3164 } 3165 declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone 3166 3167 define <8 x i32> @test_phsubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 3168 ; GENERIC-LABEL: test_phsubd: 3169 ; GENERIC: # %bb.0: 3170 ; GENERIC-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.50] 3171 ; GENERIC-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:1.50] 3172 ; GENERIC-NEXT: retq # sched: [1:1.00] 3173 ; 3174 ; HASWELL-LABEL: test_phsubd: 3175 ; HASWELL: # %bb.0: 3176 ; HASWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3177 ; HASWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # 
sched: [10:2.00] 3178 ; HASWELL-NEXT: retq # sched: [7:1.00] 3179 ; 3180 ; BROADWELL-LABEL: test_phsubd: 3181 ; BROADWELL: # %bb.0: 3182 ; BROADWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3183 ; BROADWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 3184 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3185 ; 3186 ; SKYLAKE-LABEL: test_phsubd: 3187 ; SKYLAKE: # %bb.0: 3188 ; SKYLAKE-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3189 ; SKYLAKE-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3190 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3191 ; 3192 ; SKX-LABEL: test_phsubd: 3193 ; SKX: # %bb.0: 3194 ; SKX-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3195 ; SKX-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3196 ; SKX-NEXT: retq # sched: [7:1.00] 3197 ; 3198 ; ZNVER1-LABEL: test_phsubd: 3199 ; ZNVER1: # %bb.0: 3200 ; ZNVER1-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 3201 ; ZNVER1-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 3202 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3203 %1 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) 3204 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 3205 %3 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %1, <8 x i32> %2) 3206 ret <8 x i32> %3 3207 } 3208 declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone 3209 3210 define <16 x i16> @test_phsubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 3211 ; GENERIC-LABEL: test_phsubsw: 3212 ; GENERIC: # %bb.0: 3213 ; GENERIC-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.50] 3214 ; GENERIC-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:1.50] 3215 ; GENERIC-NEXT: retq # sched: [1:1.00] 3216 ; 3217 ; HASWELL-LABEL: test_phsubsw: 3218 ; HASWELL: # %bb.0: 3219 ; HASWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3220 ; HASWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3221 ; HASWELL-NEXT: retq # sched: [7:1.00] 3222 ; 3223 ; BROADWELL-LABEL: test_phsubsw: 3224 ; 
BROADWELL: # %bb.0: 3225 ; BROADWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3226 ; BROADWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 3227 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3228 ; 3229 ; SKYLAKE-LABEL: test_phsubsw: 3230 ; SKYLAKE: # %bb.0: 3231 ; SKYLAKE-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3232 ; SKYLAKE-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3233 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3234 ; 3235 ; SKX-LABEL: test_phsubsw: 3236 ; SKX: # %bb.0: 3237 ; SKX-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3238 ; SKX-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3239 ; SKX-NEXT: retq # sched: [7:1.00] 3240 ; 3241 ; ZNVER1-LABEL: test_phsubsw: 3242 ; ZNVER1: # %bb.0: 3243 ; ZNVER1-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 3244 ; ZNVER1-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 3245 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3246 %1 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) 3247 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 3248 %3 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %1, <16 x i16> %2) 3249 ret <16 x i16> %3 3250 } 3251 declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone 3252 3253 define <16 x i16> @test_phsubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 3254 ; GENERIC-LABEL: test_phsubw: 3255 ; GENERIC: # %bb.0: 3256 ; GENERIC-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.50] 3257 ; GENERIC-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:1.50] 3258 ; GENERIC-NEXT: retq # sched: [1:1.00] 3259 ; 3260 ; HASWELL-LABEL: test_phsubw: 3261 ; HASWELL: # %bb.0: 3262 ; HASWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3263 ; HASWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3264 ; HASWELL-NEXT: retq # sched: [7:1.00] 3265 ; 3266 ; BROADWELL-LABEL: test_phsubw: 3267 ; BROADWELL: # %bb.0: 3268 ; BROADWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3269 ; 
BROADWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 3270 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3271 ; 3272 ; SKYLAKE-LABEL: test_phsubw: 3273 ; SKYLAKE: # %bb.0: 3274 ; SKYLAKE-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3275 ; SKYLAKE-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3276 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3277 ; 3278 ; SKX-LABEL: test_phsubw: 3279 ; SKX: # %bb.0: 3280 ; SKX-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 3281 ; SKX-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 3282 ; SKX-NEXT: retq # sched: [7:1.00] 3283 ; 3284 ; ZNVER1-LABEL: test_phsubw: 3285 ; ZNVER1: # %bb.0: 3286 ; ZNVER1-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [100:0.25] 3287 ; ZNVER1-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [100:0.25] 3288 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3289 %1 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) 3290 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 3291 %3 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %1, <16 x i16> %2) 3292 ret <16 x i16> %3 3293 } 3294 declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone 3295 3296 define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 3297 ; GENERIC-LABEL: test_pmaddubsw: 3298 ; GENERIC: # %bb.0: 3299 ; GENERIC-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 3300 ; GENERIC-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 3301 ; GENERIC-NEXT: retq # sched: [1:1.00] 3302 ; 3303 ; HASWELL-LABEL: test_pmaddubsw: 3304 ; HASWELL: # %bb.0: 3305 ; HASWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 3306 ; HASWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 3307 ; HASWELL-NEXT: retq # sched: [7:1.00] 3308 ; 3309 ; BROADWELL-LABEL: test_pmaddubsw: 3310 ; BROADWELL: # %bb.0: 3311 ; BROADWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 3312 ; BROADWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 3313 ; BROADWELL-NEXT: 
retq # sched: [7:1.00] 3314 ; 3315 ; SKYLAKE-LABEL: test_pmaddubsw: 3316 ; SKYLAKE: # %bb.0: 3317 ; SKYLAKE-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3318 ; SKYLAKE-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 3319 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3320 ; 3321 ; SKX-LABEL: test_pmaddubsw: 3322 ; SKX: # %bb.0: 3323 ; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3324 ; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 3325 ; SKX-NEXT: retq # sched: [7:1.00] 3326 ; 3327 ; ZNVER1-LABEL: test_pmaddubsw: 3328 ; ZNVER1: # %bb.0: 3329 ; ZNVER1-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] 3330 ; ZNVER1-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 3331 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3332 %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) 3333 %2 = bitcast <16 x i16> %1 to <32 x i8> 3334 %3 = load <32 x i8>, <32 x i8> *%a2, align 32 3335 %4 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %2, <32 x i8> %3) 3336 ret <16 x i16> %4 3337 } 3338 declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone 3339 3340 define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 3341 ; GENERIC-LABEL: test_pmaddwd: 3342 ; GENERIC: # %bb.0: 3343 ; GENERIC-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 3344 ; GENERIC-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 3345 ; GENERIC-NEXT: retq # sched: [1:1.00] 3346 ; 3347 ; HASWELL-LABEL: test_pmaddwd: 3348 ; HASWELL: # %bb.0: 3349 ; HASWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 3350 ; HASWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 3351 ; HASWELL-NEXT: retq # sched: [7:1.00] 3352 ; 3353 ; BROADWELL-LABEL: test_pmaddwd: 3354 ; BROADWELL: # %bb.0: 3355 ; BROADWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 3356 ; BROADWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 3357 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3358 ; 
3359 ; SKYLAKE-LABEL: test_pmaddwd: 3360 ; SKYLAKE: # %bb.0: 3361 ; SKYLAKE-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3362 ; SKYLAKE-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 3363 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3364 ; 3365 ; SKX-LABEL: test_pmaddwd: 3366 ; SKX: # %bb.0: 3367 ; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 3368 ; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 3369 ; SKX-NEXT: retq # sched: [7:1.00] 3370 ; 3371 ; ZNVER1-LABEL: test_pmaddwd: 3372 ; ZNVER1: # %bb.0: 3373 ; ZNVER1-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:1.00] 3374 ; ZNVER1-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 3375 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3376 %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) 3377 %2 = bitcast <8 x i32> %1 to <16 x i16> 3378 %3 = load <16 x i16>, <16 x i16> *%a2, align 32 3379 %4 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %2, <16 x i16> %3) 3380 ret <8 x i32> %4 3381 } 3382 declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone 3383 3384 define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) { 3385 ; GENERIC-LABEL: test_pmaskmovd: 3386 ; GENERIC: # %bb.0: 3387 ; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:1.00] 3388 ; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 3389 ; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3390 ; GENERIC-NEXT: retq # sched: [1:1.00] 3391 ; 3392 ; HASWELL-LABEL: test_pmaskmovd: 3393 ; HASWELL: # %bb.0: 3394 ; HASWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:2.00] 3395 ; HASWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 3396 ; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3397 ; HASWELL-NEXT: retq # sched: [7:1.00] 3398 ; 3399 ; BROADWELL-LABEL: test_pmaskmovd: 3400 ; BROADWELL: # %bb.0: 3401 ; BROADWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:2.00] 3402 ; BROADWELL-NEXT: vpmaskmovd %xmm1, %xmm0, 
(%rdi) # sched: [5:1.00] 3403 ; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3404 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3405 ; 3406 ; SKYLAKE-LABEL: test_pmaskmovd: 3407 ; SKYLAKE: # %bb.0: 3408 ; SKYLAKE-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50] 3409 ; SKYLAKE-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00] 3410 ; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3411 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3412 ; 3413 ; SKX-LABEL: test_pmaskmovd: 3414 ; SKX: # %bb.0: 3415 ; SKX-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50] 3416 ; SKX-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00] 3417 ; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3418 ; SKX-NEXT: retq # sched: [7:1.00] 3419 ; 3420 ; ZNVER1-LABEL: test_pmaskmovd: 3421 ; ZNVER1: # %bb.0: 3422 ; ZNVER1-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [100:0.25] 3423 ; ZNVER1-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [100:0.25] 3424 ; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] 3425 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3426 %1 = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1) 3427 call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) 3428 ret <4 x i32> %1 3429 } 3430 declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly 3431 declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind 3432 3433 define <8 x i32> @test_pmaskmovd_ymm(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) { 3434 ; GENERIC-LABEL: test_pmaskmovd_ymm: 3435 ; GENERIC: # %bb.0: 3436 ; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [9:1.00] 3437 ; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 3438 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 3439 ; GENERIC-NEXT: retq # sched: [1:1.00] 3440 ; 3441 ; HASWELL-LABEL: test_pmaskmovd_ymm: 3442 ; HASWELL: # %bb.0: 3443 ; HASWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [9:2.00] 3444 ; HASWELL-NEXT: vpmaskmovd %ymm1, %ymm0, 
(%rdi) # sched: [5:1.00] 3445 ; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 3446 ; HASWELL-NEXT: retq # sched: [7:1.00] 3447 ; 3448 ; BROADWELL-LABEL: test_pmaskmovd_ymm: 3449 ; BROADWELL: # %bb.0: 3450 ; BROADWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:2.00] 3451 ; BROADWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 3452 ; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 3453 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3454 ; 3455 ; SKYLAKE-LABEL: test_pmaskmovd_ymm: 3456 ; SKYLAKE: # %bb.0: 3457 ; SKYLAKE-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50] 3458 ; SKYLAKE-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00] 3459 ; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 3460 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3461 ; 3462 ; SKX-LABEL: test_pmaskmovd_ymm: 3463 ; SKX: # %bb.0: 3464 ; SKX-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50] 3465 ; SKX-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00] 3466 ; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 3467 ; SKX-NEXT: retq # sched: [7:1.00] 3468 ; 3469 ; ZNVER1-LABEL: test_pmaskmovd_ymm: 3470 ; ZNVER1: # %bb.0: 3471 ; ZNVER1-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [100:0.25] 3472 ; ZNVER1-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [100:0.25] 3473 ; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25] 3474 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3475 %1 = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1) 3476 call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) 3477 ret <8 x i32> %1 3478 } 3479 declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly 3480 declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind 3481 3482 define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) { 3483 ; GENERIC-LABEL: test_pmaskmovq: 3484 ; GENERIC: # %bb.0: 3485 ; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00] 3486 ; GENERIC-NEXT: 
vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 3487 ; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3488 ; GENERIC-NEXT: retq # sched: [1:1.00] 3489 ; 3490 ; HASWELL-LABEL: test_pmaskmovq: 3491 ; HASWELL: # %bb.0: 3492 ; HASWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:2.00] 3493 ; HASWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 3494 ; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3495 ; HASWELL-NEXT: retq # sched: [7:1.00] 3496 ; 3497 ; BROADWELL-LABEL: test_pmaskmovq: 3498 ; BROADWELL: # %bb.0: 3499 ; BROADWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:2.00] 3500 ; BROADWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00] 3501 ; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3502 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3503 ; 3504 ; SKYLAKE-LABEL: test_pmaskmovq: 3505 ; SKYLAKE: # %bb.0: 3506 ; SKYLAKE-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50] 3507 ; SKYLAKE-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00] 3508 ; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3509 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3510 ; 3511 ; SKX-LABEL: test_pmaskmovq: 3512 ; SKX: # %bb.0: 3513 ; SKX-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50] 3514 ; SKX-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00] 3515 ; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] 3516 ; SKX-NEXT: retq # sched: [7:1.00] 3517 ; 3518 ; ZNVER1-LABEL: test_pmaskmovq: 3519 ; ZNVER1: # %bb.0: 3520 ; ZNVER1-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00] 3521 ; ZNVER1-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [100:0.25] 3522 ; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25] 3523 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3524 %1 = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) 3525 call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) 3526 ret <2 x i64> %1 3527 } 3528 declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly 3529 declare 
void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind 3530 3531 define <4 x i64> @test_pmaskmovq_ymm(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) { 3532 ; GENERIC-LABEL: test_pmaskmovq_ymm: 3533 ; GENERIC: # %bb.0: 3534 ; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.00] 3535 ; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 3536 ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] 3537 ; GENERIC-NEXT: retq # sched: [1:1.00] 3538 ; 3539 ; HASWELL-LABEL: test_pmaskmovq_ymm: 3540 ; HASWELL: # %bb.0: 3541 ; HASWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:2.00] 3542 ; HASWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 3543 ; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 3544 ; HASWELL-NEXT: retq # sched: [7:1.00] 3545 ; 3546 ; BROADWELL-LABEL: test_pmaskmovq_ymm: 3547 ; BROADWELL: # %bb.0: 3548 ; BROADWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:2.00] 3549 ; BROADWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00] 3550 ; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 3551 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3552 ; 3553 ; SKYLAKE-LABEL: test_pmaskmovq_ymm: 3554 ; SKYLAKE: # %bb.0: 3555 ; SKYLAKE-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50] 3556 ; SKYLAKE-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00] 3557 ; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 3558 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3559 ; 3560 ; SKX-LABEL: test_pmaskmovq_ymm: 3561 ; SKX: # %bb.0: 3562 ; SKX-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50] 3563 ; SKX-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00] 3564 ; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33] 3565 ; SKX-NEXT: retq # sched: [7:1.00] 3566 ; 3567 ; ZNVER1-LABEL: test_pmaskmovq_ymm: 3568 ; ZNVER1: # %bb.0: 3569 ; ZNVER1-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.50] 3570 ; ZNVER1-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [100:0.25] 3571 ; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # 
sched: [2:0.25] 3572 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3573 %1 = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1) 3574 call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) 3575 ret <4 x i64> %1 3576 } 3577 declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly 3578 declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind 3579 3580 define <32 x i8> @test_pmaxsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 3581 ; GENERIC-LABEL: test_pmaxsb: 3582 ; GENERIC: # %bb.0: 3583 ; GENERIC-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3584 ; GENERIC-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3585 ; GENERIC-NEXT: retq # sched: [1:1.00] 3586 ; 3587 ; HASWELL-LABEL: test_pmaxsb: 3588 ; HASWELL: # %bb.0: 3589 ; HASWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3590 ; HASWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3591 ; HASWELL-NEXT: retq # sched: [7:1.00] 3592 ; 3593 ; BROADWELL-LABEL: test_pmaxsb: 3594 ; BROADWELL: # %bb.0: 3595 ; BROADWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3596 ; BROADWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3597 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3598 ; 3599 ; SKYLAKE-LABEL: test_pmaxsb: 3600 ; SKYLAKE: # %bb.0: 3601 ; SKYLAKE-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3602 ; SKYLAKE-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3603 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3604 ; 3605 ; SKX-LABEL: test_pmaxsb: 3606 ; SKX: # %bb.0: 3607 ; SKX-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3608 ; SKX-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3609 ; SKX-NEXT: retq # sched: [7:1.00] 3610 ; 3611 ; ZNVER1-LABEL: test_pmaxsb: 3612 ; ZNVER1: # %bb.0: 3613 ; ZNVER1-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3614 ; ZNVER1-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3615 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3616 %1 = call <32 x i8> 
@llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1) 3617 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 3618 %3 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %1, <32 x i8> %2) 3619 ret <32 x i8> %3 3620 } 3621 declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone 3622 3623 define <8 x i32> @test_pmaxsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 3624 ; GENERIC-LABEL: test_pmaxsd: 3625 ; GENERIC: # %bb.0: 3626 ; GENERIC-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3627 ; GENERIC-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3628 ; GENERIC-NEXT: retq # sched: [1:1.00] 3629 ; 3630 ; HASWELL-LABEL: test_pmaxsd: 3631 ; HASWELL: # %bb.0: 3632 ; HASWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3633 ; HASWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3634 ; HASWELL-NEXT: retq # sched: [7:1.00] 3635 ; 3636 ; BROADWELL-LABEL: test_pmaxsd: 3637 ; BROADWELL: # %bb.0: 3638 ; BROADWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3639 ; BROADWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3640 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3641 ; 3642 ; SKYLAKE-LABEL: test_pmaxsd: 3643 ; SKYLAKE: # %bb.0: 3644 ; SKYLAKE-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3645 ; SKYLAKE-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3646 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3647 ; 3648 ; SKX-LABEL: test_pmaxsd: 3649 ; SKX: # %bb.0: 3650 ; SKX-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3651 ; SKX-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3652 ; SKX-NEXT: retq # sched: [7:1.00] 3653 ; 3654 ; ZNVER1-LABEL: test_pmaxsd: 3655 ; ZNVER1: # %bb.0: 3656 ; ZNVER1-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3657 ; ZNVER1-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3658 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3659 %1 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1) 3660 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 3661 %3 = call <8 x i32> 
@llvm.x86.avx2.pmaxs.d(<8 x i32> %1, <8 x i32> %2) 3662 ret <8 x i32> %3 3663 } 3664 declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone 3665 3666 define <16 x i16> @test_pmaxsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 3667 ; GENERIC-LABEL: test_pmaxsw: 3668 ; GENERIC: # %bb.0: 3669 ; GENERIC-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3670 ; GENERIC-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3671 ; GENERIC-NEXT: retq # sched: [1:1.00] 3672 ; 3673 ; HASWELL-LABEL: test_pmaxsw: 3674 ; HASWELL: # %bb.0: 3675 ; HASWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3676 ; HASWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3677 ; HASWELL-NEXT: retq # sched: [7:1.00] 3678 ; 3679 ; BROADWELL-LABEL: test_pmaxsw: 3680 ; BROADWELL: # %bb.0: 3681 ; BROADWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3682 ; BROADWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3683 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3684 ; 3685 ; SKYLAKE-LABEL: test_pmaxsw: 3686 ; SKYLAKE: # %bb.0: 3687 ; SKYLAKE-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3688 ; SKYLAKE-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3689 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3690 ; 3691 ; SKX-LABEL: test_pmaxsw: 3692 ; SKX: # %bb.0: 3693 ; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3694 ; SKX-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3695 ; SKX-NEXT: retq # sched: [7:1.00] 3696 ; 3697 ; ZNVER1-LABEL: test_pmaxsw: 3698 ; ZNVER1: # %bb.0: 3699 ; ZNVER1-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3700 ; ZNVER1-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3701 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3702 %1 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1) 3703 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 3704 %3 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %1, <16 x i16> %2) 3705 ret <16 x i16> %3 3706 } 3707 declare <16 x i16> 
@llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone 3708 3709 define <32 x i8> @test_pmaxub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 3710 ; GENERIC-LABEL: test_pmaxub: 3711 ; GENERIC: # %bb.0: 3712 ; GENERIC-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3713 ; GENERIC-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3714 ; GENERIC-NEXT: retq # sched: [1:1.00] 3715 ; 3716 ; HASWELL-LABEL: test_pmaxub: 3717 ; HASWELL: # %bb.0: 3718 ; HASWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3719 ; HASWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3720 ; HASWELL-NEXT: retq # sched: [7:1.00] 3721 ; 3722 ; BROADWELL-LABEL: test_pmaxub: 3723 ; BROADWELL: # %bb.0: 3724 ; BROADWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3725 ; BROADWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3726 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3727 ; 3728 ; SKYLAKE-LABEL: test_pmaxub: 3729 ; SKYLAKE: # %bb.0: 3730 ; SKYLAKE-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3731 ; SKYLAKE-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3732 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3733 ; 3734 ; SKX-LABEL: test_pmaxub: 3735 ; SKX: # %bb.0: 3736 ; SKX-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3737 ; SKX-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3738 ; SKX-NEXT: retq # sched: [7:1.00] 3739 ; 3740 ; ZNVER1-LABEL: test_pmaxub: 3741 ; ZNVER1: # %bb.0: 3742 ; ZNVER1-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3743 ; ZNVER1-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3744 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3745 %1 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1) 3746 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 3747 %3 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %1, <32 x i8> %2) 3748 ret <32 x i8> %3 3749 } 3750 declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone 3751 3752 define <8 x i32> @test_pmaxud(<8 x i32> %a0, <8 x i32> %a1, <8 
x i32> *%a2) { 3753 ; GENERIC-LABEL: test_pmaxud: 3754 ; GENERIC: # %bb.0: 3755 ; GENERIC-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3756 ; GENERIC-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3757 ; GENERIC-NEXT: retq # sched: [1:1.00] 3758 ; 3759 ; HASWELL-LABEL: test_pmaxud: 3760 ; HASWELL: # %bb.0: 3761 ; HASWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3762 ; HASWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3763 ; HASWELL-NEXT: retq # sched: [7:1.00] 3764 ; 3765 ; BROADWELL-LABEL: test_pmaxud: 3766 ; BROADWELL: # %bb.0: 3767 ; BROADWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3768 ; BROADWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3769 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3770 ; 3771 ; SKYLAKE-LABEL: test_pmaxud: 3772 ; SKYLAKE: # %bb.0: 3773 ; SKYLAKE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3774 ; SKYLAKE-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3775 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3776 ; 3777 ; SKX-LABEL: test_pmaxud: 3778 ; SKX: # %bb.0: 3779 ; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3780 ; SKX-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3781 ; SKX-NEXT: retq # sched: [7:1.00] 3782 ; 3783 ; ZNVER1-LABEL: test_pmaxud: 3784 ; ZNVER1: # %bb.0: 3785 ; ZNVER1-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3786 ; ZNVER1-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3787 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3788 %1 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1) 3789 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 3790 %3 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %1, <8 x i32> %2) 3791 ret <8 x i32> %3 3792 } 3793 declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone 3794 3795 define <16 x i16> @test_pmaxuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 3796 ; GENERIC-LABEL: test_pmaxuw: 3797 ; GENERIC: # %bb.0: 3798 ; GENERIC-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: 
[1:0.50] 3799 ; GENERIC-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3800 ; GENERIC-NEXT: retq # sched: [1:1.00] 3801 ; 3802 ; HASWELL-LABEL: test_pmaxuw: 3803 ; HASWELL: # %bb.0: 3804 ; HASWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3805 ; HASWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3806 ; HASWELL-NEXT: retq # sched: [7:1.00] 3807 ; 3808 ; BROADWELL-LABEL: test_pmaxuw: 3809 ; BROADWELL: # %bb.0: 3810 ; BROADWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3811 ; BROADWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3812 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3813 ; 3814 ; SKYLAKE-LABEL: test_pmaxuw: 3815 ; SKYLAKE: # %bb.0: 3816 ; SKYLAKE-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3817 ; SKYLAKE-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3818 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3819 ; 3820 ; SKX-LABEL: test_pmaxuw: 3821 ; SKX: # %bb.0: 3822 ; SKX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3823 ; SKX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3824 ; SKX-NEXT: retq # sched: [7:1.00] 3825 ; 3826 ; ZNVER1-LABEL: test_pmaxuw: 3827 ; ZNVER1: # %bb.0: 3828 ; ZNVER1-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3829 ; ZNVER1-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3830 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3831 %1 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1) 3832 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 3833 %3 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %1, <16 x i16> %2) 3834 ret <16 x i16> %3 3835 } 3836 declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone 3837 3838 define <32 x i8> @test_pminsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 3839 ; GENERIC-LABEL: test_pminsb: 3840 ; GENERIC: # %bb.0: 3841 ; GENERIC-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3842 ; GENERIC-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3843 ; GENERIC-NEXT: retq # sched: [1:1.00] 3844 
; 3845 ; HASWELL-LABEL: test_pminsb: 3846 ; HASWELL: # %bb.0: 3847 ; HASWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3848 ; HASWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3849 ; HASWELL-NEXT: retq # sched: [7:1.00] 3850 ; 3851 ; BROADWELL-LABEL: test_pminsb: 3852 ; BROADWELL: # %bb.0: 3853 ; BROADWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3854 ; BROADWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3855 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3856 ; 3857 ; SKYLAKE-LABEL: test_pminsb: 3858 ; SKYLAKE: # %bb.0: 3859 ; SKYLAKE-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3860 ; SKYLAKE-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3861 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3862 ; 3863 ; SKX-LABEL: test_pminsb: 3864 ; SKX: # %bb.0: 3865 ; SKX-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3866 ; SKX-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3867 ; SKX-NEXT: retq # sched: [7:1.00] 3868 ; 3869 ; ZNVER1-LABEL: test_pminsb: 3870 ; ZNVER1: # %bb.0: 3871 ; ZNVER1-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3872 ; ZNVER1-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3873 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3874 %1 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1) 3875 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 3876 %3 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %1, <32 x i8> %2) 3877 ret <32 x i8> %3 3878 } 3879 declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone 3880 3881 define <8 x i32> @test_pminsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 3882 ; GENERIC-LABEL: test_pminsd: 3883 ; GENERIC: # %bb.0: 3884 ; GENERIC-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3885 ; GENERIC-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3886 ; GENERIC-NEXT: retq # sched: [1:1.00] 3887 ; 3888 ; HASWELL-LABEL: test_pminsd: 3889 ; HASWELL: # %bb.0: 3890 ; HASWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3891 ; 
HASWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3892 ; HASWELL-NEXT: retq # sched: [7:1.00] 3893 ; 3894 ; BROADWELL-LABEL: test_pminsd: 3895 ; BROADWELL: # %bb.0: 3896 ; BROADWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3897 ; BROADWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3898 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3899 ; 3900 ; SKYLAKE-LABEL: test_pminsd: 3901 ; SKYLAKE: # %bb.0: 3902 ; SKYLAKE-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3903 ; SKYLAKE-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3904 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3905 ; 3906 ; SKX-LABEL: test_pminsd: 3907 ; SKX: # %bb.0: 3908 ; SKX-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3909 ; SKX-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3910 ; SKX-NEXT: retq # sched: [7:1.00] 3911 ; 3912 ; ZNVER1-LABEL: test_pminsd: 3913 ; ZNVER1: # %bb.0: 3914 ; ZNVER1-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3915 ; ZNVER1-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3916 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3917 %1 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1) 3918 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 3919 %3 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %1, <8 x i32> %2) 3920 ret <8 x i32> %3 3921 } 3922 declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone 3923 3924 define <16 x i16> @test_pminsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 3925 ; GENERIC-LABEL: test_pminsw: 3926 ; GENERIC: # %bb.0: 3927 ; GENERIC-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3928 ; GENERIC-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3929 ; GENERIC-NEXT: retq # sched: [1:1.00] 3930 ; 3931 ; HASWELL-LABEL: test_pminsw: 3932 ; HASWELL: # %bb.0: 3933 ; HASWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3934 ; HASWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3935 ; HASWELL-NEXT: retq # sched: [7:1.00] 3936 ; 3937 ; 
BROADWELL-LABEL: test_pminsw: 3938 ; BROADWELL: # %bb.0: 3939 ; BROADWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3940 ; BROADWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3941 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3942 ; 3943 ; SKYLAKE-LABEL: test_pminsw: 3944 ; SKYLAKE: # %bb.0: 3945 ; SKYLAKE-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3946 ; SKYLAKE-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3947 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3948 ; 3949 ; SKX-LABEL: test_pminsw: 3950 ; SKX: # %bb.0: 3951 ; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3952 ; SKX-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3953 ; SKX-NEXT: retq # sched: [7:1.00] 3954 ; 3955 ; ZNVER1-LABEL: test_pminsw: 3956 ; ZNVER1: # %bb.0: 3957 ; ZNVER1-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 3958 ; ZNVER1-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3959 ; ZNVER1-NEXT: retq # sched: [1:0.50] 3960 %1 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1) 3961 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 3962 %3 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %1, <16 x i16> %2) 3963 ret <16 x i16> %3 3964 } 3965 declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone 3966 3967 define <32 x i8> @test_pminub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 3968 ; GENERIC-LABEL: test_pminub: 3969 ; GENERIC: # %bb.0: 3970 ; GENERIC-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3971 ; GENERIC-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3972 ; GENERIC-NEXT: retq # sched: [1:1.00] 3973 ; 3974 ; HASWELL-LABEL: test_pminub: 3975 ; HASWELL: # %bb.0: 3976 ; HASWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3977 ; HASWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3978 ; HASWELL-NEXT: retq # sched: [7:1.00] 3979 ; 3980 ; BROADWELL-LABEL: test_pminub: 3981 ; BROADWELL: # %bb.0: 3982 ; BROADWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 
3983 ; BROADWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 3984 ; BROADWELL-NEXT: retq # sched: [7:1.00] 3985 ; 3986 ; SKYLAKE-LABEL: test_pminub: 3987 ; SKYLAKE: # %bb.0: 3988 ; SKYLAKE-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3989 ; SKYLAKE-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3990 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 3991 ; 3992 ; SKX-LABEL: test_pminub: 3993 ; SKX: # %bb.0: 3994 ; SKX-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 3995 ; SKX-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 3996 ; SKX-NEXT: retq # sched: [7:1.00] 3997 ; 3998 ; ZNVER1-LABEL: test_pminub: 3999 ; ZNVER1: # %bb.0: 4000 ; ZNVER1-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4001 ; ZNVER1-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4002 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4003 %1 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1) 4004 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 4005 %3 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %1, <32 x i8> %2) 4006 ret <32 x i8> %3 4007 } 4008 declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone 4009 4010 define <8 x i32> @test_pminud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 4011 ; GENERIC-LABEL: test_pminud: 4012 ; GENERIC: # %bb.0: 4013 ; GENERIC-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4014 ; GENERIC-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4015 ; GENERIC-NEXT: retq # sched: [1:1.00] 4016 ; 4017 ; HASWELL-LABEL: test_pminud: 4018 ; HASWELL: # %bb.0: 4019 ; HASWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4020 ; HASWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4021 ; HASWELL-NEXT: retq # sched: [7:1.00] 4022 ; 4023 ; BROADWELL-LABEL: test_pminud: 4024 ; BROADWELL: # %bb.0: 4025 ; BROADWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4026 ; BROADWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 4027 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4028 ; 4029 ; 
SKYLAKE-LABEL: test_pminud: 4030 ; SKYLAKE: # %bb.0: 4031 ; SKYLAKE-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4032 ; SKYLAKE-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4033 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4034 ; 4035 ; SKX-LABEL: test_pminud: 4036 ; SKX: # %bb.0: 4037 ; SKX-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4038 ; SKX-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4039 ; SKX-NEXT: retq # sched: [7:1.00] 4040 ; 4041 ; ZNVER1-LABEL: test_pminud: 4042 ; ZNVER1: # %bb.0: 4043 ; ZNVER1-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4044 ; ZNVER1-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4045 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4046 %1 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1) 4047 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 4048 %3 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %1, <8 x i32> %2) 4049 ret <8 x i32> %3 4050 } 4051 declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone 4052 4053 define <16 x i16> @test_pminuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 4054 ; GENERIC-LABEL: test_pminuw: 4055 ; GENERIC: # %bb.0: 4056 ; GENERIC-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4057 ; GENERIC-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4058 ; GENERIC-NEXT: retq # sched: [1:1.00] 4059 ; 4060 ; HASWELL-LABEL: test_pminuw: 4061 ; HASWELL: # %bb.0: 4062 ; HASWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4063 ; HASWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4064 ; HASWELL-NEXT: retq # sched: [7:1.00] 4065 ; 4066 ; BROADWELL-LABEL: test_pminuw: 4067 ; BROADWELL: # %bb.0: 4068 ; BROADWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4069 ; BROADWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 4070 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4071 ; 4072 ; SKYLAKE-LABEL: test_pminuw: 4073 ; SKYLAKE: # %bb.0: 4074 ; SKYLAKE-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4075 ; 
SKYLAKE-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4076 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4077 ; 4078 ; SKX-LABEL: test_pminuw: 4079 ; SKX: # %bb.0: 4080 ; SKX-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4081 ; SKX-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4082 ; SKX-NEXT: retq # sched: [7:1.00] 4083 ; 4084 ; ZNVER1-LABEL: test_pminuw: 4085 ; ZNVER1: # %bb.0: 4086 ; ZNVER1-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4087 ; ZNVER1-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 4088 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4089 %1 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1) 4090 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 4091 %3 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %1, <16 x i16> %2) 4092 ret <16 x i16> %3 4093 } 4094 declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone 4095 4096 define i32 @test_pmovmskb(<32 x i8> %a0) { 4097 ; GENERIC-LABEL: test_pmovmskb: 4098 ; GENERIC: # %bb.0: 4099 ; GENERIC-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] 4100 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] 4101 ; GENERIC-NEXT: retq # sched: [1:1.00] 4102 ; 4103 ; HASWELL-LABEL: test_pmovmskb: 4104 ; HASWELL: # %bb.0: 4105 ; HASWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00] 4106 ; HASWELL-NEXT: vzeroupper # sched: [4:1.00] 4107 ; HASWELL-NEXT: retq # sched: [7:1.00] 4108 ; 4109 ; BROADWELL-LABEL: test_pmovmskb: 4110 ; BROADWELL: # %bb.0: 4111 ; BROADWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00] 4112 ; BROADWELL-NEXT: vzeroupper # sched: [4:1.00] 4113 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4114 ; 4115 ; SKYLAKE-LABEL: test_pmovmskb: 4116 ; SKYLAKE: # %bb.0: 4117 ; SKYLAKE-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] 4118 ; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00] 4119 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4120 ; 4121 ; SKX-LABEL: test_pmovmskb: 4122 ; SKX: # %bb.0: 4123 ; SKX-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00] 4124 ; SKX-NEXT: 
vzeroupper # sched: [4:1.00] 4125 ; SKX-NEXT: retq # sched: [7:1.00] 4126 ; 4127 ; ZNVER1-LABEL: test_pmovmskb: 4128 ; ZNVER1: # %bb.0: 4129 ; ZNVER1-NEXT: vpmovmskb %ymm0, %eax # sched: [2:2.00] 4130 ; ZNVER1-NEXT: vzeroupper # sched: [100:0.25] 4131 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4132 %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) 4133 ret i32 %1 4134 } 4135 declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone 4136 4137 define <8 x i32> @test_pmovsxbd(<16 x i8> %a0, <16 x i8> *%a1) { 4138 ; GENERIC-LABEL: test_pmovsxbd: 4139 ; GENERIC: # %bb.0: 4140 ; GENERIC-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:1.00] 4141 ; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] 4142 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4143 ; GENERIC-NEXT: retq # sched: [1:1.00] 4144 ; 4145 ; HASWELL-LABEL: test_pmovsxbd: 4146 ; HASWELL: # %bb.0: 4147 ; HASWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] 4148 ; HASWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] 4149 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4150 ; HASWELL-NEXT: retq # sched: [7:1.00] 4151 ; 4152 ; BROADWELL-LABEL: test_pmovsxbd: 4153 ; BROADWELL: # %bb.0: 4154 ; BROADWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] 4155 ; BROADWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] 4156 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4157 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4158 ; 4159 ; SKYLAKE-LABEL: test_pmovsxbd: 4160 ; SKYLAKE: # %bb.0: 4161 ; SKYLAKE-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] 4162 ; SKYLAKE-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] 4163 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4164 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4165 ; 4166 ; SKX-LABEL: test_pmovsxbd: 4167 ; SKX: # %bb.0: 4168 ; SKX-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00] 4169 ; SKX-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00] 4170 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4171 ; SKX-NEXT: retq 
# sched: [7:1.00] 4172 ; 4173 ; ZNVER1-LABEL: test_pmovsxbd: 4174 ; ZNVER1: # %bb.0: 4175 ; ZNVER1-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:0.50] 4176 ; ZNVER1-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:0.50] 4177 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4178 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4179 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 4180 %2 = sext <8 x i8> %1 to <8 x i32> 4181 %3 = load <16 x i8>, <16 x i8> *%a1, align 16 4182 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 4183 %5 = sext <8 x i8> %4 to <8 x i32> 4184 %6 = add <8 x i32> %2, %5 4185 ret <8 x i32> %6 4186 } 4187 4188 define <4 x i64> @test_pmovsxbq(<16 x i8> %a0, <16 x i8> *%a1) { 4189 ; GENERIC-LABEL: test_pmovsxbq: 4190 ; GENERIC: # %bb.0: 4191 ; GENERIC-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:1.00] 4192 ; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] 4193 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4194 ; GENERIC-NEXT: retq # sched: [1:1.00] 4195 ; 4196 ; HASWELL-LABEL: test_pmovsxbq: 4197 ; HASWELL: # %bb.0: 4198 ; HASWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] 4199 ; HASWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] 4200 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4201 ; HASWELL-NEXT: retq # sched: [7:1.00] 4202 ; 4203 ; BROADWELL-LABEL: test_pmovsxbq: 4204 ; BROADWELL: # %bb.0: 4205 ; BROADWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] 4206 ; BROADWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] 4207 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4208 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4209 ; 4210 ; SKYLAKE-LABEL: test_pmovsxbq: 4211 ; SKYLAKE: # %bb.0: 4212 ; SKYLAKE-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] 4213 ; SKYLAKE-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] 4214 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: 
[1:0.33] 4215 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4216 ; 4217 ; SKX-LABEL: test_pmovsxbq: 4218 ; SKX: # %bb.0: 4219 ; SKX-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00] 4220 ; SKX-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00] 4221 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4222 ; SKX-NEXT: retq # sched: [7:1.00] 4223 ; 4224 ; ZNVER1-LABEL: test_pmovsxbq: 4225 ; ZNVER1: # %bb.0: 4226 ; ZNVER1-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:0.50] 4227 ; ZNVER1-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:0.50] 4228 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4229 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4230 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4231 %2 = sext <4 x i8> %1 to <4 x i64> 4232 %3 = load <16 x i8>, <16 x i8> *%a1, align 16 4233 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4234 %5 = sext <4 x i8> %4 to <4 x i64> 4235 %6 = add <4 x i64> %2, %5 4236 ret <4 x i64> %6 4237 } 4238 4239 define <16 x i16> @test_pmovsxbw(<16 x i8> %a0, <16 x i8> *%a1) { 4240 ; GENERIC-LABEL: test_pmovsxbw: 4241 ; GENERIC: # %bb.0: 4242 ; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00] 4243 ; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00] 4244 ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4245 ; GENERIC-NEXT: retq # sched: [1:1.00] 4246 ; 4247 ; HASWELL-LABEL: test_pmovsxbw: 4248 ; HASWELL: # %bb.0: 4249 ; HASWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] 4250 ; HASWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00] 4251 ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4252 ; HASWELL-NEXT: retq # sched: [7:1.00] 4253 ; 4254 ; BROADWELL-LABEL: test_pmovsxbw: 4255 ; BROADWELL: # %bb.0: 4256 ; BROADWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] 4257 ; BROADWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00] 4258 ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4259 ; BROADWELL-NEXT: retq # sched: 
[7:1.00] 4260 ; 4261 ; SKYLAKE-LABEL: test_pmovsxbw: 4262 ; SKYLAKE: # %bb.0: 4263 ; SKYLAKE-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] 4264 ; SKYLAKE-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00] 4265 ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4266 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4267 ; 4268 ; SKX-LABEL: test_pmovsxbw: 4269 ; SKX: # %bb.0: 4270 ; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00] 4271 ; SKX-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00] 4272 ; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4273 ; SKX-NEXT: retq # sched: [7:1.00] 4274 ; 4275 ; ZNVER1-LABEL: test_pmovsxbw: 4276 ; ZNVER1: # %bb.0: 4277 ; ZNVER1-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:0.50] 4278 ; ZNVER1-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:0.50] 4279 ; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4280 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4281 %1 = sext <16 x i8> %a0 to <16 x i16> 4282 %2 = load <16 x i8>, <16 x i8> *%a1, align 16 4283 %3 = sext <16 x i8> %2 to <16 x i16> 4284 %4 = add <16 x i16> %1, %3 4285 ret <16 x i16> %4 4286 } 4287 4288 define <4 x i64> @test_pmovsxdq(<4 x i32> %a0, <4 x i32> *%a1) { 4289 ; GENERIC-LABEL: test_pmovsxdq: 4290 ; GENERIC: # %bb.0: 4291 ; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00] 4292 ; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00] 4293 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4294 ; GENERIC-NEXT: retq # sched: [1:1.00] 4295 ; 4296 ; HASWELL-LABEL: test_pmovsxdq: 4297 ; HASWELL: # %bb.0: 4298 ; HASWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] 4299 ; HASWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00] 4300 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4301 ; HASWELL-NEXT: retq # sched: [7:1.00] 4302 ; 4303 ; BROADWELL-LABEL: test_pmovsxdq: 4304 ; BROADWELL: # %bb.0: 4305 ; BROADWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] 4306 ; BROADWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00] 4307 ; BROADWELL-NEXT: vpaddq 
%ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4308 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4309 ; 4310 ; SKYLAKE-LABEL: test_pmovsxdq: 4311 ; SKYLAKE: # %bb.0: 4312 ; SKYLAKE-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] 4313 ; SKYLAKE-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00] 4314 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4315 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4316 ; 4317 ; SKX-LABEL: test_pmovsxdq: 4318 ; SKX: # %bb.0: 4319 ; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00] 4320 ; SKX-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00] 4321 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4322 ; SKX-NEXT: retq # sched: [7:1.00] 4323 ; 4324 ; ZNVER1-LABEL: test_pmovsxdq: 4325 ; ZNVER1: # %bb.0: 4326 ; ZNVER1-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:0.50] 4327 ; ZNVER1-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:0.50] 4328 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4329 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4330 %1 = sext <4 x i32> %a0 to <4 x i64> 4331 %2 = load <4 x i32>, <4 x i32> *%a1, align 16 4332 %3 = sext <4 x i32> %2 to <4 x i64> 4333 %4 = add <4 x i64> %1, %3 4334 ret <4 x i64> %4 4335 } 4336 4337 define <8 x i32> @test_pmovsxwd(<8 x i16> %a0, <8 x i16> *%a1) { 4338 ; GENERIC-LABEL: test_pmovsxwd: 4339 ; GENERIC: # %bb.0: 4340 ; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00] 4341 ; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00] 4342 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4343 ; GENERIC-NEXT: retq # sched: [1:1.00] 4344 ; 4345 ; HASWELL-LABEL: test_pmovsxwd: 4346 ; HASWELL: # %bb.0: 4347 ; HASWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] 4348 ; HASWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00] 4349 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4350 ; HASWELL-NEXT: retq # sched: [7:1.00] 4351 ; 4352 ; BROADWELL-LABEL: test_pmovsxwd: 4353 ; BROADWELL: # %bb.0: 4354 ; BROADWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] 4355 ; BROADWELL-NEXT: 
vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00] 4356 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4357 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4358 ; 4359 ; SKYLAKE-LABEL: test_pmovsxwd: 4360 ; SKYLAKE: # %bb.0: 4361 ; SKYLAKE-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] 4362 ; SKYLAKE-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00] 4363 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4364 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4365 ; 4366 ; SKX-LABEL: test_pmovsxwd: 4367 ; SKX: # %bb.0: 4368 ; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00] 4369 ; SKX-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00] 4370 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4371 ; SKX-NEXT: retq # sched: [7:1.00] 4372 ; 4373 ; ZNVER1-LABEL: test_pmovsxwd: 4374 ; ZNVER1: # %bb.0: 4375 ; ZNVER1-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:0.50] 4376 ; ZNVER1-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:0.50] 4377 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4378 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4379 %1 = sext <8 x i16> %a0 to <8 x i32> 4380 %2 = load <8 x i16>, <8 x i16> *%a1, align 16 4381 %3 = sext <8 x i16> %2 to <8 x i32> 4382 %4 = add <8 x i32> %1, %3 4383 ret <8 x i32> %4 4384 } 4385 4386 define <4 x i64> @test_pmovsxwq(<8 x i16> %a0, <8 x i16> *%a1) { 4387 ; GENERIC-LABEL: test_pmovsxwq: 4388 ; GENERIC: # %bb.0: 4389 ; GENERIC-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:1.00] 4390 ; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] 4391 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4392 ; GENERIC-NEXT: retq # sched: [1:1.00] 4393 ; 4394 ; HASWELL-LABEL: test_pmovsxwq: 4395 ; HASWELL: # %bb.0: 4396 ; HASWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] 4397 ; HASWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] 4398 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4399 ; HASWELL-NEXT: retq # sched: [7:1.00] 4400 ; 4401 ; BROADWELL-LABEL: test_pmovsxwq: 4402 ; BROADWELL: # %bb.0: 4403 ; 
BROADWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] 4404 ; BROADWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] 4405 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4406 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4407 ; 4408 ; SKYLAKE-LABEL: test_pmovsxwq: 4409 ; SKYLAKE: # %bb.0: 4410 ; SKYLAKE-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] 4411 ; SKYLAKE-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] 4412 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4413 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4414 ; 4415 ; SKX-LABEL: test_pmovsxwq: 4416 ; SKX: # %bb.0: 4417 ; SKX-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00] 4418 ; SKX-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00] 4419 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4420 ; SKX-NEXT: retq # sched: [7:1.00] 4421 ; 4422 ; ZNVER1-LABEL: test_pmovsxwq: 4423 ; ZNVER1: # %bb.0: 4424 ; ZNVER1-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:0.50] 4425 ; ZNVER1-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:0.50] 4426 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4427 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4428 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4429 %2 = sext <4 x i16> %1 to <4 x i64> 4430 %3 = load <8 x i16>, <8 x i16> *%a1, align 16 4431 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4432 %5 = sext <4 x i16> %4 to <4 x i64> 4433 %6 = add <4 x i64> %2, %5 4434 ret <4 x i64> %6 4435 } 4436 4437 define <8 x i32> @test_pmovzxbd(<16 x i8> %a0, <16 x i8> *%a1) { 4438 ; GENERIC-LABEL: test_pmovzxbd: 4439 ; GENERIC: # %bb.0: 4440 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] 4441 ; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00] 4442 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4443 ; GENERIC-NEXT: retq # sched: [1:1.00] 4444 ; 4445 ; HASWELL-LABEL: test_pmovzxbd: 4446 ; HASWELL: # %bb.0: 4447 ; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] 4448 ; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] 4449 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4450 ; HASWELL-NEXT: retq # sched: [7:1.00] 4451 ; 4452 ; BROADWELL-LABEL: test_pmovzxbd: 4453 ; BROADWELL: # %bb.0: 4454 ; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] 4455 ; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [9:1.00] 4456 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4457 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4458 ; 4459 ; SKYLAKE-LABEL: test_pmovzxbd: 4460 ; SKYLAKE: # %bb.0: 4461 ; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] 4462 ; SKYLAKE-NEXT: vpmovzxbd 
{{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] 4463 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4464 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4465 ; 4466 ; SKX-LABEL: test_pmovzxbd: 4467 ; SKX: # %bb.0: 4468 ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00] 4469 ; SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00] 4470 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4471 ; SKX-NEXT: retq # sched: [7:1.00] 4472 ; 4473 ; ZNVER1-LABEL: test_pmovzxbd: 4474 ; ZNVER1: # %bb.0: 4475 ; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:0.50] 4476 ; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:0.50] 4477 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4478 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4479 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 4480 %2 = zext <8 x i8> %1 to <8 x i32> 4481 %3 = load <16 x i8>, <16 x i8> *%a1, align 16 4482 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 4483 %5 = zext <8 x i8> %4 to <8 x i32> 
4484 %6 = add <8 x i32> %2, %5 4485 ret <8 x i32> %6 4486 } 4487 4488 define <4 x i64> @test_pmovzxbq(<16 x i8> %a0, <16 x i8> *%a1) { 4489 ; GENERIC-LABEL: test_pmovzxbq: 4490 ; GENERIC: # %bb.0: 4491 ; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] 4492 ; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00] 4493 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4494 ; GENERIC-NEXT: retq # sched: [1:1.00] 4495 ; 4496 ; HASWELL-LABEL: test_pmovzxbq: 4497 ; HASWELL: # %bb.0: 4498 ; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] 4499 ; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] 4500 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4501 ; HASWELL-NEXT: retq # sched: [7:1.00] 4502 ; 4503 ; BROADWELL-LABEL: test_pmovzxbq: 4504 ; BROADWELL: # %bb.0: 4505 ; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] 4506 ; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00] 4507 ; BROADWELL-NEXT: vpaddq %ymm1, 
%ymm0, %ymm0 # sched: [1:0.50] 4508 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4509 ; 4510 ; SKYLAKE-LABEL: test_pmovzxbq: 4511 ; SKYLAKE: # %bb.0: 4512 ; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] 4513 ; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] 4514 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4515 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4516 ; 4517 ; SKX-LABEL: test_pmovzxbq: 4518 ; SKX: # %bb.0: 4519 ; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00] 4520 ; SKX-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] 4521 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4522 ; SKX-NEXT: retq # sched: [7:1.00] 4523 ; 4524 ; ZNVER1-LABEL: test_pmovzxbq: 4525 ; ZNVER1: # %bb.0: 4526 ; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50] 4527 ; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] 4528 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4529 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4530 %1 = 
shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4531 %2 = zext <4 x i8> %1 to <4 x i64> 4532 %3 = load <16 x i8>, <16 x i8> *%a1, align 16 4533 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4534 %5 = zext <4 x i8> %4 to <4 x i64> 4535 %6 = add <4 x i64> %2, %5 4536 ret <4 x i64> %6 4537 } 4538 4539 define <16 x i16> @test_pmovzxbw(<16 x i8> %a0, <16 x i8> *%a1) { 4540 ; GENERIC-LABEL: test_pmovzxbw: 4541 ; GENERIC: # %bb.0: 4542 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] 4543 ; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00] 4544 ; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4545 ; GENERIC-NEXT: retq # sched: [1:1.00] 4546 ; 4547 ; HASWELL-LABEL: test_pmovzxbw: 4548 ; HASWELL: # %bb.0: 4549 ; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] 4550 ; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] 4551 ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4552 ; HASWELL-NEXT: retq # sched: [7:1.00] 4553 ; 4554 ; BROADWELL-LABEL: test_pmovzxbw: 4555 ; BROADWELL: # %bb.0: 4556 ; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] 4557 ; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [9:1.00] 4558 ; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4559 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4560 ; 4561 ; SKYLAKE-LABEL: test_pmovzxbw: 4562 ; SKYLAKE: # %bb.0: 4563 ; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] 4564 ; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] 4565 ; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4566 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4567 ; 4568 ; SKX-LABEL: test_pmovzxbw: 4569 ; SKX: # %bb.0: 4570 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00] 4571 ; SKX-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00] 4572 ; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4573 ; SKX-NEXT: 
retq # sched: [7:1.00] 4574 ; 4575 ; ZNVER1-LABEL: test_pmovzxbw: 4576 ; ZNVER1: # %bb.0: 4577 ; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:0.50] 4578 ; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:0.50] 4579 ; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4580 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4581 %1 = zext <16 x i8> %a0 to <16 x i16> 4582 %2 = load <16 x i8>, <16 x i8> *%a1, align 16 4583 %3 = zext <16 x i8> %2 to <16 x i16> 4584 %4 = add <16 x i16> %1, %3 4585 ret <16 x i16> %4 4586 } 4587 4588 define <4 x i64> @test_pmovzxdq(<4 x i32> %a0, <4 x i32> *%a1) { 4589 ; GENERIC-LABEL: test_pmovzxdq: 4590 ; GENERIC: # %bb.0: 4591 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] 4592 ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00] 4593 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4594 ; GENERIC-NEXT: retq # sched: [1:1.00] 4595 ; 4596 ; HASWELL-LABEL: test_pmovzxdq: 4597 ; HASWELL: # %bb.0: 4598 ; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] 4599 ; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] 4600 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4601 ; HASWELL-NEXT: retq # sched: [7:1.00] 4602 ; 4603 ; BROADWELL-LABEL: test_pmovzxdq: 4604 ; BROADWELL: # %bb.0: 4605 ; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: 
[3:1.00] 4606 ; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00] 4607 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4608 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4609 ; 4610 ; SKYLAKE-LABEL: test_pmovzxdq: 4611 ; SKYLAKE: # %bb.0: 4612 ; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] 4613 ; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] 4614 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4615 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4616 ; 4617 ; SKX-LABEL: test_pmovzxdq: 4618 ; SKX: # %bb.0: 4619 ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] 4620 ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] 4621 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4622 ; SKX-NEXT: retq # sched: [7:1.00] 4623 ; 4624 ; ZNVER1-LABEL: test_pmovzxdq: 4625 ; ZNVER1: # %bb.0: 4626 ; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50] 4627 ; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] 4628 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4629 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4630 %1 = zext <4 x i32> %a0 to <4 x i64> 4631 %2 = load <4 x i32>, <4 x i32> *%a1, align 16 4632 %3 = zext <4 x i32> %2 to <4 x i64> 4633 %4 = add <4 x i64> %1, %3 4634 ret <4 x i64> %4 4635 } 4636 4637 define <8 x i32> @test_pmovzxwd(<8 x i16> %a0, <8 x i16> *%a1) { 4638 ; GENERIC-LABEL: test_pmovzxwd: 4639 ; GENERIC: # %bb.0: 4640 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] 4641 ; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] 4642 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4643 ; GENERIC-NEXT: retq # sched: [1:1.00] 4644 ; 4645 ; HASWELL-LABEL: test_pmovzxwd: 4646 ; HASWELL: # %bb.0: 4647 ; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 4648 ; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00] 4649 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4650 ; HASWELL-NEXT: retq # sched: [7:1.00] 4651 ; 4652 ; BROADWELL-LABEL: test_pmovzxwd: 4653 ; BROADWELL: # %bb.0: 4654 ; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 4655 ; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00] 4656 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4657 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4658 ; 4659 ; SKYLAKE-LABEL: test_pmovzxwd: 4660 ; SKYLAKE: # %bb.0: 4661 ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 4662 ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00] 4663 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4664 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4665 ; 4666 ; SKX-LABEL: test_pmovzxwd: 4667 ; SKX: # %bb.0: 4668 ; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00] 4669 ; SKX-NEXT: vpmovzxwd 
{{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00] 4670 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4671 ; SKX-NEXT: retq # sched: [7:1.00] 4672 ; 4673 ; ZNVER1-LABEL: test_pmovzxwd: 4674 ; ZNVER1: # %bb.0: 4675 ; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50] 4676 ; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] 4677 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4678 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4679 %1 = zext <8 x i16> %a0 to <8 x i32> 4680 %2 = load <8 x i16>, <8 x i16> *%a1, align 16 4681 %3 = zext <8 x i16> %2 to <8 x i32> 4682 %4 = add <8 x i32> %1, %3 4683 ret <8 x i32> %4 4684 } 4685 4686 define <4 x i64> @test_pmovzxwq(<8 x i16> %a0, <8 x i16> *%a1) { 4687 ; GENERIC-LABEL: test_pmovzxwq: 4688 ; GENERIC: # %bb.0: 4689 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] 4690 ; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00] 4691 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4692 ; GENERIC-NEXT: retq # sched: [1:1.00] 4693 ; 4694 ; HASWELL-LABEL: test_pmovzxwq: 4695 ; HASWELL: # %bb.0: 4696 ; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] 4697 ; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] 4698 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4699 ; HASWELL-NEXT: retq # sched: [7:1.00] 4700 ; 4701 ; BROADWELL-LABEL: 
test_pmovzxwq: 4702 ; BROADWELL: # %bb.0: 4703 ; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] 4704 ; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00] 4705 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 4706 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4707 ; 4708 ; SKYLAKE-LABEL: test_pmovzxwq: 4709 ; SKYLAKE: # %bb.0: 4710 ; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] 4711 ; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] 4712 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4713 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4714 ; 4715 ; SKX-LABEL: test_pmovzxwq: 4716 ; SKX: # %bb.0: 4717 ; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00] 4718 ; SKX-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] 4719 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 4720 ; SKX-NEXT: retq # sched: [7:1.00] 4721 ; 4722 ; ZNVER1-LABEL: test_pmovzxwq: 4723 ; ZNVER1: # %bb.0: 4724 ; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50] 4725 ; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] 4726 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 4727 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4728 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4729 %2 = 
zext <4 x i16> %1 to <4 x i64> 4730 %3 = load <8 x i16>, <8 x i16> *%a1, align 16 4731 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4732 %5 = zext <4 x i16> %4 to <4 x i64> 4733 %6 = add <4 x i64> %2, %5 4734 ret <4 x i64> %6 4735 } 4736 4737 define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 4738 ; GENERIC-LABEL: test_pmuldq: 4739 ; GENERIC: # %bb.0: 4740 ; GENERIC-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4741 ; GENERIC-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4742 ; GENERIC-NEXT: retq # sched: [1:1.00] 4743 ; 4744 ; HASWELL-LABEL: test_pmuldq: 4745 ; HASWELL: # %bb.0: 4746 ; HASWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4747 ; HASWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4748 ; HASWELL-NEXT: retq # sched: [7:1.00] 4749 ; 4750 ; BROADWELL-LABEL: test_pmuldq: 4751 ; BROADWELL: # %bb.0: 4752 ; BROADWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4753 ; BROADWELL-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4754 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4755 ; 4756 ; SKYLAKE-LABEL: test_pmuldq: 4757 ; SKYLAKE: # %bb.0: 4758 ; SKYLAKE-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4759 ; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4760 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4761 ; 4762 ; SKX-LABEL: test_pmuldq: 4763 ; SKX: # %bb.0: 4764 ; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4765 ; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4766 ; SKX-NEXT: retq # sched: [7:1.00] 4767 ; 4768 ; ZNVER1-LABEL: test_pmuldq: 4769 ; ZNVER1: # %bb.0: 4770 ; ZNVER1-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:1.00] 4771 ; ZNVER1-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4772 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4773 %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) 4774 %2 = bitcast <4 x i64> %1 to <8 x i32> 4775 %3 = load <8 x i32>, <8 x i32> *%a2, align 32 
4776 %4 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %2, <8 x i32> %3) 4777 ret <4 x i64> %4 4778 } 4779 declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone 4780 4781 define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 4782 ; GENERIC-LABEL: test_pmulhrsw: 4783 ; GENERIC: # %bb.0: 4784 ; GENERIC-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4785 ; GENERIC-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4786 ; GENERIC-NEXT: retq # sched: [1:1.00] 4787 ; 4788 ; HASWELL-LABEL: test_pmulhrsw: 4789 ; HASWELL: # %bb.0: 4790 ; HASWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4791 ; HASWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4792 ; HASWELL-NEXT: retq # sched: [7:1.00] 4793 ; 4794 ; BROADWELL-LABEL: test_pmulhrsw: 4795 ; BROADWELL: # %bb.0: 4796 ; BROADWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4797 ; BROADWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4798 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4799 ; 4800 ; SKYLAKE-LABEL: test_pmulhrsw: 4801 ; SKYLAKE: # %bb.0: 4802 ; SKYLAKE-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4803 ; SKYLAKE-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4804 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4805 ; 4806 ; SKX-LABEL: test_pmulhrsw: 4807 ; SKX: # %bb.0: 4808 ; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4809 ; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4810 ; SKX-NEXT: retq # sched: [7:1.00] 4811 ; 4812 ; ZNVER1-LABEL: test_pmulhrsw: 4813 ; ZNVER1: # %bb.0: 4814 ; ZNVER1-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] 4815 ; ZNVER1-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4816 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4817 %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) 4818 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 4819 %3 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %1, <16 x i16> %2) 
4820 ret <16 x i16> %3 4821 } 4822 declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone 4823 4824 define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 4825 ; GENERIC-LABEL: test_pmulhuw: 4826 ; GENERIC: # %bb.0: 4827 ; GENERIC-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4828 ; GENERIC-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4829 ; GENERIC-NEXT: retq # sched: [1:1.00] 4830 ; 4831 ; HASWELL-LABEL: test_pmulhuw: 4832 ; HASWELL: # %bb.0: 4833 ; HASWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4834 ; HASWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4835 ; HASWELL-NEXT: retq # sched: [7:1.00] 4836 ; 4837 ; BROADWELL-LABEL: test_pmulhuw: 4838 ; BROADWELL: # %bb.0: 4839 ; BROADWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4840 ; BROADWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4841 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4842 ; 4843 ; SKYLAKE-LABEL: test_pmulhuw: 4844 ; SKYLAKE: # %bb.0: 4845 ; SKYLAKE-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4846 ; SKYLAKE-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4847 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4848 ; 4849 ; SKX-LABEL: test_pmulhuw: 4850 ; SKX: # %bb.0: 4851 ; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4852 ; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4853 ; SKX-NEXT: retq # sched: [7:1.00] 4854 ; 4855 ; ZNVER1-LABEL: test_pmulhuw: 4856 ; ZNVER1: # %bb.0: 4857 ; ZNVER1-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] 4858 ; ZNVER1-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4859 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4860 %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) 4861 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 4862 %3 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %1, <16 x i16> %2) 4863 ret <16 x i16> %3 4864 } 4865 declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, 
<16 x i16>) nounwind readnone 4866 4867 define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 4868 ; GENERIC-LABEL: test_pmulhw: 4869 ; GENERIC: # %bb.0: 4870 ; GENERIC-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4871 ; GENERIC-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4872 ; GENERIC-NEXT: retq # sched: [1:1.00] 4873 ; 4874 ; HASWELL-LABEL: test_pmulhw: 4875 ; HASWELL: # %bb.0: 4876 ; HASWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4877 ; HASWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4878 ; HASWELL-NEXT: retq # sched: [7:1.00] 4879 ; 4880 ; BROADWELL-LABEL: test_pmulhw: 4881 ; BROADWELL: # %bb.0: 4882 ; BROADWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4883 ; BROADWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4884 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4885 ; 4886 ; SKYLAKE-LABEL: test_pmulhw: 4887 ; SKYLAKE: # %bb.0: 4888 ; SKYLAKE-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4889 ; SKYLAKE-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4890 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4891 ; 4892 ; SKX-LABEL: test_pmulhw: 4893 ; SKX: # %bb.0: 4894 ; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4895 ; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4896 ; SKX-NEXT: retq # sched: [7:1.00] 4897 ; 4898 ; ZNVER1-LABEL: test_pmulhw: 4899 ; ZNVER1: # %bb.0: 4900 ; ZNVER1-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] 4901 ; ZNVER1-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4902 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4903 %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) 4904 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 4905 %3 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %1, <16 x i16> %2) 4906 ret <16 x i16> %3 4907 } 4908 declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone 4909 4910 define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) 
{ 4911 ; GENERIC-LABEL: test_pmulld: 4912 ; GENERIC: # %bb.0: 4913 ; GENERIC-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4914 ; GENERIC-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4915 ; GENERIC-NEXT: retq # sched: [1:1.00] 4916 ; 4917 ; HASWELL-LABEL: test_pmulld: 4918 ; HASWELL: # %bb.0: 4919 ; HASWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00] 4920 ; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:2.00] 4921 ; HASWELL-NEXT: retq # sched: [7:1.00] 4922 ; 4923 ; BROADWELL-LABEL: test_pmulld: 4924 ; BROADWELL: # %bb.0: 4925 ; BROADWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00] 4926 ; BROADWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [16:2.00] 4927 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4928 ; 4929 ; SKYLAKE-LABEL: test_pmulld: 4930 ; SKYLAKE: # %bb.0: 4931 ; SKYLAKE-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:1.00] 4932 ; SKYLAKE-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:1.00] 4933 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4934 ; 4935 ; SKX-LABEL: test_pmulld: 4936 ; SKX: # %bb.0: 4937 ; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:1.00] 4938 ; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:1.00] 4939 ; SKX-NEXT: retq # sched: [7:1.00] 4940 ; 4941 ; ZNVER1-LABEL: test_pmulld: 4942 ; ZNVER1: # %bb.0: 4943 ; ZNVER1-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:2.00] 4944 ; ZNVER1-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:2.00] 4945 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4946 %1 = mul <8 x i32> %a0, %a1 4947 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 4948 %3 = mul <8 x i32> %1, %2 4949 ret <8 x i32> %3 4950 } 4951 4952 define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 4953 ; GENERIC-LABEL: test_pmullw: 4954 ; GENERIC: # %bb.0: 4955 ; GENERIC-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4956 ; GENERIC-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4957 ; GENERIC-NEXT: retq # sched: [1:1.00] 4958 ; 4959 ; HASWELL-LABEL: test_pmullw: 4960 ; HASWELL: 
# %bb.0: 4961 ; HASWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4962 ; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4963 ; HASWELL-NEXT: retq # sched: [7:1.00] 4964 ; 4965 ; BROADWELL-LABEL: test_pmullw: 4966 ; BROADWELL: # %bb.0: 4967 ; BROADWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4968 ; BROADWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4969 ; BROADWELL-NEXT: retq # sched: [7:1.00] 4970 ; 4971 ; SKYLAKE-LABEL: test_pmullw: 4972 ; SKYLAKE: # %bb.0: 4973 ; SKYLAKE-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4974 ; SKYLAKE-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4975 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 4976 ; 4977 ; SKX-LABEL: test_pmullw: 4978 ; SKX: # %bb.0: 4979 ; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 4980 ; SKX-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 4981 ; SKX-NEXT: retq # sched: [7:1.00] 4982 ; 4983 ; ZNVER1-LABEL: test_pmullw: 4984 ; ZNVER1: # %bb.0: 4985 ; ZNVER1-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:1.00] 4986 ; ZNVER1-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 4987 ; ZNVER1-NEXT: retq # sched: [1:0.50] 4988 %1 = mul <16 x i16> %a0, %a1 4989 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 4990 %3 = mul <16 x i16> %1, %2 4991 ret <16 x i16> %3 4992 } 4993 4994 define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 4995 ; GENERIC-LABEL: test_pmuludq: 4996 ; GENERIC: # %bb.0: 4997 ; GENERIC-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 4998 ; GENERIC-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 4999 ; GENERIC-NEXT: retq # sched: [1:1.00] 5000 ; 5001 ; HASWELL-LABEL: test_pmuludq: 5002 ; HASWELL: # %bb.0: 5003 ; HASWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 5004 ; HASWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 5005 ; HASWELL-NEXT: retq # sched: [7:1.00] 5006 ; 5007 ; BROADWELL-LABEL: test_pmuludq: 5008 ; BROADWELL: # %bb.0: 5009 ; BROADWELL-NEXT: vpmuludq 
%ymm1, %ymm0, %ymm0 # sched: [5:1.00] 5010 ; BROADWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5011 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5012 ; 5013 ; SKYLAKE-LABEL: test_pmuludq: 5014 ; SKYLAKE: # %bb.0: 5015 ; SKYLAKE-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 5016 ; SKYLAKE-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 5017 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5018 ; 5019 ; SKX-LABEL: test_pmuludq: 5020 ; SKX: # %bb.0: 5021 ; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.50] 5022 ; SKX-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] 5023 ; SKX-NEXT: retq # sched: [7:1.00] 5024 ; 5025 ; ZNVER1-LABEL: test_pmuludq: 5026 ; ZNVER1: # %bb.0: 5027 ; ZNVER1-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:1.00] 5028 ; ZNVER1-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5029 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5030 %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1) 5031 %2 = bitcast <4 x i64> %1 to <8 x i32> 5032 %3 = load <8 x i32>, <8 x i32> *%a2, align 32 5033 %4 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %2, <8 x i32> %3) 5034 ret <4 x i64> %4 5035 } 5036 declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone 5037 5038 define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 5039 ; GENERIC-LABEL: test_por: 5040 ; GENERIC: # %bb.0: 5041 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5042 ; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5043 ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5044 ; GENERIC-NEXT: retq # sched: [1:1.00] 5045 ; 5046 ; HASWELL-LABEL: test_por: 5047 ; HASWELL: # %bb.0: 5048 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5049 ; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5050 ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5051 ; HASWELL-NEXT: retq # sched: [7:1.00] 5052 ; 5053 ; BROADWELL-LABEL: test_por: 5054 ; BROADWELL: # %bb.0: 
5055 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5056 ; BROADWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 5057 ; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5058 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5059 ; 5060 ; SKYLAKE-LABEL: test_por: 5061 ; SKYLAKE: # %bb.0: 5062 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5063 ; SKYLAKE-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5064 ; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5065 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5066 ; 5067 ; SKX-LABEL: test_por: 5068 ; SKX: # %bb.0: 5069 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5070 ; SKX-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5071 ; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5072 ; SKX-NEXT: retq # sched: [7:1.00] 5073 ; 5074 ; ZNVER1-LABEL: test_por: 5075 ; ZNVER1: # %bb.0: 5076 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5077 ; ZNVER1-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5078 ; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5079 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5080 %1 = or <4 x i64> %a0, %a1 5081 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 5082 %3 = or <4 x i64> %1, %2 5083 %4 = add <4 x i64> %3, %a1 5084 ret <4 x i64> %4 5085 } 5086 5087 define <4 x i64> @test_psadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 5088 ; GENERIC-LABEL: test_psadbw: 5089 ; GENERIC: # %bb.0: 5090 ; GENERIC-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 5091 ; GENERIC-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 5092 ; GENERIC-NEXT: retq # sched: [1:1.00] 5093 ; 5094 ; HASWELL-LABEL: test_psadbw: 5095 ; HASWELL: # %bb.0: 5096 ; HASWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] 5097 ; HASWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [12:1.00] 5098 ; HASWELL-NEXT: retq # sched: [7:1.00] 5099 ; 5100 ; BROADWELL-LABEL: test_psadbw: 5101 ; BROADWELL: # %bb.0: 5102 ; BROADWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # 
sched: [5:1.00] 5103 ; BROADWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5104 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5105 ; 5106 ; SKYLAKE-LABEL: test_psadbw: 5107 ; SKYLAKE: # %bb.0: 5108 ; SKYLAKE-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 5109 ; SKYLAKE-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 5110 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5111 ; 5112 ; SKX-LABEL: test_psadbw: 5113 ; SKX: # %bb.0: 5114 ; SKX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 5115 ; SKX-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 5116 ; SKX-NEXT: retq # sched: [7:1.00] 5117 ; 5118 ; ZNVER1-LABEL: test_psadbw: 5119 ; ZNVER1: # %bb.0: 5120 ; ZNVER1-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] 5121 ; ZNVER1-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00] 5122 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5123 %1 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) 5124 %2 = bitcast <4 x i64> %1 to <32 x i8> 5125 %3 = load <32 x i8>, <32 x i8> *%a2, align 32 5126 %4 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %2, <32 x i8> %3) 5127 ret <4 x i64> %4 5128 } 5129 declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone 5130 5131 define <32 x i8> @test_pshufb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 5132 ; GENERIC-LABEL: test_pshufb: 5133 ; GENERIC: # %bb.0: 5134 ; GENERIC-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5135 ; GENERIC-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5136 ; GENERIC-NEXT: retq # sched: [1:1.00] 5137 ; 5138 ; HASWELL-LABEL: test_pshufb: 5139 ; HASWELL: # %bb.0: 5140 ; HASWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5141 ; HASWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5142 ; HASWELL-NEXT: retq # sched: [7:1.00] 5143 ; 5144 ; BROADWELL-LABEL: test_pshufb: 5145 ; BROADWELL: # %bb.0: 5146 ; BROADWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5147 ; BROADWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 
5148 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5149 ; 5150 ; SKYLAKE-LABEL: test_pshufb: 5151 ; SKYLAKE: # %bb.0: 5152 ; SKYLAKE-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5153 ; SKYLAKE-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5154 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5155 ; 5156 ; SKX-LABEL: test_pshufb: 5157 ; SKX: # %bb.0: 5158 ; SKX-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5159 ; SKX-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5160 ; SKX-NEXT: retq # sched: [7:1.00] 5161 ; 5162 ; ZNVER1-LABEL: test_pshufb: 5163 ; ZNVER1: # %bb.0: 5164 ; ZNVER1-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5165 ; ZNVER1-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5166 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5167 %1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) 5168 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 5169 %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> %2) 5170 ret <32 x i8> %3 5171 } 5172 declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone 5173 5174 define <8 x i32> @test_pshufd(<8 x i32> %a0, <8 x i32> *%a1) { 5175 ; GENERIC-LABEL: test_pshufd: 5176 ; GENERIC: # %bb.0: 5177 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 5178 ; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00] 5179 ; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5180 ; GENERIC-NEXT: retq # sched: [1:1.00] 5181 ; 5182 ; HASWELL-LABEL: test_pshufd: 5183 ; HASWELL: # %bb.0: 5184 ; HASWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 5185 ; HASWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00] 5186 ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5187 ; HASWELL-NEXT: retq # sched: [7:1.00] 5188 ; 5189 ; BROADWELL-LABEL: test_pshufd: 5190 ; BROADWELL: # %bb.0: 5191 ; BROADWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 5192 ; 
BROADWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [7:1.00] 5193 ; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5194 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5195 ; 5196 ; SKYLAKE-LABEL: test_pshufd: 5197 ; SKYLAKE: # %bb.0: 5198 ; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 5199 ; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00] 5200 ; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5201 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5202 ; 5203 ; SKX-LABEL: test_pshufd: 5204 ; SKX: # %bb.0: 5205 ; SKX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] 5206 ; SKX-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00] 5207 ; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5208 ; SKX-NEXT: retq # sched: [7:1.00] 5209 ; 5210 ; ZNVER1-LABEL: test_pshufd: 5211 ; ZNVER1: # %bb.0: 5212 ; ZNVER1-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:0.50] 5213 ; ZNVER1-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.25] 5214 ; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5215 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5216 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 5217 %2 = load <8 x i32>, <8 x i32> *%a1, align 32 5218 %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 5219 %4 = add <8 x i32> %1, %3 5220 ret <8 x i32> %4 5221 } 5222 5223 define <16 x i16> @test_pshufhw(<16 x i16> %a0, <16 x i16> *%a1) { 5224 ; GENERIC-LABEL: test_pshufhw: 5225 ; GENERIC: # %bb.0: 5226 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] 5227 ; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00] 5228 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5229 ; GENERIC-NEXT: retq # sched: [1:1.00] 
5230 ; 5231 ; HASWELL-LABEL: test_pshufhw: 5232 ; HASWELL: # %bb.0: 5233 ; HASWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] 5234 ; HASWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00] 5235 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5236 ; HASWELL-NEXT: retq # sched: [7:1.00] 5237 ; 5238 ; BROADWELL-LABEL: test_pshufhw: 5239 ; BROADWELL: # %bb.0: 5240 ; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] 5241 ; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [7:1.00] 5242 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5243 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5244 ; 5245 ; SKYLAKE-LABEL: test_pshufhw: 5246 ; SKYLAKE: # %bb.0: 5247 ; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] 5248 ; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00] 5249 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5250 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5251 ; 5252 ; SKX-LABEL: test_pshufhw: 5253 ; SKX: # %bb.0: 5254 ; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00] 5255 ; SKX-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00] 5256 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5257 ; SKX-NEXT: retq # sched: [7:1.00] 5258 ; 5259 ; ZNVER1-LABEL: test_pshufhw: 5260 ; ZNVER1: # %bb.0: 5261 ; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:0.50] 5262 ; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:0.25] 5263 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5264 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5265 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 
1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 13, i32 12> 5266 %2 = load <16 x i16>, <16 x i16> *%a1, align 32 5267 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 12, i32 15, i32 14> 5268 %4 = or <16 x i16> %1, %3 5269 ret <16 x i16> %4 5270 } 5271 5272 define <16 x i16> @test_pshuflw(<16 x i16> %a0, <16 x i16> *%a1) { 5273 ; GENERIC-LABEL: test_pshuflw: 5274 ; GENERIC: # %bb.0: 5275 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] 5276 ; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00] 5277 ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5278 ; GENERIC-NEXT: retq # sched: [1:1.00] 5279 ; 5280 ; HASWELL-LABEL: test_pshuflw: 5281 ; HASWELL: # %bb.0: 5282 ; HASWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] 5283 ; HASWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00] 5284 ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5285 ; HASWELL-NEXT: retq # sched: [7:1.00] 5286 ; 5287 ; BROADWELL-LABEL: test_pshuflw: 5288 ; BROADWELL: # %bb.0: 5289 ; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] 5290 ; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [7:1.00] 5291 ; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5292 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5293 ; 5294 ; SKYLAKE-LABEL: test_pshuflw: 5295 ; SKYLAKE: # %bb.0: 5296 ; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] 5297 ; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00] 5298 ; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5299 
; SKYLAKE-NEXT: retq # sched: [7:1.00] 5300 ; 5301 ; SKX-LABEL: test_pshuflw: 5302 ; SKX: # %bb.0: 5303 ; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00] 5304 ; SKX-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00] 5305 ; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 5306 ; SKX-NEXT: retq # sched: [7:1.00] 5307 ; 5308 ; ZNVER1-LABEL: test_pshuflw: 5309 ; ZNVER1: # %bb.0: 5310 ; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:0.50] 5311 ; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:0.25] 5312 ; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5313 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5314 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15> 5315 %2 = load <16 x i16>, <16 x i16> *%a1, align 32 5316 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15> 5317 %4 = or <16 x i16> %1, %3 5318 ret <16 x i16> %4 5319 } 5320 5321 define <32 x i8> @test_psignb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 5322 ; GENERIC-LABEL: test_psignb: 5323 ; GENERIC: # %bb.0: 5324 ; GENERIC-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5325 ; GENERIC-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5326 ; GENERIC-NEXT: retq # sched: [1:1.00] 5327 ; 5328 ; HASWELL-LABEL: test_psignb: 5329 ; HASWELL: # %bb.0: 5330 ; HASWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5331 ; HASWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5332 ; HASWELL-NEXT: retq # sched: [7:1.00] 5333 ; 5334 ; BROADWELL-LABEL: test_psignb: 5335 ; BROADWELL: # %bb.0: 5336 ; BROADWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5337 ; 
BROADWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 5338 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5339 ; 5340 ; SKYLAKE-LABEL: test_psignb: 5341 ; SKYLAKE: # %bb.0: 5342 ; SKYLAKE-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5343 ; SKYLAKE-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5344 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5345 ; 5346 ; SKX-LABEL: test_psignb: 5347 ; SKX: # %bb.0: 5348 ; SKX-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5349 ; SKX-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5350 ; SKX-NEXT: retq # sched: [7:1.00] 5351 ; 5352 ; ZNVER1-LABEL: test_psignb: 5353 ; ZNVER1: # %bb.0: 5354 ; ZNVER1-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5355 ; ZNVER1-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5356 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5357 %1 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) 5358 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 5359 %3 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %1, <32 x i8> %2) 5360 ret <32 x i8> %3 5361 } 5362 declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone 5363 5364 define <8 x i32> @test_psignd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 5365 ; GENERIC-LABEL: test_psignd: 5366 ; GENERIC: # %bb.0: 5367 ; GENERIC-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5368 ; GENERIC-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5369 ; GENERIC-NEXT: retq # sched: [1:1.00] 5370 ; 5371 ; HASWELL-LABEL: test_psignd: 5372 ; HASWELL: # %bb.0: 5373 ; HASWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5374 ; HASWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5375 ; HASWELL-NEXT: retq # sched: [7:1.00] 5376 ; 5377 ; BROADWELL-LABEL: test_psignd: 5378 ; BROADWELL: # %bb.0: 5379 ; BROADWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5380 ; BROADWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 5381 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5382 ; 5383 ; SKYLAKE-LABEL: 
test_psignd: 5384 ; SKYLAKE: # %bb.0: 5385 ; SKYLAKE-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5386 ; SKYLAKE-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5387 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5388 ; 5389 ; SKX-LABEL: test_psignd: 5390 ; SKX: # %bb.0: 5391 ; SKX-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5392 ; SKX-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5393 ; SKX-NEXT: retq # sched: [7:1.00] 5394 ; 5395 ; ZNVER1-LABEL: test_psignd: 5396 ; ZNVER1: # %bb.0: 5397 ; ZNVER1-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5398 ; ZNVER1-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5399 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5400 %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) 5401 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 5402 %3 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %1, <8 x i32> %2) 5403 ret <8 x i32> %3 5404 } 5405 declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone 5406 5407 define <16 x i16> @test_psignw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 5408 ; GENERIC-LABEL: test_psignw: 5409 ; GENERIC: # %bb.0: 5410 ; GENERIC-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5411 ; GENERIC-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5412 ; GENERIC-NEXT: retq # sched: [1:1.00] 5413 ; 5414 ; HASWELL-LABEL: test_psignw: 5415 ; HASWELL: # %bb.0: 5416 ; HASWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5417 ; HASWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5418 ; HASWELL-NEXT: retq # sched: [7:1.00] 5419 ; 5420 ; BROADWELL-LABEL: test_psignw: 5421 ; BROADWELL: # %bb.0: 5422 ; BROADWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5423 ; BROADWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 5424 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5425 ; 5426 ; SKYLAKE-LABEL: test_psignw: 5427 ; SKYLAKE: # %bb.0: 5428 ; SKYLAKE-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5429 ; SKYLAKE-NEXT: vpsignw 
(%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5430 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5431 ; 5432 ; SKX-LABEL: test_psignw: 5433 ; SKX: # %bb.0: 5434 ; SKX-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5435 ; SKX-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5436 ; SKX-NEXT: retq # sched: [7:1.00] 5437 ; 5438 ; ZNVER1-LABEL: test_psignw: 5439 ; ZNVER1: # %bb.0: 5440 ; ZNVER1-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 5441 ; ZNVER1-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5442 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5443 %1 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) 5444 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 5445 %3 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %1, <16 x i16> %2) 5446 ret <16 x i16> %3 5447 } 5448 declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone 5449 5450 define <8 x i32> @test_pslld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 5451 ; GENERIC-LABEL: test_pslld: 5452 ; GENERIC: # %bb.0: 5453 ; GENERIC-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5454 ; GENERIC-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5455 ; GENERIC-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] 5456 ; GENERIC-NEXT: retq # sched: [1:1.00] 5457 ; 5458 ; HASWELL-LABEL: test_pslld: 5459 ; HASWELL: # %bb.0: 5460 ; HASWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5461 ; HASWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5462 ; HASWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] 5463 ; HASWELL-NEXT: retq # sched: [7:1.00] 5464 ; 5465 ; BROADWELL-LABEL: test_pslld: 5466 ; BROADWELL: # %bb.0: 5467 ; BROADWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5468 ; BROADWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 5469 ; BROADWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00] 5470 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5471 ; 5472 ; SKYLAKE-LABEL: test_pslld: 5473 ; SKYLAKE: # %bb.0: 5474 ; SKYLAKE-NEXT: vpslld %xmm1, 
%ymm0, %ymm0 # sched: [4:1.00] 5475 ; SKYLAKE-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5476 ; SKYLAKE-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50] 5477 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5478 ; 5479 ; SKX-LABEL: test_pslld: 5480 ; SKX: # %bb.0: 5481 ; SKX-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5482 ; SKX-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5483 ; SKX-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50] 5484 ; SKX-NEXT: retq # sched: [7:1.00] 5485 ; 5486 ; ZNVER1-LABEL: test_pslld: 5487 ; ZNVER1: # %bb.0: 5488 ; ZNVER1-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [2:1.00] 5489 ; ZNVER1-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 5490 ; ZNVER1-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.25] 5491 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5492 %1 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) 5493 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 5494 %3 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %1, <4 x i32> %2) 5495 %4 = shl <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 5496 ret <8 x i32> %4 5497 } 5498 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone 5499 5500 define <32 x i8> @test_pslldq(<32 x i8> %a0) { 5501 ; GENERIC-LABEL: test_pslldq: 5502 ; GENERIC: # %bb.0: 5503 ; GENERIC-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] 5504 ; GENERIC-NEXT: retq # sched: [1:1.00] 5505 ; 5506 ; HASWELL-LABEL: test_pslldq: 5507 ; HASWELL: # %bb.0: 5508 ; HASWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] 5509 ; HASWELL-NEXT: retq # sched: [7:1.00] 5510 ; 5511 ; BROADWELL-LABEL: test_pslldq: 5512 ; BROADWELL: # %bb.0: 5513 ; BROADWELL-NEXT: vpslldq {{.*#+}} ymm0 = 
zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] 5514 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5515 ; 5516 ; SKYLAKE-LABEL: test_pslldq: 5517 ; SKYLAKE: # %bb.0: 5518 ; SKYLAKE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] 5519 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5520 ; 5521 ; SKX-LABEL: test_pslldq: 5522 ; SKX: # %bb.0: 5523 ; SKX-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00] 5524 ; SKX-NEXT: retq # sched: [7:1.00] 5525 ; 5526 ; ZNVER1-LABEL: test_pslldq: 5527 ; ZNVER1: # %bb.0: 5528 ; ZNVER1-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [2:1.00] 5529 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5530 %1 = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a0, <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60> 5531 ret <32 x i8> %1 5532 } 5533 5534 define <4 x i64> @test_psllq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 5535 ; GENERIC-LABEL: test_psllq: 5536 ; GENERIC: # %bb.0: 5537 ; GENERIC-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5538 ; GENERIC-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5539 ; GENERIC-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] 5540 ; GENERIC-NEXT: retq # sched: [1:1.00] 5541 ; 5542 ; HASWELL-LABEL: test_psllq: 5543 ; HASWELL: # %bb.0: 5544 ; HASWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5545 ; HASWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5546 ; HASWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # 
sched: [1:1.00] 5547 ; HASWELL-NEXT: retq # sched: [7:1.00] 5548 ; 5549 ; BROADWELL-LABEL: test_psllq: 5550 ; BROADWELL: # %bb.0: 5551 ; BROADWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5552 ; BROADWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 5553 ; BROADWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00] 5554 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5555 ; 5556 ; SKYLAKE-LABEL: test_psllq: 5557 ; SKYLAKE: # %bb.0: 5558 ; SKYLAKE-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5559 ; SKYLAKE-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5560 ; SKYLAKE-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50] 5561 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5562 ; 5563 ; SKX-LABEL: test_psllq: 5564 ; SKX: # %bb.0: 5565 ; SKX-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5566 ; SKX-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5567 ; SKX-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50] 5568 ; SKX-NEXT: retq # sched: [7:1.00] 5569 ; 5570 ; ZNVER1-LABEL: test_psllq: 5571 ; ZNVER1: # %bb.0: 5572 ; ZNVER1-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [2:1.00] 5573 ; ZNVER1-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 5574 ; ZNVER1-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.25] 5575 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5576 %1 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) 5577 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 5578 %3 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %1, <2 x i64> %2) 5579 %4 = shl <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2> 5580 ret <4 x i64> %4 5581 } 5582 declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone 5583 5584 define <4 x i32> @test_psllvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 5585 ; GENERIC-LABEL: test_psllvd: 5586 ; GENERIC: # %bb.0: 5587 ; GENERIC-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 5588 ; GENERIC-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 5589 ; GENERIC-NEXT: retq # sched: [1:1.00] 5590 ; 5591 ; HASWELL-LABEL: 
test_psllvd: 5592 ; HASWELL: # %bb.0: 5593 ; HASWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 5594 ; HASWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] 5595 ; HASWELL-NEXT: retq # sched: [7:1.00] 5596 ; 5597 ; BROADWELL-LABEL: test_psllvd: 5598 ; BROADWELL: # %bb.0: 5599 ; BROADWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 5600 ; BROADWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] 5601 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5602 ; 5603 ; SKYLAKE-LABEL: test_psllvd: 5604 ; SKYLAKE: # %bb.0: 5605 ; SKYLAKE-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5606 ; SKYLAKE-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 5607 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5608 ; 5609 ; SKX-LABEL: test_psllvd: 5610 ; SKX: # %bb.0: 5611 ; SKX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5612 ; SKX-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 5613 ; SKX-NEXT: retq # sched: [7:1.00] 5614 ; 5615 ; ZNVER1-LABEL: test_psllvd: 5616 ; ZNVER1: # %bb.0: 5617 ; ZNVER1-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5618 ; ZNVER1-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 5619 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5620 %1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) 5621 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 5622 %3 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %1, <4 x i32> %2) 5623 ret <4 x i32> %3 5624 } 5625 declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone 5626 5627 define <8 x i32> @test_psllvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 5628 ; GENERIC-LABEL: test_psllvd_ymm: 5629 ; GENERIC: # %bb.0: 5630 ; GENERIC-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5631 ; GENERIC-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5632 ; GENERIC-NEXT: retq # sched: [1:1.00] 5633 ; 5634 ; HASWELL-LABEL: test_psllvd_ymm: 5635 ; HASWELL: # %bb.0: 5636 ; HASWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 5637 ; HASWELL-NEXT: 
vpsllvd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 5638 ; HASWELL-NEXT: retq # sched: [7:1.00] 5639 ; 5640 ; BROADWELL-LABEL: test_psllvd_ymm: 5641 ; BROADWELL: # %bb.0: 5642 ; BROADWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 5643 ; BROADWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 5644 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5645 ; 5646 ; SKYLAKE-LABEL: test_psllvd_ymm: 5647 ; SKYLAKE: # %bb.0: 5648 ; SKYLAKE-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5649 ; SKYLAKE-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5650 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5651 ; 5652 ; SKX-LABEL: test_psllvd_ymm: 5653 ; SKX: # %bb.0: 5654 ; SKX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5655 ; SKX-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5656 ; SKX-NEXT: retq # sched: [7:1.00] 5657 ; 5658 ; ZNVER1-LABEL: test_psllvd_ymm: 5659 ; ZNVER1: # %bb.0: 5660 ; ZNVER1-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5661 ; ZNVER1-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5662 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5663 %1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) 5664 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 5665 %3 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %1, <8 x i32> %2) 5666 ret <8 x i32> %3 5667 } 5668 declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone 5669 5670 define <2 x i64> @test_psllvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 5671 ; GENERIC-LABEL: test_psllvq: 5672 ; GENERIC: # %bb.0: 5673 ; GENERIC-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 5674 ; GENERIC-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 5675 ; GENERIC-NEXT: retq # sched: [1:1.00] 5676 ; 5677 ; HASWELL-LABEL: test_psllvq: 5678 ; HASWELL: # %bb.0: 5679 ; HASWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 5680 ; HASWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 5681 ; HASWELL-NEXT: retq # sched: [7:1.00] 5682 ; 5683 ; 
BROADWELL-LABEL: test_psllvq: 5684 ; BROADWELL: # %bb.0: 5685 ; BROADWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 5686 ; BROADWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 5687 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5688 ; 5689 ; SKYLAKE-LABEL: test_psllvq: 5690 ; SKYLAKE: # %bb.0: 5691 ; SKYLAKE-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5692 ; SKYLAKE-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 5693 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5694 ; 5695 ; SKX-LABEL: test_psllvq: 5696 ; SKX: # %bb.0: 5697 ; SKX-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5698 ; SKX-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 5699 ; SKX-NEXT: retq # sched: [7:1.00] 5700 ; 5701 ; ZNVER1-LABEL: test_psllvq: 5702 ; ZNVER1: # %bb.0: 5703 ; ZNVER1-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5704 ; ZNVER1-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 5705 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5706 %1 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) 5707 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 5708 %3 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %1, <2 x i64> %2) 5709 ret <2 x i64> %3 5710 } 5711 declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone 5712 5713 define <4 x i64> @test_psllvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 5714 ; GENERIC-LABEL: test_psllvq_ymm: 5715 ; GENERIC: # %bb.0: 5716 ; GENERIC-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5717 ; GENERIC-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5718 ; GENERIC-NEXT: retq # sched: [1:1.00] 5719 ; 5720 ; HASWELL-LABEL: test_psllvq_ymm: 5721 ; HASWELL: # %bb.0: 5722 ; HASWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5723 ; HASWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5724 ; HASWELL-NEXT: retq # sched: [7:1.00] 5725 ; 5726 ; BROADWELL-LABEL: test_psllvq_ymm: 5727 ; BROADWELL: # %bb.0: 5728 ; BROADWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: 
[1:1.00] 5729 ; BROADWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 5730 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5731 ; 5732 ; SKYLAKE-LABEL: test_psllvq_ymm: 5733 ; SKYLAKE: # %bb.0: 5734 ; SKYLAKE-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5735 ; SKYLAKE-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5736 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5737 ; 5738 ; SKX-LABEL: test_psllvq_ymm: 5739 ; SKX: # %bb.0: 5740 ; SKX-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5741 ; SKX-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5742 ; SKX-NEXT: retq # sched: [7:1.00] 5743 ; 5744 ; ZNVER1-LABEL: test_psllvq_ymm: 5745 ; ZNVER1: # %bb.0: 5746 ; ZNVER1-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5747 ; ZNVER1-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5748 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5749 %1 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) 5750 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 5751 %3 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %1, <4 x i64> %2) 5752 ret <4 x i64> %3 5753 } 5754 declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone 5755 5756 define <16 x i16> @test_psllw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 5757 ; GENERIC-LABEL: test_psllw: 5758 ; GENERIC: # %bb.0: 5759 ; GENERIC-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5760 ; GENERIC-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5761 ; GENERIC-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] 5762 ; GENERIC-NEXT: retq # sched: [1:1.00] 5763 ; 5764 ; HASWELL-LABEL: test_psllw: 5765 ; HASWELL: # %bb.0: 5766 ; HASWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5767 ; HASWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5768 ; HASWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] 5769 ; HASWELL-NEXT: retq # sched: [7:1.00] 5770 ; 5771 ; BROADWELL-LABEL: test_psllw: 5772 ; BROADWELL: # %bb.0: 5773 ; BROADWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # 
sched: [4:1.00] 5774 ; BROADWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 5775 ; BROADWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00] 5776 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5777 ; 5778 ; SKYLAKE-LABEL: test_psllw: 5779 ; SKYLAKE: # %bb.0: 5780 ; SKYLAKE-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5781 ; SKYLAKE-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5782 ; SKYLAKE-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50] 5783 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5784 ; 5785 ; SKX-LABEL: test_psllw: 5786 ; SKX: # %bb.0: 5787 ; SKX-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5788 ; SKX-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5789 ; SKX-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50] 5790 ; SKX-NEXT: retq # sched: [7:1.00] 5791 ; 5792 ; ZNVER1-LABEL: test_psllw: 5793 ; ZNVER1: # %bb.0: 5794 ; ZNVER1-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [2:1.00] 5795 ; ZNVER1-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 5796 ; ZNVER1-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.25] 5797 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5798 %1 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) 5799 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 5800 %3 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %1, <8 x i16> %2) 5801 %4 = shl <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> 5802 ret <16 x i16> %4 5803 } 5804 declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone 5805 5806 define <8 x i32> @test_psrad(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 5807 ; GENERIC-LABEL: test_psrad: 5808 ; GENERIC: # %bb.0: 5809 ; GENERIC-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5810 ; GENERIC-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5811 ; GENERIC-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] 5812 ; GENERIC-NEXT: retq # sched: [1:1.00] 5813 ; 5814 ; HASWELL-LABEL: test_psrad: 5815 ; HASWELL: # 
%bb.0: 5816 ; HASWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5817 ; HASWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5818 ; HASWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] 5819 ; HASWELL-NEXT: retq # sched: [7:1.00] 5820 ; 5821 ; BROADWELL-LABEL: test_psrad: 5822 ; BROADWELL: # %bb.0: 5823 ; BROADWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5824 ; BROADWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 5825 ; BROADWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00] 5826 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5827 ; 5828 ; SKYLAKE-LABEL: test_psrad: 5829 ; SKYLAKE: # %bb.0: 5830 ; SKYLAKE-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5831 ; SKYLAKE-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5832 ; SKYLAKE-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50] 5833 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5834 ; 5835 ; SKX-LABEL: test_psrad: 5836 ; SKX: # %bb.0: 5837 ; SKX-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5838 ; SKX-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5839 ; SKX-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50] 5840 ; SKX-NEXT: retq # sched: [7:1.00] 5841 ; 5842 ; ZNVER1-LABEL: test_psrad: 5843 ; ZNVER1: # %bb.0: 5844 ; ZNVER1-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [2:1.00] 5845 ; ZNVER1-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 5846 ; ZNVER1-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.25] 5847 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5848 %1 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) 5849 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 5850 %3 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> %2) 5851 %4 = ashr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 5852 ret <8 x i32> %4 5853 } 5854 declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone 5855 5856 define <4 x i32> @test_psravd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 5857 ; GENERIC-LABEL: test_psravd: 5858 ; GENERIC: # 
%bb.0: 5859 ; GENERIC-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 5860 ; GENERIC-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 5861 ; GENERIC-NEXT: retq # sched: [1:1.00] 5862 ; 5863 ; HASWELL-LABEL: test_psravd: 5864 ; HASWELL: # %bb.0: 5865 ; HASWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 5866 ; HASWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] 5867 ; HASWELL-NEXT: retq # sched: [7:1.00] 5868 ; 5869 ; BROADWELL-LABEL: test_psravd: 5870 ; BROADWELL: # %bb.0: 5871 ; BROADWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 5872 ; BROADWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] 5873 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5874 ; 5875 ; SKYLAKE-LABEL: test_psravd: 5876 ; SKYLAKE: # %bb.0: 5877 ; SKYLAKE-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5878 ; SKYLAKE-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 5879 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5880 ; 5881 ; SKX-LABEL: test_psravd: 5882 ; SKX: # %bb.0: 5883 ; SKX-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5884 ; SKX-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 5885 ; SKX-NEXT: retq # sched: [7:1.00] 5886 ; 5887 ; ZNVER1-LABEL: test_psravd: 5888 ; ZNVER1: # %bb.0: 5889 ; ZNVER1-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 5890 ; ZNVER1-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 5891 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5892 %1 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) 5893 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 5894 %3 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %1, <4 x i32> %2) 5895 ret <4 x i32> %3 5896 } 5897 declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone 5898 5899 define <8 x i32> @test_psravd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 5900 ; GENERIC-LABEL: test_psravd_ymm: 5901 ; GENERIC: # %bb.0: 5902 ; GENERIC-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 5903 ; GENERIC-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # 
sched: [8:1.00] 5904 ; GENERIC-NEXT: retq # sched: [1:1.00] 5905 ; 5906 ; HASWELL-LABEL: test_psravd_ymm: 5907 ; HASWELL: # %bb.0: 5908 ; HASWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 5909 ; HASWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 5910 ; HASWELL-NEXT: retq # sched: [7:1.00] 5911 ; 5912 ; BROADWELL-LABEL: test_psravd_ymm: 5913 ; BROADWELL: # %bb.0: 5914 ; BROADWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 5915 ; BROADWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 5916 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5917 ; 5918 ; SKYLAKE-LABEL: test_psravd_ymm: 5919 ; SKYLAKE: # %bb.0: 5920 ; SKYLAKE-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5921 ; SKYLAKE-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5922 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5923 ; 5924 ; SKX-LABEL: test_psravd_ymm: 5925 ; SKX: # %bb.0: 5926 ; SKX-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5927 ; SKX-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5928 ; SKX-NEXT: retq # sched: [7:1.00] 5929 ; 5930 ; ZNVER1-LABEL: test_psravd_ymm: 5931 ; ZNVER1: # %bb.0: 5932 ; ZNVER1-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 5933 ; ZNVER1-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5934 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5935 %1 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) 5936 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 5937 %3 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %1, <8 x i32> %2) 5938 ret <8 x i32> %3 5939 } 5940 declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone 5941 5942 define <16 x i16> @test_psraw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 5943 ; GENERIC-LABEL: test_psraw: 5944 ; GENERIC: # %bb.0: 5945 ; GENERIC-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5946 ; GENERIC-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5947 ; GENERIC-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] 5948 ; GENERIC-NEXT: retq 
# sched: [1:1.00] 5949 ; 5950 ; HASWELL-LABEL: test_psraw: 5951 ; HASWELL: # %bb.0: 5952 ; HASWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5953 ; HASWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 5954 ; HASWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] 5955 ; HASWELL-NEXT: retq # sched: [7:1.00] 5956 ; 5957 ; BROADWELL-LABEL: test_psraw: 5958 ; BROADWELL: # %bb.0: 5959 ; BROADWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5960 ; BROADWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 5961 ; BROADWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00] 5962 ; BROADWELL-NEXT: retq # sched: [7:1.00] 5963 ; 5964 ; SKYLAKE-LABEL: test_psraw: 5965 ; SKYLAKE: # %bb.0: 5966 ; SKYLAKE-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5967 ; SKYLAKE-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5968 ; SKYLAKE-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50] 5969 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 5970 ; 5971 ; SKX-LABEL: test_psraw: 5972 ; SKX: # %bb.0: 5973 ; SKX-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5974 ; SKX-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 5975 ; SKX-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50] 5976 ; SKX-NEXT: retq # sched: [7:1.00] 5977 ; 5978 ; ZNVER1-LABEL: test_psraw: 5979 ; ZNVER1: # %bb.0: 5980 ; ZNVER1-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [2:1.00] 5981 ; ZNVER1-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 5982 ; ZNVER1-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.25] 5983 ; ZNVER1-NEXT: retq # sched: [1:0.50] 5984 %1 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) 5985 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 5986 %3 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> %2) 5987 %4 = ashr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> 5988 ret <16 x i16> %4 5989 } 5990 declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone 
5991 5992 define <8 x i32> @test_psrld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 5993 ; GENERIC-LABEL: test_psrld: 5994 ; GENERIC: # %bb.0: 5995 ; GENERIC-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 5996 ; GENERIC-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 5997 ; GENERIC-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] 5998 ; GENERIC-NEXT: retq # sched: [1:1.00] 5999 ; 6000 ; HASWELL-LABEL: test_psrld: 6001 ; HASWELL: # %bb.0: 6002 ; HASWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6003 ; HASWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 6004 ; HASWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] 6005 ; HASWELL-NEXT: retq # sched: [7:1.00] 6006 ; 6007 ; BROADWELL-LABEL: test_psrld: 6008 ; BROADWELL: # %bb.0: 6009 ; BROADWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6010 ; BROADWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 6011 ; BROADWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00] 6012 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6013 ; 6014 ; SKYLAKE-LABEL: test_psrld: 6015 ; SKYLAKE: # %bb.0: 6016 ; SKYLAKE-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6017 ; SKYLAKE-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6018 ; SKYLAKE-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50] 6019 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6020 ; 6021 ; SKX-LABEL: test_psrld: 6022 ; SKX: # %bb.0: 6023 ; SKX-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6024 ; SKX-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6025 ; SKX-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50] 6026 ; SKX-NEXT: retq # sched: [7:1.00] 6027 ; 6028 ; ZNVER1-LABEL: test_psrld: 6029 ; ZNVER1: # %bb.0: 6030 ; ZNVER1-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [2:1.00] 6031 ; ZNVER1-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 6032 ; ZNVER1-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.25] 6033 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6034 %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) 6035 %2 = load <4 x 
i32>, <4 x i32> *%a2, align 16 6036 %3 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %1, <4 x i32> %2) 6037 %4 = lshr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 6038 ret <8 x i32> %4 6039 } 6040 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone 6041 6042 define <32 x i8> @test_psrldq(<32 x i8> %a0) { 6043 ; GENERIC-LABEL: test_psrldq: 6044 ; GENERIC: # %bb.0: 6045 ; GENERIC-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] 6046 ; GENERIC-NEXT: retq # sched: [1:1.00] 6047 ; 6048 ; HASWELL-LABEL: test_psrldq: 6049 ; HASWELL: # %bb.0: 6050 ; HASWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] 6051 ; HASWELL-NEXT: retq # sched: [7:1.00] 6052 ; 6053 ; BROADWELL-LABEL: test_psrldq: 6054 ; BROADWELL: # %bb.0: 6055 ; BROADWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] 6056 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6057 ; 6058 ; SKYLAKE-LABEL: test_psrldq: 6059 ; SKYLAKE: # %bb.0: 6060 ; SKYLAKE-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] 6061 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6062 ; 6063 ; SKX-LABEL: test_psrldq: 6064 ; SKX: # %bb.0: 6065 ; SKX-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00] 6066 ; SKX-NEXT: retq # sched: [7:1.00] 6067 ; 6068 ; ZNVER1-LABEL: test_psrldq: 6069 ; ZNVER1: # %bb.0: 6070 ; ZNVER1-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero 
sched: [2:1.00] 6071 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6072 %1 = shufflevector <32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50> 6073 ret <32 x i8> %1 6074 } 6075 6076 define <4 x i64> @test_psrlq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 6077 ; GENERIC-LABEL: test_psrlq: 6078 ; GENERIC: # %bb.0: 6079 ; GENERIC-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6080 ; GENERIC-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 6081 ; GENERIC-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] 6082 ; GENERIC-NEXT: retq # sched: [1:1.00] 6083 ; 6084 ; HASWELL-LABEL: test_psrlq: 6085 ; HASWELL: # %bb.0: 6086 ; HASWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6087 ; HASWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 6088 ; HASWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] 6089 ; HASWELL-NEXT: retq # sched: [7:1.00] 6090 ; 6091 ; BROADWELL-LABEL: test_psrlq: 6092 ; BROADWELL: # %bb.0: 6093 ; BROADWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6094 ; BROADWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 6095 ; BROADWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00] 6096 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6097 ; 6098 ; SKYLAKE-LABEL: test_psrlq: 6099 ; SKYLAKE: # %bb.0: 6100 ; SKYLAKE-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6101 ; SKYLAKE-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6102 ; SKYLAKE-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50] 6103 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6104 ; 6105 ; SKX-LABEL: test_psrlq: 6106 ; SKX: # %bb.0: 6107 ; SKX-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6108 ; SKX-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6109 ; SKX-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50] 6110 ; SKX-NEXT: retq # 
sched: [7:1.00] 6111 ; 6112 ; ZNVER1-LABEL: test_psrlq: 6113 ; ZNVER1: # %bb.0: 6114 ; ZNVER1-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [2:1.00] 6115 ; ZNVER1-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [9:1.00] 6116 ; ZNVER1-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.25] 6117 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6118 %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) 6119 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 6120 %3 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %1, <2 x i64> %2) 6121 %4 = lshr <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2> 6122 ret <4 x i64> %4 6123 } 6124 declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone 6125 6126 define <4 x i32> @test_psrlvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { 6127 ; GENERIC-LABEL: test_psrlvd: 6128 ; GENERIC: # %bb.0: 6129 ; GENERIC-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6130 ; GENERIC-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 6131 ; GENERIC-NEXT: retq # sched: [1:1.00] 6132 ; 6133 ; HASWELL-LABEL: test_psrlvd: 6134 ; HASWELL: # %bb.0: 6135 ; HASWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 6136 ; HASWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] 6137 ; HASWELL-NEXT: retq # sched: [7:1.00] 6138 ; 6139 ; BROADWELL-LABEL: test_psrlvd: 6140 ; BROADWELL: # %bb.0: 6141 ; BROADWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] 6142 ; BROADWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] 6143 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6144 ; 6145 ; SKYLAKE-LABEL: test_psrlvd: 6146 ; SKYLAKE: # %bb.0: 6147 ; SKYLAKE-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6148 ; SKYLAKE-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6149 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6150 ; 6151 ; SKX-LABEL: test_psrlvd: 6152 ; SKX: # %bb.0: 6153 ; SKX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6154 ; SKX-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6155 ; SKX-NEXT: retq # sched: [7:1.00] 6156 ; 
6157 ; ZNVER1-LABEL: test_psrlvd: 6158 ; ZNVER1: # %bb.0: 6159 ; ZNVER1-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6160 ; ZNVER1-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 6161 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6162 %1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) 6163 %2 = load <4 x i32>, <4 x i32> *%a2, align 16 6164 %3 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %1, <4 x i32> %2) 6165 ret <4 x i32> %3 6166 } 6167 declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone 6168 6169 define <8 x i32> @test_psrlvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 6170 ; GENERIC-LABEL: test_psrlvd_ymm: 6171 ; GENERIC: # %bb.0: 6172 ; GENERIC-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 6173 ; GENERIC-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 6174 ; GENERIC-NEXT: retq # sched: [1:1.00] 6175 ; 6176 ; HASWELL-LABEL: test_psrlvd_ymm: 6177 ; HASWELL: # %bb.0: 6178 ; HASWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 6179 ; HASWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [10:2.00] 6180 ; HASWELL-NEXT: retq # sched: [7:1.00] 6181 ; 6182 ; BROADWELL-LABEL: test_psrlvd_ymm: 6183 ; BROADWELL: # %bb.0: 6184 ; BROADWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00] 6185 ; BROADWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] 6186 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6187 ; 6188 ; SKYLAKE-LABEL: test_psrlvd_ymm: 6189 ; SKYLAKE: # %bb.0: 6190 ; SKYLAKE-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6191 ; SKYLAKE-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6192 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6193 ; 6194 ; SKX-LABEL: test_psrlvd_ymm: 6195 ; SKX: # %bb.0: 6196 ; SKX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6197 ; SKX-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6198 ; SKX-NEXT: retq # sched: [7:1.00] 6199 ; 6200 ; ZNVER1-LABEL: test_psrlvd_ymm: 6201 ; ZNVER1: # %bb.0: 6202 ; ZNVER1-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # 
sched: [1:0.50] 6203 ; ZNVER1-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6204 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6205 %1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) 6206 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 6207 %3 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %1, <8 x i32> %2) 6208 ret <8 x i32> %3 6209 } 6210 declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone 6211 6212 define <2 x i64> @test_psrlvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { 6213 ; GENERIC-LABEL: test_psrlvq: 6214 ; GENERIC: # %bb.0: 6215 ; GENERIC-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6216 ; GENERIC-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 6217 ; GENERIC-NEXT: retq # sched: [1:1.00] 6218 ; 6219 ; HASWELL-LABEL: test_psrlvq: 6220 ; HASWELL: # %bb.0: 6221 ; HASWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6222 ; HASWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] 6223 ; HASWELL-NEXT: retq # sched: [7:1.00] 6224 ; 6225 ; BROADWELL-LABEL: test_psrlvq: 6226 ; BROADWELL: # %bb.0: 6227 ; BROADWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] 6228 ; BROADWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] 6229 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6230 ; 6231 ; SKYLAKE-LABEL: test_psrlvq: 6232 ; SKYLAKE: # %bb.0: 6233 ; SKYLAKE-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6234 ; SKYLAKE-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6235 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6236 ; 6237 ; SKX-LABEL: test_psrlvq: 6238 ; SKX: # %bb.0: 6239 ; SKX-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6240 ; SKX-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] 6241 ; SKX-NEXT: retq # sched: [7:1.00] 6242 ; 6243 ; ZNVER1-LABEL: test_psrlvq: 6244 ; ZNVER1: # %bb.0: 6245 ; ZNVER1-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] 6246 ; ZNVER1-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] 6247 ; ZNVER1-NEXT: retq # sched: [1:0.50] 
6248 %1 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) 6249 %2 = load <2 x i64>, <2 x i64> *%a2, align 16 6250 %3 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %1, <2 x i64> %2) 6251 ret <2 x i64> %3 6252 } 6253 declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone 6254 6255 define <4 x i64> @test_psrlvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 6256 ; GENERIC-LABEL: test_psrlvq_ymm: 6257 ; GENERIC: # %bb.0: 6258 ; GENERIC-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 6259 ; GENERIC-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 6260 ; GENERIC-NEXT: retq # sched: [1:1.00] 6261 ; 6262 ; HASWELL-LABEL: test_psrlvq_ymm: 6263 ; HASWELL: # %bb.0: 6264 ; HASWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 6265 ; HASWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 6266 ; HASWELL-NEXT: retq # sched: [7:1.00] 6267 ; 6268 ; BROADWELL-LABEL: test_psrlvq_ymm: 6269 ; BROADWELL: # %bb.0: 6270 ; BROADWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00] 6271 ; BROADWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 6272 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6273 ; 6274 ; SKYLAKE-LABEL: test_psrlvq_ymm: 6275 ; SKYLAKE: # %bb.0: 6276 ; SKYLAKE-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6277 ; SKYLAKE-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6278 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6279 ; 6280 ; SKX-LABEL: test_psrlvq_ymm: 6281 ; SKX: # %bb.0: 6282 ; SKX-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6283 ; SKX-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6284 ; SKX-NEXT: retq # sched: [7:1.00] 6285 ; 6286 ; ZNVER1-LABEL: test_psrlvq_ymm: 6287 ; ZNVER1: # %bb.0: 6288 ; ZNVER1-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6289 ; ZNVER1-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6290 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6291 %1 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) 6292 %2 = load <4 x i64>, 
<4 x i64> *%a2, align 32 6293 %3 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %1, <4 x i64> %2) 6294 ret <4 x i64> %3 6295 } 6296 declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone 6297 6298 define <16 x i16> @test_psrlw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { 6299 ; GENERIC-LABEL: test_psrlw: 6300 ; GENERIC: # %bb.0: 6301 ; GENERIC-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6302 ; GENERIC-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [11:1.00] 6303 ; GENERIC-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] 6304 ; GENERIC-NEXT: retq # sched: [1:1.00] 6305 ; 6306 ; HASWELL-LABEL: test_psrlw: 6307 ; HASWELL: # %bb.0: 6308 ; HASWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6309 ; HASWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:1.00] 6310 ; HASWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] 6311 ; HASWELL-NEXT: retq # sched: [7:1.00] 6312 ; 6313 ; BROADWELL-LABEL: test_psrlw: 6314 ; BROADWELL: # %bb.0: 6315 ; BROADWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6316 ; BROADWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] 6317 ; BROADWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00] 6318 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6319 ; 6320 ; SKYLAKE-LABEL: test_psrlw: 6321 ; SKYLAKE: # %bb.0: 6322 ; SKYLAKE-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6323 ; SKYLAKE-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6324 ; SKYLAKE-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50] 6325 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6326 ; 6327 ; SKX-LABEL: test_psrlw: 6328 ; SKX: # %bb.0: 6329 ; SKX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00] 6330 ; SKX-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6331 ; SKX-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50] 6332 ; SKX-NEXT: retq # sched: [7:1.00] 6333 ; 6334 ; ZNVER1-LABEL: test_psrlw: 6335 ; ZNVER1: # %bb.0: 6336 ; ZNVER1-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [2:1.00] 6337 ; ZNVER1-NEXT: vpsrlw (%rdi), %ymm0, 
%ymm0 # sched: [9:1.00] 6338 ; ZNVER1-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.25] 6339 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6340 %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) 6341 %2 = load <8 x i16>, <8 x i16> *%a2, align 16 6342 %3 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %1, <8 x i16> %2) 6343 %4 = lshr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> 6344 ret <16 x i16> %4 6345 } 6346 declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone 6347 6348 define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 6349 ; GENERIC-LABEL: test_psubb: 6350 ; GENERIC: # %bb.0: 6351 ; GENERIC-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6352 ; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6353 ; GENERIC-NEXT: retq # sched: [1:1.00] 6354 ; 6355 ; HASWELL-LABEL: test_psubb: 6356 ; HASWELL: # %bb.0: 6357 ; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6358 ; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6359 ; HASWELL-NEXT: retq # sched: [7:1.00] 6360 ; 6361 ; BROADWELL-LABEL: test_psubb: 6362 ; BROADWELL: # %bb.0: 6363 ; BROADWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6364 ; BROADWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 6365 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6366 ; 6367 ; SKYLAKE-LABEL: test_psubb: 6368 ; SKYLAKE: # %bb.0: 6369 ; SKYLAKE-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6370 ; SKYLAKE-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6371 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6372 ; 6373 ; SKX-LABEL: test_psubb: 6374 ; SKX: # %bb.0: 6375 ; SKX-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6376 ; SKX-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6377 ; SKX-NEXT: retq # sched: [7:1.00] 6378 ; 6379 ; ZNVER1-LABEL: test_psubb: 6380 ; ZNVER1: # %bb.0: 6381 ; ZNVER1-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # 
sched: [1:0.25] 6382 ; ZNVER1-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6383 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6384 %1 = sub <32 x i8> %a0, %a1 6385 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 6386 %3 = sub <32 x i8> %1, %2 6387 ret <32 x i8> %3 6388 } 6389 6390 define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 6391 ; GENERIC-LABEL: test_psubd: 6392 ; GENERIC: # %bb.0: 6393 ; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6394 ; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6395 ; GENERIC-NEXT: retq # sched: [1:1.00] 6396 ; 6397 ; HASWELL-LABEL: test_psubd: 6398 ; HASWELL: # %bb.0: 6399 ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6400 ; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6401 ; HASWELL-NEXT: retq # sched: [7:1.00] 6402 ; 6403 ; BROADWELL-LABEL: test_psubd: 6404 ; BROADWELL: # %bb.0: 6405 ; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6406 ; BROADWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 6407 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6408 ; 6409 ; SKYLAKE-LABEL: test_psubd: 6410 ; SKYLAKE: # %bb.0: 6411 ; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6412 ; SKYLAKE-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6413 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6414 ; 6415 ; SKX-LABEL: test_psubd: 6416 ; SKX: # %bb.0: 6417 ; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6418 ; SKX-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6419 ; SKX-NEXT: retq # sched: [7:1.00] 6420 ; 6421 ; ZNVER1-LABEL: test_psubd: 6422 ; ZNVER1: # %bb.0: 6423 ; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6424 ; ZNVER1-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6425 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6426 %1 = sub <8 x i32> %a0, %a1 6427 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 6428 %3 = sub <8 x i32> %1, %2 6429 ret <8 x i32> %3 6430 } 6431 6432 define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> 
%a1, <4 x i64> *%a2) { 6433 ; GENERIC-LABEL: test_psubq: 6434 ; GENERIC: # %bb.0: 6435 ; GENERIC-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6436 ; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6437 ; GENERIC-NEXT: retq # sched: [1:1.00] 6438 ; 6439 ; HASWELL-LABEL: test_psubq: 6440 ; HASWELL: # %bb.0: 6441 ; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6442 ; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6443 ; HASWELL-NEXT: retq # sched: [7:1.00] 6444 ; 6445 ; BROADWELL-LABEL: test_psubq: 6446 ; BROADWELL: # %bb.0: 6447 ; BROADWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6448 ; BROADWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 6449 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6450 ; 6451 ; SKYLAKE-LABEL: test_psubq: 6452 ; SKYLAKE: # %bb.0: 6453 ; SKYLAKE-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6454 ; SKYLAKE-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6455 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6456 ; 6457 ; SKX-LABEL: test_psubq: 6458 ; SKX: # %bb.0: 6459 ; SKX-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6460 ; SKX-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6461 ; SKX-NEXT: retq # sched: [7:1.00] 6462 ; 6463 ; ZNVER1-LABEL: test_psubq: 6464 ; ZNVER1: # %bb.0: 6465 ; ZNVER1-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6466 ; ZNVER1-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6467 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6468 %1 = sub <4 x i64> %a0, %a1 6469 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 6470 %3 = sub <4 x i64> %1, %2 6471 ret <4 x i64> %3 6472 } 6473 6474 define <32 x i8> @test_psubsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 6475 ; GENERIC-LABEL: test_psubsb: 6476 ; GENERIC: # %bb.0: 6477 ; GENERIC-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6478 ; GENERIC-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6479 ; GENERIC-NEXT: retq # sched: [1:1.00] 6480 ; 6481 ; HASWELL-LABEL: test_psubsb: 6482 ; HASWELL: # %bb.0: 
6483 ; HASWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6484 ; HASWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6485 ; HASWELL-NEXT: retq # sched: [7:1.00] 6486 ; 6487 ; BROADWELL-LABEL: test_psubsb: 6488 ; BROADWELL: # %bb.0: 6489 ; BROADWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6490 ; BROADWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 6491 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6492 ; 6493 ; SKYLAKE-LABEL: test_psubsb: 6494 ; SKYLAKE: # %bb.0: 6495 ; SKYLAKE-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6496 ; SKYLAKE-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6497 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6498 ; 6499 ; SKX-LABEL: test_psubsb: 6500 ; SKX: # %bb.0: 6501 ; SKX-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6502 ; SKX-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6503 ; SKX-NEXT: retq # sched: [7:1.00] 6504 ; 6505 ; ZNVER1-LABEL: test_psubsb: 6506 ; ZNVER1: # %bb.0: 6507 ; ZNVER1-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6508 ; ZNVER1-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6509 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6510 %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1) 6511 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 6512 %3 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %1, <32 x i8> %2) 6513 ret <32 x i8> %3 6514 } 6515 declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone 6516 6517 define <16 x i16> @test_psubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 6518 ; GENERIC-LABEL: test_psubsw: 6519 ; GENERIC: # %bb.0: 6520 ; GENERIC-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6521 ; GENERIC-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6522 ; GENERIC-NEXT: retq # sched: [1:1.00] 6523 ; 6524 ; HASWELL-LABEL: test_psubsw: 6525 ; HASWELL: # %bb.0: 6526 ; HASWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6527 ; HASWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: 
[8:0.50] 6528 ; HASWELL-NEXT: retq # sched: [7:1.00] 6529 ; 6530 ; BROADWELL-LABEL: test_psubsw: 6531 ; BROADWELL: # %bb.0: 6532 ; BROADWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6533 ; BROADWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 6534 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6535 ; 6536 ; SKYLAKE-LABEL: test_psubsw: 6537 ; SKYLAKE: # %bb.0: 6538 ; SKYLAKE-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6539 ; SKYLAKE-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6540 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6541 ; 6542 ; SKX-LABEL: test_psubsw: 6543 ; SKX: # %bb.0: 6544 ; SKX-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6545 ; SKX-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6546 ; SKX-NEXT: retq # sched: [7:1.00] 6547 ; 6548 ; ZNVER1-LABEL: test_psubsw: 6549 ; ZNVER1: # %bb.0: 6550 ; ZNVER1-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6551 ; ZNVER1-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6552 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6553 %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1) 6554 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 6555 %3 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %1, <16 x i16> %2) 6556 ret <16 x i16> %3 6557 } 6558 declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone 6559 6560 define <32 x i8> @test_psubusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 6561 ; GENERIC-LABEL: test_psubusb: 6562 ; GENERIC: # %bb.0: 6563 ; GENERIC-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6564 ; GENERIC-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6565 ; GENERIC-NEXT: retq # sched: [1:1.00] 6566 ; 6567 ; HASWELL-LABEL: test_psubusb: 6568 ; HASWELL: # %bb.0: 6569 ; HASWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6570 ; HASWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6571 ; HASWELL-NEXT: retq # sched: [7:1.00] 6572 ; 6573 ; BROADWELL-LABEL: test_psubusb: 6574 ; BROADWELL: # 
%bb.0: 6575 ; BROADWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6576 ; BROADWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 6577 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6578 ; 6579 ; SKYLAKE-LABEL: test_psubusb: 6580 ; SKYLAKE: # %bb.0: 6581 ; SKYLAKE-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6582 ; SKYLAKE-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6583 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6584 ; 6585 ; SKX-LABEL: test_psubusb: 6586 ; SKX: # %bb.0: 6587 ; SKX-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6588 ; SKX-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6589 ; SKX-NEXT: retq # sched: [7:1.00] 6590 ; 6591 ; ZNVER1-LABEL: test_psubusb: 6592 ; ZNVER1: # %bb.0: 6593 ; ZNVER1-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6594 ; ZNVER1-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6595 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6596 %1 = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1) 6597 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 6598 %3 = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %1, <32 x i8> %2) 6599 ret <32 x i8> %3 6600 } 6601 declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone 6602 6603 define <16 x i16> @test_psubusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 6604 ; GENERIC-LABEL: test_psubusw: 6605 ; GENERIC: # %bb.0: 6606 ; GENERIC-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6607 ; GENERIC-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6608 ; GENERIC-NEXT: retq # sched: [1:1.00] 6609 ; 6610 ; HASWELL-LABEL: test_psubusw: 6611 ; HASWELL: # %bb.0: 6612 ; HASWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6613 ; HASWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6614 ; HASWELL-NEXT: retq # sched: [7:1.00] 6615 ; 6616 ; BROADWELL-LABEL: test_psubusw: 6617 ; BROADWELL: # %bb.0: 6618 ; BROADWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6619 ; BROADWELL-NEXT: vpsubusw 
(%rdi), %ymm0, %ymm0 # sched: [7:0.50] 6620 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6621 ; 6622 ; SKYLAKE-LABEL: test_psubusw: 6623 ; SKYLAKE: # %bb.0: 6624 ; SKYLAKE-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6625 ; SKYLAKE-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6626 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6627 ; 6628 ; SKX-LABEL: test_psubusw: 6629 ; SKX: # %bb.0: 6630 ; SKX-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6631 ; SKX-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6632 ; SKX-NEXT: retq # sched: [7:1.00] 6633 ; 6634 ; ZNVER1-LABEL: test_psubusw: 6635 ; ZNVER1: # %bb.0: 6636 ; ZNVER1-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6637 ; ZNVER1-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6638 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6639 %1 = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1) 6640 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 6641 %3 = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %1, <16 x i16> %2) 6642 ret <16 x i16> %3 6643 } 6644 declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone 6645 6646 define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 6647 ; GENERIC-LABEL: test_psubw: 6648 ; GENERIC: # %bb.0: 6649 ; GENERIC-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6650 ; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6651 ; GENERIC-NEXT: retq # sched: [1:1.00] 6652 ; 6653 ; HASWELL-LABEL: test_psubw: 6654 ; HASWELL: # %bb.0: 6655 ; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6656 ; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6657 ; HASWELL-NEXT: retq # sched: [7:1.00] 6658 ; 6659 ; BROADWELL-LABEL: test_psubw: 6660 ; BROADWELL: # %bb.0: 6661 ; BROADWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6662 ; BROADWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:0.50] 6663 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6664 ; 6665 ; SKYLAKE-LABEL: 
test_psubw: 6666 ; SKYLAKE: # %bb.0: 6667 ; SKYLAKE-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6668 ; SKYLAKE-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6669 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6670 ; 6671 ; SKX-LABEL: test_psubw: 6672 ; SKX: # %bb.0: 6673 ; SKX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6674 ; SKX-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6675 ; SKX-NEXT: retq # sched: [7:1.00] 6676 ; 6677 ; ZNVER1-LABEL: test_psubw: 6678 ; ZNVER1: # %bb.0: 6679 ; ZNVER1-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6680 ; ZNVER1-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50] 6681 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6682 %1 = sub <16 x i16> %a0, %a1 6683 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 6684 %3 = sub <16 x i16> %1, %2 6685 ret <16 x i16> %3 6686 } 6687 6688 define <32 x i8> @test_punpckhbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 6689 ; GENERIC-LABEL: test_punpckhbw: 6690 ; GENERIC: # %bb.0: 6691 ; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] 6692 ; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00] 6693 ; GENERIC-NEXT: retq # sched: [1:1.00] 6694 ; 6695 ; HASWELL-LABEL: test_punpckhbw: 6696 ; HASWELL: # %bb.0: 6697 ; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = 
ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] 6698 ; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00] 6699 ; HASWELL-NEXT: retq # sched: [7:1.00] 6700 ; 6701 ; BROADWELL-LABEL: test_punpckhbw: 6702 ; BROADWELL: # %bb.0: 6703 ; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] 6704 ; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [7:1.00] 6705 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6706 ; 6707 ; SKYLAKE-LABEL: test_punpckhbw: 6708 ; SKYLAKE: # %bb.0: 6709 ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] 6710 ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = 
ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00] 6711 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6712 ; 6713 ; SKX-LABEL: test_punpckhbw: 6714 ; SKX: # %bb.0: 6715 ; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00] 6716 ; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00] 6717 ; SKX-NEXT: retq # sched: [7:1.00] 6718 ; 6719 ; ZNVER1-LABEL: test_punpckhbw: 6720 ; ZNVER1: # %bb.0: 6721 ; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:0.25] 6722 ; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:0.50] 6723 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6724 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 
42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63> 6725 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 6726 %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63> 6727 ret <32 x i8> %3 6728 } 6729 6730 define <8 x i32> @test_punpckhdq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 6731 ; GENERIC-LABEL: test_punpckhdq: 6732 ; GENERIC: # %bb.0: 6733 ; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 6734 ; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 6735 ; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6736 ; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6737 ; GENERIC-NEXT: retq # sched: [1:1.00] 6738 ; 6739 ; HASWELL-LABEL: test_punpckhdq: 6740 ; HASWELL: # %bb.0: 6741 ; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 6742 ; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 6743 ; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6744 ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6745 ; HASWELL-NEXT: retq # sched: [7:1.00] 6746 ; 6747 ; BROADWELL-LABEL: test_punpckhdq: 6748 ; BROADWELL: # %bb.0: 6749 ; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 6750 ; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = 
ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00] 6751 ; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6752 ; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6753 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6754 ; 6755 ; SKYLAKE-LABEL: test_punpckhdq: 6756 ; SKYLAKE: # %bb.0: 6757 ; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 6758 ; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 6759 ; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6760 ; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6761 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6762 ; 6763 ; SKX-LABEL: test_punpckhdq: 6764 ; SKX: # %bb.0: 6765 ; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] 6766 ; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00] 6767 ; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6768 ; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6769 ; SKX-NEXT: retq # sched: [7:1.00] 6770 ; 6771 ; ZNVER1-LABEL: test_punpckhdq: 6772 ; ZNVER1: # %bb.0: 6773 ; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.25] 6774 ; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:0.50] 6775 ; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25] 6776 ; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25] 6777 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6778 %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> 6779 %2 = load <8 x i32>, <8 x i32> *%a2, align 32 6780 %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 
6, i32 14, i32 7, i32 15> 6781 %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 6782 ret <8 x i32> %4 6783 } 6784 6785 define <4 x i64> @test_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { 6786 ; GENERIC-LABEL: test_punpckhqdq: 6787 ; GENERIC: # %bb.0: 6788 ; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 6789 ; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 6790 ; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 6791 ; GENERIC-NEXT: retq # sched: [1:1.00] 6792 ; 6793 ; HASWELL-LABEL: test_punpckhqdq: 6794 ; HASWELL: # %bb.0: 6795 ; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 6796 ; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 6797 ; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 6798 ; HASWELL-NEXT: retq # sched: [7:1.00] 6799 ; 6800 ; BROADWELL-LABEL: test_punpckhqdq: 6801 ; BROADWELL: # %bb.0: 6802 ; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 6803 ; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00] 6804 ; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50] 6805 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6806 ; 6807 ; SKYLAKE-LABEL: test_punpckhqdq: 6808 ; SKYLAKE: # %bb.0: 6809 ; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 6810 ; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 6811 ; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] 6812 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6813 ; 6814 ; SKX-LABEL: test_punpckhqdq: 6815 ; SKX: # %bb.0: 6816 ; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] 6817 ; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00] 6818 ; SKX-NEXT: vpaddq 
%ymm0, %ymm1, %ymm0 # sched: [1:0.33] 6819 ; SKX-NEXT: retq # sched: [7:1.00] 6820 ; 6821 ; ZNVER1-LABEL: test_punpckhqdq: 6822 ; ZNVER1: # %bb.0: 6823 ; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.25] 6824 ; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:0.50] 6825 ; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25] 6826 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6827 %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 6828 %2 = load <4 x i64>, <4 x i64> *%a2, align 32 6829 %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 6830 %4 = add <4 x i64> %1, %3 6831 ret <4 x i64> %4 6832 } 6833 6834 define <16 x i16> @test_punpckhwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { 6835 ; GENERIC-LABEL: test_punpckhwd: 6836 ; GENERIC: # %bb.0: 6837 ; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] 6838 ; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00] 6839 ; GENERIC-NEXT: retq # sched: [1:1.00] 6840 ; 6841 ; HASWELL-LABEL: test_punpckhwd: 6842 ; HASWELL: # %bb.0: 6843 ; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] 6844 ; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00] 6845 ; HASWELL-NEXT: retq # sched: [7:1.00] 6846 ; 6847 ; BROADWELL-LABEL: test_punpckhwd: 6848 ; BROADWELL: # %bb.0: 6849 ; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = 
ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] 6850 ; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [7:1.00] 6851 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6852 ; 6853 ; SKYLAKE-LABEL: test_punpckhwd: 6854 ; SKYLAKE: # %bb.0: 6855 ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] 6856 ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00] 6857 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6858 ; 6859 ; SKX-LABEL: test_punpckhwd: 6860 ; SKX: # %bb.0: 6861 ; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00] 6862 ; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00] 6863 ; SKX-NEXT: retq # sched: [7:1.00] 6864 ; 6865 ; ZNVER1-LABEL: test_punpckhwd: 6866 ; ZNVER1: # %bb.0: 6867 ; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:0.25] 6868 ; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:0.50] 6869 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6870 %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, 
i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 6871 %2 = load <16 x i16>, <16 x i16> *%a2, align 32 6872 %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> 6873 ret <16 x i16> %3 6874 } 6875 6876 define <32 x i8> @test_punpcklbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { 6877 ; GENERIC-LABEL: test_punpcklbw: 6878 ; GENERIC: # %bb.0: 6879 ; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] 6880 ; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00] 6881 ; GENERIC-NEXT: retq # sched: [1:1.00] 6882 ; 6883 ; HASWELL-LABEL: test_punpcklbw: 6884 ; HASWELL: # %bb.0: 6885 ; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] 6886 ; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00] 6887 ; HASWELL-NEXT: retq # sched: [7:1.00] 6888 ; 6889 ; 
BROADWELL-LABEL: test_punpcklbw: 6890 ; BROADWELL: # %bb.0: 6891 ; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] 6892 ; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [7:1.00] 6893 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6894 ; 6895 ; SKYLAKE-LABEL: test_punpcklbw: 6896 ; SKYLAKE: # %bb.0: 6897 ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] 6898 ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00] 6899 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6900 ; 6901 ; SKX-LABEL: test_punpcklbw: 6902 ; SKX: # %bb.0: 6903 ; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00] 6904 ; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00] 6905 ; SKX-NEXT: retq # sched: [7:1.00] 6906 ; 6907 ; ZNVER1-LABEL: test_punpcklbw: 6908 ; ZNVER1: # %bb.0: 6909 ; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:0.25] 6910 ; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:0.50] 6911 ; ZNVER1-NEXT: retq # sched: [1:0.50] 6912 %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55> 6913 %2 = load <32 x i8>, <32 x i8> *%a2, align 32 6914 %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55> 6915 ret <32 x i8> %3 6916 } 6917 6918 define <8 x i32> @test_punpckldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { 6919 ; GENERIC-LABEL: test_punpckldq: 6920 ; GENERIC: # %bb.0: 6921 ; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 6922 ; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 6923 ; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6924 ; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6925 ; GENERIC-NEXT: retq # sched: [1:1.00] 6926 ; 6927 ; HASWELL-LABEL: test_punpckldq: 6928 ; HASWELL: # %bb.0: 6929 ; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 6930 ; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 6931 ; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6932 ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6933 ; HASWELL-NEXT: retq # sched: [7:1.00] 6934 ; 6935 ; BROADWELL-LABEL: test_punpckldq: 6936 ; BROADWELL: # %bb.0: 6937 ; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 6938 ; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00] 6939 ; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6940 ; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] 6941 ; BROADWELL-NEXT: retq # sched: [7:1.00] 6942 ; 6943 ; SKYLAKE-LABEL: test_punpckldq: 6944 ; SKYLAKE: # %bb.0: 6945 ; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] 6946 ; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] 6947 ; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50] 6948 ; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] 6949 ; SKYLAKE-NEXT: retq # sched: [7:1.00] 6950 ; 6951 ; SKX-LABEL: test_punpckldq: 6952 ; SKX: # %bb.0: 6953 ; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 
= ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_punpckldq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.25]
; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:0.50]
; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25]
; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %2 = load <8 x i32>, <8 x i32> *%a2, align 32
  %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %4
}

; test_punpcklqdq - two <4 x i64> shuffles with mask <0, 4, 2, 6>: one pairs
; %a0 with %a1, the other pairs %a0 with the vector loaded from %a2. The two
; shuffle results are added. For every CPU prefix the expected output is a
; register-form vpunpcklqdq, a memory-form vpunpcklqdq and a vpaddq; only the
; "sched" annotations (printed because the RUN lines pass -print-schedule)
; differ between prefixes. The check lines are autogenerated (see the note at
; the top of the file), so regenerate them rather than editing by hand.
define <4 x i64> @test_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_punpcklqdq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_punpcklqdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_punpcklqdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_punpcklqdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_punpcklqdq:
; SKX: # %bb.0:
; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_punpcklqdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.25]
; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:0.50]
; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %2 = load <4 x i64>, <4 x i64> *%a2, align 32
  ; The second shuffle takes %a0 (not %1) as its first operand, so both
  ; shuffle results stay live until the add below combines them.
  %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  %4 = add <4 x i64> %1, %3
  ret <4 x i64> %4
}

; test_punpcklwd - two chained <16 x i16> shuffles using the mask
; <0,16,1,17,2,18,3,19,8,24,9,25,10,26,11,27>: first %a0 with %a1, then that
; result with the vector loaded from %a2. The second shuffle is returned
; directly; no arithmetic follows it. The expected output for every CPU prefix
; is a register-form vpunpcklwd followed by a memory-form vpunpcklwd.
define <16 x i16> @test_punpcklwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
; GENERIC-LABEL: test_punpcklwd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_punpcklwd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_punpcklwd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [7:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_punpcklwd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_punpcklwd:
; SKX: # %bb.0:
; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_punpcklwd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:0.25]
; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
  %2 = load <16 x i16>, <16 x i16> *%a2, align 32
  %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
  ret <16 x i16> %3
}

; test_pxor - xors %a0 with %a1, xors that result with the vector loaded from
; %a2, then adds %a1 to the result. The expected output for every CPU prefix is
; a register-form vpxor, a memory-form vpxor reading (%rdi), and a vpaddq.
define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
; GENERIC-LABEL: test_pxor:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pxor:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_pxor:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; BROADWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pxor:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_pxor:
; SKX: # %bb.0:
; SKX-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; ZNVER1-LABEL: test_pxor:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
  %1 = xor <4 x i64> %a0, %a1
  %2 = load <4 x i64>, <4 x i64> *%a2, align 32
  %3 = xor <4 x i64> %1, %2
  %4 = add <4 x i64> %3, %a1
  ret <4 x i64> %4
}

; Module-level metadata node holding the single operand i32 1. None of the
; three functions directly above reference it.
; NOTE(review): presumably attached to an instruction earlier in the file;
; confirm before removing.
!0 = !{i32 1}