; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX

; Codegen tests for vector compares (icmp/fcmp) on x86-64 with -mcpu=knl and
; -mcpu=skx. Most tests feed the compare result into a select and check that
; the compare writes a mask register (%k0/%k1) which then predicates a move.
; Each RUN line only verifies the directives carrying its own check prefix, so
; a function with checks for a single prefix is only verified for that CPU.

; fcmp ole + select on <16 x float>: compare into %k1, then masked move.
define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
; KNL-LABEL: test1:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmpleps %zmm1, %zmm0, %k1
; KNL-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask = fcmp ole <16 x float> %x, %y
  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
  ret <16 x float> %max
}

; Same as test1 for <8 x double>.
define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
; KNL-LABEL: test2:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmplepd %zmm1, %zmm0, %k1
; KNL-NEXT:    vmovapd %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask = fcmp ole <8 x double> %x, %y
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
  ret <8 x double> %max
}

; icmp eq against a loaded vector: the load is expected to fold into vpcmpeqd.
define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
; KNL-LABEL: test3:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpeqd (%rdi), %zmm0, %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %y = load <16 x i32>, <16 x i32>* %yp, align 4
  %mask = icmp eq <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; Unsigned icmp uge on <16 x i32>: expects the unsigned "not less than" compare.
define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
; KNL-LABEL: test4_unsigned:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpnltud %zmm1, %zmm0, %k1
; KNL-NEXT:    vmovdqa32 %zmm2, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask = icmp uge <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
  ret <16 x i32> %max
}

; icmp eq + select on <8 x i64>.
define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
; KNL-LABEL: test5:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask = icmp eq <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
  ret <8 x i64> %max
}

; Unsigned icmp ugt on <8 x i64>: expects the unsigned "not less or equal" compare.
define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
; KNL-LABEL: test6_unsigned:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1
; KNL-NEXT:    vmovdqa64 %zmm2, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask = icmp ugt <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
  ret <8 x i64> %max
}

; 128-bit fcmp olt against zero: the knl run expects a vector-register compare
; plus blend, the skx run expects a mask-register compare plus masked move.
define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
; KNL-LABEL: test7:
; KNL:       ## BB#0:
; KNL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; KNL-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
; KNL-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT:    retq
; SKX-LABEL: test7:
; SKX:       ## BB#0:
; SKX:    vxorps %xmm2, %xmm2, %xmm2
; SKX:    vcmpltps %xmm2, %xmm0, %k1
; SKX:    vmovaps %xmm0, %xmm1 {%k1}
; SKX:    vmovaps %zmm1, %zmm0
; SKX:    retq

  %mask = fcmp olt <4 x float> %a, zeroinitializer
  %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
  ret <4 x float>%c
}

; Same as test7 for <2 x double>.
define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
; KNL-LABEL: test8:
; KNL:       ## BB#0:
; KNL-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; KNL-NEXT:    vcmpltpd %xmm2, %xmm0, %xmm2
; KNL-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; KNL-NEXT:    retq
; SKX-LABEL: test8:
; SKX:       ## BB#0:
; SKX:    vxorpd %xmm2, %xmm2, %xmm2
; SKX:    vcmpltpd %xmm2, %xmm0, %k1
; SKX:    vmovapd %xmm0, %xmm1 {%k1}
; SKX:    vmovaps %zmm1, %zmm0
; SKX:    retq
  %mask = fcmp olt <2 x double> %a, zeroinitializer
  %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
  ret <2 x double>%c
}

; 256-bit icmp eq + select: the knl run expects zmm-register compare and blend.
define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
; KNL-LABEL: test9:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
; KNL-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    retq
  %mask = icmp eq <8 x i32> %x, %y
  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %max
}

; 256-bit fcmp oeq + select: zmm registers expected for knl, ymm compare for skx.
define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
; KNL-LABEL: test10:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
; KNL-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT:    retq
; SKX-LABEL: test10:
; SKX:       ## BB#0:
; SKX:    vcmpeqps %ymm1, %ymm0, %k1
; SKX:    vmovaps %ymm0, %ymm1 {%k1}
; SKX:    vmovaps %zmm1, %zmm0
; SKX:    retq

  %mask = fcmp oeq <8 x float> %x, %y
  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
  ret <8 x float> %max
}

; icmp ugt + select of the same operands is expected to become a single vpmaxud.
define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
; KNL-LABEL: test11_unsigned:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
; KNL-NEXT:    retq
  %mask = icmp ugt <8 x i32> %x, %y
  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %max
}

; <16 x i64> compare bitcast to i16: two 8-lane compares joined with kunpckbw.
define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; KNL-LABEL: test12:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0
; KNL-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
; KNL-NEXT:    kunpckbw %k0, %k1, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    retq
  %res = icmp eq <16 x i64> %a, %b
  %res1 = bitcast <16 x i1> %res to i16
  ret i16 %res1
}

; <32 x i32> compare bitcast to i32: two 16-lane compares joined with kunpckwd.
define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; SKX-LABEL: test12_v32i32:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpeqd %zmm2, %zmm0, %k0
; SKX-NEXT:    vpcmpeqd %zmm3, %zmm1, %k1
; SKX-NEXT:    kunpckwd %k0, %k1, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    retq
  %res = icmp eq <32 x i32> %a, %b
  %res1 = bitcast <32 x i1> %res to i32
  ret i32 %res1
}

; <64 x i16> compare bitcast to i64: two 32-lane compares joined with kunpckdq.
define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; SKX-LABEL: test12_v64i16:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpeqw %zmm2, %zmm0, %k0
; SKX-NEXT:    vpcmpeqw %zmm3, %zmm1, %k1
; SKX-NEXT:    kunpckdq %k0, %k1, %k0
; SKX-NEXT:    kmovq %k0, %rax
; SKX-NEXT:    retq
  %res = icmp eq <64 x i16> %a, %b
  %res1 = bitcast <64 x i1> %res to i64
  ret i64 %res1
}

; zext of a compare result: expects a zero-masked broadcast of a constant.
define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
; KNL-LABEL: test13:
; KNL:       ## BB#0:
; KNL-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT:    retq
{
  %cmpvector_i = fcmp oeq <16 x float> %a, %b
  %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
  ret <16 x i32> %conv
}

; sext of a compare, compared against zero, then select with zeroinitializer.
; NOTE(review): the expected output contains two consecutive knotw on the same
; mask; the checks record that sequence as-is.
define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
; KNL-LABEL: test14:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsubd %zmm1, %zmm0, %zmm1
; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
; KNL-NEXT:    knotw %k0, %k0
; KNL-NEXT:    knotw %k0, %k1
; KNL-NEXT:    vmovdqu32 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
  %sub_r = sub <16 x i32> %a, %b
  %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
  %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
  %mask = icmp eq <16 x i32> %sext.i3.i, zeroinitializer
  %res = select <16 x i1> %mask, <16 x i32> zeroinitializer, <16 x i32> %sub_r
  ret <16 x i32>%res
}

; Same pattern as test14 for <8 x i64>.
define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
; KNL-LABEL: test15:
; KNL:       ## BB#0:
; KNL-NEXT:    vpsubq %zmm1, %zmm0, %zmm1
; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; KNL-NEXT:    knotw %k0, %k0
; KNL-NEXT:    knotw %k0, %k1
; KNL-NEXT:    vmovdqu64 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
  %sub_r = sub <8 x i64> %a, %b
  %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
  %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
  %mask = icmp eq <8 x i64> %sext.i3.i, zeroinitializer
  %res = select <8 x i1> %mask, <8 x i64> zeroinitializer, <8 x i64> %sub_r
  ret <8 x i64>%res
}

; icmp sge x, y is expected as vpcmpled with the operands swapped.
define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
; KNL-LABEL: test16:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpled %zmm0, %zmm1, %k1
; KNL-NEXT:    vmovdqa32 %zmm2, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask = icmp sge <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
  ret <16 x i32> %max
}

; icmp sgt with a loaded right-hand operand: load folded into vpcmpgtd.
define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; KNL-LABEL: test17:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtd (%rdi), %zmm0, %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp sgt <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; icmp sle with a loaded right-hand operand: load folded into vpcmpled.
define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; KNL-LABEL: test18:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpled (%rdi), %zmm0, %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp sle <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; icmp ule with a loaded right-hand operand: load folded into vpcmpleud.
define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
; KNL-LABEL: test19:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpleud (%rdi), %zmm0, %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp ule <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; Two compares combined via select-with-zero: the second compare is expected
; to be predicated on the first compare's mask ({%k1} on the compare itself).
define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; KNL-LABEL: test20:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
; KNL-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask1 = icmp eq <16 x i32> %x1, %y1
  %mask0 = icmp eq <16 x i32> %x, %y
  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
  ret <16 x i32> %max
}

; Combined sle / sge compares on <8 x i64>, second compare masked by the first.
define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; KNL-LABEL: test21:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
; KNL-NEXT:    vpcmpleq %zmm2, %zmm3, %k1 {%k1}
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm2 {%k1}
; KNL-NEXT:    vmovaps %zmm2, %zmm0
; KNL-NEXT:    retq
  %mask1 = icmp sge <8 x i64> %x1, %y1
  %mask0 = icmp sle <8 x i64> %x, %y
  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}

; Combined sgt compares, one with a folded load, second compare masked.
define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; KNL-LABEL: test22:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1
; KNL-NEXT:    vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask1 = icmp sgt <8 x i64> %x1, %y1
  %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
  %mask0 = icmp sgt <8 x i64> %x, %y
  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}

; Combined sge / ule compares, the ule one with a folded load and masked.
define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; KNL-LABEL: test23:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpled %zmm1, %zmm2, %k1
; KNL-NEXT:    vpcmpleud (%rdi), %zmm0, %k1 {%k1}
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask1 = icmp sge <16 x i32> %x1, %y1
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask0 = icmp ule <16 x i32> %x, %y
  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; Compare against a splat of a loaded scalar: expects the {1to8} broadcast
; memory operand on the compare.
define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
; KNL-LABEL: test24:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k1
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %yb = load i64, i64* %yb.ptr, align 4
  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
  %mask = icmp eq <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}

; Broadcast-operand sle compare on <16 x i32> ({1to16}).
define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
; KNL-LABEL: test25:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpled (%rdi){1to16}, %zmm0, %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %yb = load i32, i32* %yb.ptr, align 4
  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
  %mask = icmp sle <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; Broadcast-operand compare that is additionally masked by another compare.
define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
; KNL-LABEL: test26:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpled %zmm1, %zmm2, %k1
; KNL-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask1 = icmp sge <16 x i32> %x1, %y1
  %yb = load i32, i32* %yb.ptr, align 4
  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
  %mask0 = icmp sgt <16 x i32> %x, %y
  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
  ret <16 x i32> %max
}

; Same as test26 for <8 x i64> with sle ({1to8}).
define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
; KNL-LABEL: test27:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpleq %zmm1, %zmm2, %k1
; KNL-NEXT:    vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:    retq
  %mask1 = icmp sge <8 x i64> %x1, %y1
  %yb = load i64, i64* %yb.ptr, align 4
  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
  %mask0 = icmp sle <8 x i64> %x, %y
  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
  ret <8 x i64> %max
}

; icmp eq between two i1 vectors: expects kxnorw on the two compare masks.
; KNL-LABEL: test28:
; KNL: vpcmpgtq
; KNL: vpcmpgtq
; KNL: kxnorw
define <8 x i32>@test28(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) {
  %x_gt_y = icmp sgt <8 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <8 x i64> %x1, %y1
  %res = icmp eq <8 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <8 x i1>%res to <8 x i32>
  ret <8 x i32> %resse
}

; icmp ne between two i1 vectors: expects kxorw on the two compare masks.
; KNL-LABEL: test29:
; KNL: vpcmpgtd
; KNL: vpcmpgtd
; KNL: kxorw
define <16 x i8>@test29(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) {
  %x_gt_y = icmp sgt <16 x i32> %x, %y
  %x1_gt_y1 = icmp sgt <16 x i32> %x1, %y1
  %res = icmp ne <16 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <16 x i1>%res to <16 x i8>
  ret <16 x i8> %resse
}

; The remaining tests carry checks for the skx run only: floating-point
; compares at 128/256/512 bits writing a mask register.

; 256-bit fcmp oeq + select.
define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
; SKX-LABEL: test30:
; SKX: vcmpeqpd   %ymm1, %ymm0, %k1
; SKX: vmovapd    %ymm0, %ymm1 {%k1}

  %mask = fcmp oeq <4 x double> %x, %y
  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y
  ret <4 x double> %max
}

; 128-bit fcmp olt with the right-hand operand loaded from memory.
define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp) nounwind {
; SKX-LABEL: test31:
; SKX: vcmpltpd   (%rdi), %xmm0, %k1
; SKX: vmovapd    %xmm0, %xmm1 {%k1}

  %y = load <2 x double>, <2 x double>* %yp, align 4
  %mask = fcmp olt <2 x double> %x, %y
  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
  ret <2 x double> %max
}

; fcmp ogt with the loaded value on the left: expects the "lt" form so that
; the load can stay as the memory operand.
define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp) nounwind {
; SKX-LABEL: test32:
; SKX: vcmpltpd   (%rdi), %ymm0, %k1
; SKX: vmovapd    %ymm0, %ymm1 {%k1}

  %y = load <4 x double>, <4 x double>* %yp, align 4
  %mask = fcmp ogt <4 x double> %y, %x
  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
  ret <4 x double> %max
}

; 512-bit fcmp olt with a folded load.
define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp) nounwind {
; SKX-LABEL: test33:
; SKX: vcmpltpd   (%rdi), %zmm0, %k1
; SKX: vmovapd    %zmm0, %zmm1 {%k1}
  %y = load <8 x double>, <8 x double>* %yp, align 4
  %mask = fcmp olt <8 x double> %x, %y
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
  ret <8 x double> %max
}

; 128-bit single-precision fcmp olt with a folded load.
define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) nounwind {
; SKX-LABEL: test34:
; SKX: vcmpltps   (%rdi), %xmm0, %k1
; SKX: vmovaps    %xmm0, %xmm1 {%k1}
  %y = load <4 x float>, <4 x float>* %yp, align 4
  %mask = fcmp olt <4 x float> %x, %y
  %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1
  ret <4 x float> %max
}

; 256-bit fcmp ogt with the loaded value on the left (see test32).
define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
; SKX-LABEL: test35:
; SKX: vcmpltps   (%rdi), %ymm0, %k1
; SKX: vmovaps    %ymm0, %ymm1 {%k1}

  %y = load <8 x float>, <8 x float>* %yp, align 4
  %mask = fcmp ogt <8 x float> %y, %x
  %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %x1
  ret <8 x float> %max
}

; 512-bit single-precision fcmp olt with a folded load.
define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp) nounwind {
; SKX-LABEL: test36:
; SKX: vcmpltps   (%rdi), %zmm0, %k1
; SKX: vmovaps    %zmm0, %zmm1 {%k1}
  %y = load <16 x float>, <16 x float>* %yp, align 4
  %mask = fcmp olt <16 x float> %x, %y
  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1
  ret <16 x float> %max
}

; Tests 37-42: compare against a splat of a loaded scalar; the compare is
; expected to use the {1toN} broadcast memory operand.

define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nounwind {
; SKX-LABEL: test37:
; SKX: vcmpltpd  (%rdi){1to8}, %zmm0, %k1
; SKX: vmovapd    %zmm0, %zmm1 {%k1}

  %a = load double, double* %ptr
  %v = insertelement <8 x double> undef, double %a, i32 0
  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer

  %mask = fcmp ogt <8 x double> %shuffle, %x
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
  ret <8 x double> %max
}

define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nounwind {
; SKX-LABEL: test38:
; SKX: vcmpltpd  (%rdi){1to4}, %ymm0, %k1
; SKX: vmovapd    %ymm0, %ymm1 {%k1}

  %a = load double, double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer

  %mask = fcmp ogt <4 x double> %shuffle, %x
  %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %x1
  ret <4 x double> %max
}

define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nounwind {
; SKX-LABEL: test39:
; SKX: vcmpltpd  (%rdi){1to2}, %xmm0, %k1
; SKX: vmovapd    %xmm0, %xmm1 {%k1}

  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>

  %mask = fcmp ogt <2 x double> %shuffle, %x
  %max = select <2 x i1> %mask, <2 x double> %x, <2 x double> %x1
  ret <2 x double> %max
}


define <16  x float> @test40(<16  x float> %x, <16  x float> %x1, float* %ptr) nounwind {
; SKX-LABEL: test40:
; SKX: vcmpltps  (%rdi){1to16}, %zmm0, %k1
; SKX: vmovaps    %zmm0, %zmm1 {%k1}

  %a = load float, float* %ptr
  %v = insertelement <16  x float> undef, float %a, i32 0
  %shuffle = shufflevector <16  x float> %v, <16  x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>

  %mask = fcmp ogt <16  x float> %shuffle, %x
  %max = select <16 x i1> %mask, <16  x float> %x, <16  x float> %x1
  ret <16  x float> %max
}

define <8  x float> @test41(<8  x float> %x, <8  x float> %x1, float* %ptr) nounwind {
; SKX-LABEL: test41:
; SKX: vcmpltps  (%rdi){1to8}, %ymm0, %k1
; SKX: vmovaps    %ymm0, %ymm1 {%k1}

  %a = load float, float* %ptr
  %v = insertelement <8  x float> undef, float %a, i32 0
  %shuffle = shufflevector <8  x float> %v, <8  x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>

  %mask = fcmp ogt <8  x float> %shuffle, %x
  %max = select <8 x i1> %mask, <8  x float> %x, <8  x float> %x1
  ret <8  x float> %max
}

define <4  x float> @test42(<4  x float> %x, <4  x float> %x1, float* %ptr) nounwind {
; SKX-LABEL: test42:
; SKX: vcmpltps  (%rdi){1to4}, %xmm0, %k1
; SKX: vmovaps    %xmm0, %xmm1 {%k1}

  %a = load float, float* %ptr
  %v = insertelement <4  x float> undef, float %a, i32 0
  %shuffle = shufflevector <4  x float> %v, <4  x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>

  %mask = fcmp ogt <4  x float> %shuffle, %x
  %max = select <4 x i1> %mask, <4  x float> %x, <4  x float> %x1
  ret <4  x float> %max
}

; Broadcast compare and-ed with an incoming <8 x i1> mask: the mask argument
; is first moved into %k1 and then used to predicate the compare.
define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x i1> %mask_in) nounwind {
; SKX-LABEL: test43:
; SKX: vpmovw2m  %xmm2, %k1
; SKX: vcmpltpd  (%rdi){1to8}, %zmm0, %k1 {%k1}
; SKX: vmovapd    %zmm0, %zmm1 {%k1}

  %a = load double, double* %ptr
  %v = insertelement <8 x double> undef, double %a, i32 0
  %shuffle = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> zeroinitializer

  %mask_cmp = fcmp ogt <8 x double> %shuffle, %x
  %mask = and <8 x i1> %mask_cmp, %mask_in
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1
  ret <8 x double> %max
}