; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -stack-symbol-ordering=0 -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -stack-symbol-ordering=0 -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

; Reinterprets an i16 as <16 x i1>, flips every lane with an all-ones xor and
; returns the lanes packed back into an i16.
define i16 @mask16(i16 %x) {
; CHECK-LABEL: mask16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    knotw %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %lanes = bitcast i16 %x to <16 x i1>
  %flipped = xor <16 x i1> %lanes, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %packed = bitcast <16 x i1> %flipped to i16
  ret i16 %packed
}

; Same 16-lane inversion as above, but the packed i16 result is zero-extended
; to i32 before being returned.
define i32 @mask16_zext(i16 %x) {
; CHECK-LABEL: mask16_zext:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    knotw %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %lanes = bitcast i16 %x to <16 x i1>
  %flipped = xor <16 x i1> %lanes, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %packed = bitcast <16 x i1> %flipped to i16
  %wide = zext i16 %packed to i32
  ret i32 %wide
}

; Reinterprets an i8 as <8 x i1>, flips every lane with an all-ones xor and
; returns the lanes packed back into an i8.
define i8 @mask8(i8 %x) {
; KNL-LABEL: mask8:
; KNL:       ## BB#0:
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    knotw %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: mask8:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovb %edi, %k0
; SKX-NEXT:    knotb %k0, %k0
; SKX-NEXT:    kmovb %k0, %eax
; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT:    retq
  %lanes = bitcast i8 %x to <8 x i1>
  %flipped = xor <8 x i1> %lanes, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %packed = bitcast <8 x i1> %flipped to i8
  ret i8 %packed
}

; Same 8-lane inversion as above, but the packed i8 result is zero-extended to
; i32 before being returned.
define i32 @mask8_zext(i8 %x) {
; KNL-LABEL: mask8_zext:
; KNL:       ## BB#0:
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    knotw %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    retq
;
; SKX-LABEL: mask8_zext:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovb %edi, %k0
; SKX-NEXT:    knotb %k0, %k0
; SKX-NEXT:    kmovb %k0, %eax
; SKX-NEXT:    retq
  %lanes = bitcast i8 %x to <8 x i1>
  %flipped = xor <8 x i1> %lanes, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %packed = bitcast <8 x i1> %flipped to i8
  %wide = zext i8 %packed to i32
  ret i32 %wide
}

; Loads an i16 from %ptr, inverts it as a <16 x i1> mask and stores the result
; back through the same pointer.
define void @mask16_mem(i16* %ptr) {
; CHECK-LABEL: mask16_mem:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw (%rdi), %k0
; CHECK-NEXT:    knotw %k0, %k0
; CHECK-NEXT:    kmovw %k0, (%rdi)
; CHECK-NEXT:    retq
  %loaded = load i16, i16* %ptr, align 4
  %lanes = bitcast i16 %loaded to <16 x i1>
  %flipped = xor <16 x i1> %lanes, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %packed = bitcast <16 x i1> %flipped to i16
  store i16 %packed, i16* %ptr, align 4
  ret void
}

; Loads an i8 from %ptr, inverts it as an <8 x i1> mask and stores the result
; back through the same pointer.
define void @mask8_mem(i8* %ptr) {
; KNL-LABEL: mask8_mem:
; KNL:       ## BB#0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    kmovw %eax, %k0
; KNL-NEXT:    knotw %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: mask8_mem:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovb (%rdi), %k0
; SKX-NEXT:    knotb %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
  %loaded = load i8, i8* %ptr, align 4
  %lanes = bitcast i8 %loaded to <8 x i1>
  %flipped = xor <8 x i1> %lanes, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %packed = bitcast <8 x i1> %flipped to i8
  store i8 %packed, i8* %ptr, align 4
  ret void
}

; Treats both i16 arguments as <16 x i1> masks and returns
; (x and y) or (x xor y), packed back into an i16.
define i16 @mand16(i16 %x, i16 %y) {
; CHECK-LABEL: mand16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    xorl %esi, %eax
; CHECK-NEXT:    andl %esi, %edi
; CHECK-NEXT:    orl %eax, %edi
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
  %lhs = bitcast i16 %x to <16 x i1>
  %rhs = bitcast i16 %y to <16 x i1>
  %both = and <16 x i1> %lhs, %rhs
  %differ = xor <16 x i1> %lhs, %rhs
  %merged = or <16 x i1> %both, %differ
  %packed = bitcast <16 x i1> %merged to i16
  ret i16 %packed
}

; Same and/xor/or combination as mand16, but both <16 x i1> operands are
; loaded from memory instead of being bitcast from scalar arguments.
define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
; CHECK-LABEL: mand16_mem:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw (%rdi), %k0
; CHECK-NEXT:    kmovw (%rsi), %k1
; CHECK-NEXT:    kandw %k1, %k0, %k2
; CHECK-NEXT:    kxorw %k1, %k0, %k0
; CHECK-NEXT:    korw %k0, %k2, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %lhs = load <16 x i1>, <16 x i1>* %x
  %rhs = load <16 x i1>, <16 x i1>* %y
  %both = and <16 x i1> %lhs, %rhs
  %differ = xor <16 x i1> %lhs, %rhs
  %merged = or <16 x i1> %both, %differ
  %packed = bitcast <16 x i1> %merged to i16
  ret i16 %packed
}

; Extracts lanes 8..15 of a <16 x i1> mask with a shufflevector and returns
; them packed into an i8.
define i8 @shuf_test1(i16 %v) nounwind {
; KNL-LABEL: shuf_test1:
; KNL:       ## BB#0:
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kshiftrw $8, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT:    retq
;
; SKX-LABEL: shuf_test1:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovw %edi, %k0
; SKX-NEXT:    kshiftrw $8, %k0, %k0
; SKX-NEXT:    kmovb %k0, %eax
; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT:    retq
  %lanes = bitcast i16 %v to <16 x i1>
  %upper = shufflevector <16 x i1> %lanes, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %packed = bitcast <8 x i1> %upper to i8
  ret i8 %packed
}

; Compares two <16 x i32> vectors (unsigned greater-than), extracts lane 5 of
; the i1 result and zero-extends that single bit to i32.
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: zext_test1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; CHECK-NEXT:    kshiftlw $10, %k0, %k0
; CHECK-NEXT:    kshiftrw $15, %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %gt = icmp ugt <16 x i32> %a, %b
  %lane5 = extractelement <16 x i1> %gt, i32 5
  %wide = zext i1 %lane5 to i32
  ret i32 %wide
}

; Same as zext_test1, with the extracted bit zero-extended to i16.
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: zext_test2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; CHECK-NEXT:    kshiftlw $10, %k0, %k0
; CHECK-NEXT:    kshiftrw $15, %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    retq
  %gt = icmp ugt <16 x i32> %a, %b
  %lane5 = extractelement <16 x i1> %gt, i32 5
  %wide = zext i1 %lane5 to i16
  ret i16 %wide
}

; Same as zext_test1, with the extracted bit zero-extended to i8.
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: zext_test3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; CHECK-NEXT:    kshiftlw $10, %k0, %k0
; CHECK-NEXT:    kshiftrw $15, %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT:    retq
  %gt = icmp ugt <16 x i32> %a, %b
  %lane5 = extractelement <16 x i1> %gt, i32 5
  %wide = zext i1 %lane5 to i8
  ret i8 %wide
}

; Stores an all-true <8 x i1> constant through %R, then round-trips the
; constant <0,1,1,1,1,1,1,1> through a stack slot and returns it as an i8.
; The block label must stay "entry" because the assertions match "## %entry".
define i8 @conv1(<8 x i1>* %R) {
; KNL-LABEL: conv1:
; KNL:       ## BB#0: ## %entry
; KNL-NEXT:    kxnorw %k0, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    movb $-2, -{{[0-9]+}}(%rsp)
; KNL-NEXT:    movb $-2, %al
; KNL-NEXT:    retq
;
; SKX-LABEL: conv1:
; SKX:       ## BB#0: ## %entry
; SKX-NEXT:    kxnorw %k0, %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    movb $-2, -{{[0-9]+}}(%rsp)
; SKX-NEXT:    movb $-2, %al
; SKX-NEXT:    retq
entry:
  store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R

  %slot = alloca <8 x i1>
  store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %slot
  %reloaded = load <8 x i1>, <8 x i1>* %slot
  %packed = bitcast <8 x i1> %reloaded to i8
  ret i8 %packed
}

; Performs two signed <4 x i64> greater-than compares, then a signed
; greater-than compare of the two resulting <4 x i1> vectors, and
; sign-extends the final mask to <4 x i32>.
define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
; KNL-LABEL: test4:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; KNL-NEXT:    vpmovqd %zmm0, %ymm0
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL-NEXT:    vpcmpgtq %ymm3, %ymm2, %ymm1
; KNL-NEXT:    vpmovqd %zmm1, %ymm1
; KNL-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL-NEXT:    vpsrad $31, %xmm1, %xmm1
; KNL-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test4:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k0
; SKX-NEXT:    knotw %k0, %k1
; SKX-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1}
; SKX-NEXT:    vpmovm2d %k0, %xmm0
; SKX-NEXT:    retq
  %x_gt_y = icmp sgt <4 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
  %mask_cmp = icmp sgt <4 x i1> %x_gt_y, %x1_gt_y1
  %wide = sext <4 x i1> %mask_cmp to <4 x i32>
  ret <4 x i32> %wide
}

; <2 x i64> variant: a signed less-than compare and a signed greater-than
; compare feed a signed less-than compare on <2 x i1>, whose result is
; sign-extended to <2 x i64>.
define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
; KNL-LABEL: test5:
; KNL:       ## BB#0:
; KNL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; KNL-NEXT:    vpcmpgtq %xmm3, %xmm2, %xmm1
; KNL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test5:
; SKX:       ## BB#0:
; SKX-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
; SKX-NEXT:    knotw %k0, %k1
; SKX-NEXT:    vpcmpgtq %xmm3, %xmm2, %k0 {%k1}
; SKX-NEXT:    vpmovm2q %k0, %xmm0
; SKX-NEXT:    retq
  %x_lt_y = icmp slt <2 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
  %mask_cmp = icmp slt <2 x i1> %x_lt_y, %x1_gt_y1
  %wide = sext <2 x i1> %mask_cmp to <2 x i64>
  ret <2 x i64> %wide
}

; Ands a <16 x i1> argument with an alternating true/false constant, packs the
; result into an i16 and branches on whether it is zero.
; NOTE(review): no autogenerated assertions are present for this function in
; this file, so its generated code is not verified here - confirm whether
; that is intended or whether the update script should be re-run.
define void @test6(<16 x i1> %mask) {
allocas:
  %masked = and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %packed = bitcast <16 x i1> %masked to i16
  %is_zero = icmp eq i16 %packed, 0
  br i1 %is_zero, label %true, label %false

true:
  ret void

false:
  ret void
}

; Ors an <8 x i1> argument with an alternating true/false constant, packs the
; result into an i8 and branches on whether it is zero.
; The block label must stay "allocas" because the assertions match
; "## %allocas".
define void @test7(<8 x i1> %mask) {
; KNL-LABEL: test7:
; KNL:       ## BB#0: ## %allocas
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    movb $85, %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    retq
;
; SKX-LABEL: test7:
; SKX:       ## BB#0: ## %allocas
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k0
; SKX-NEXT:    movb $85, %al
; SKX-NEXT:    kmovb %eax, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    ktestb %k0, %k0
; SKX-NEXT:    retq
allocas:
  %merged = or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %packed = bitcast <8 x i1> %merged to i8
  %is_zero = icmp eq i8 %packed, 0
  br i1 %is_zero, label %true, label %false

true:
  ret void

false:
  ret void
}

; Selects, on a scalar condition, between two <16 x i1> compare results
; (%a sgt 0 and %b ult 0) and sign-extends the chosen mask to <16 x i8>.
define <16 x i8> @test8(<16 x i32> %a, <16 x i32> %b, i32 %a1, i32 %b1) {
; KNL-LABEL: test8:
; KNL:       ## BB#0:
; KNL-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    jg LBB17_1
; KNL-NEXT:  ## BB#2:
; KNL-NEXT:    vpcmpltud %zmm2, %zmm1, %k1
; KNL-NEXT:    jmp LBB17_3
; KNL-NEXT:  LBB17_1:
; KNL-NEXT:    vpcmpgtd %zmm2, %zmm0, %k1
; KNL-NEXT:  LBB17_3:
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test8:
; SKX:       ## BB#0:
; SKX-NEXT:    vpxord %zmm2, %zmm2, %zmm2
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    jg LBB17_1
; SKX-NEXT:  ## BB#2:
; SKX-NEXT:    vpcmpltud %zmm2, %zmm1, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    retq
; SKX-NEXT:  LBB17_1:
; SKX-NEXT:    vpcmpgtd %zmm2, %zmm0, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    retq
  %pick_first = icmp sgt i32 %a1, %b1
  %a_pos = icmp sgt <16 x i32> %a, zeroinitializer
  %b_below_zero = icmp ult <16 x i32> %b, zeroinitializer
  %chosen = select i1 %pick_first, <16 x i1> %a_pos, <16 x i1> %b_below_zero
  %wide = sext <16 x i1> %chosen to <16 x i8>
  ret <16 x i8> %wide
}

; Selects between two <16 x i1> arguments on a scalar signed compare.
define <16 x i1> @test9(<16 x i1> %a, <16 x i1> %b, i32 %a1, i32 %b1) {
; KNL-LABEL: test9:
; KNL:       ## BB#0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    jg LBB18_1
; KNL-NEXT:  ## BB#2:
; KNL-NEXT:    vpmovsxbd %xmm1, %zmm0
; KNL-NEXT:    jmp LBB18_3
; KNL-NEXT:  LBB18_1:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:  LBB18_3:
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test9:
; SKX:       ## BB#0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    jg LBB18_1
; SKX-NEXT:  ## BB#2:
; SKX-NEXT:    vpsllw $7, %xmm1, %xmm0
; SKX-NEXT:    jmp LBB18_3
; SKX-NEXT:  LBB18_1:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:  LBB18_3:
; SKX-NEXT:    vpmovb2m %xmm0, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    retq
  %pick_first = icmp sgt i32 %a1, %b1
  %chosen = select i1 %pick_first, <16 x i1> %a, <16 x i1> %b
  ret <16 x i1> %chosen
}

; Selects between two <8 x i1> arguments on a scalar signed compare.
; NOTE(review): no autogenerated assertions are present for this function in
; this file, so its generated code is not verified here - confirm whether
; that is intended or whether the update script should be re-run.
define <8 x i1> @test10(<8 x i1> %a, <8 x i1> %b, i32 %a1, i32 %b1) {
  %pick_first = icmp sgt i32 %a1, %b1
  %chosen = select i1 %pick_first, <8 x i1> %a, <8 x i1> %b
  ret <8 x i1> %chosen
}

; Selects between two <4 x i1> arguments on a scalar signed compare.
define <4 x i1> @test11(<4 x i1> %a, <4 x i1> %b, i32 %a1, i32 %b1) {
; KNL-LABEL: test11:
; KNL:       ## BB#0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    jg LBB20_2
; KNL-NEXT:  ## BB#1:
; KNL-NEXT:    vmovaps %zmm1, %zmm0
; KNL-NEXT:  LBB20_2:
; KNL-NEXT:    retq
;
; SKX-LABEL: test11:
; SKX:       ## BB#0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    jg LBB20_1
; SKX-NEXT:  ## BB#2:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm0
; SKX-NEXT:    jmp LBB20_3
; SKX-NEXT:  LBB20_1:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:  LBB20_3:
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT:    vpmovm2d %k0, %xmm0
; SKX-NEXT:    retq
  %pick_first = icmp sgt i32 %a1, %b1
  %chosen = select i1 %pick_first, <4 x i1> %a, <4 x i1> %b
  ret <4 x i1> %chosen
}

; Uses lane 0 of the constant mask 21845 (0x5555) as a select condition; the
; expected code simply returns %x.
define i32 @test12(i32 %x, i32 %y) {
; CHECK-LABEL: test12:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
  %bits = bitcast i16 21845 to <16 x i1>
  %lane = extractelement <16 x i1> %bits, i32 0
  %chosen = select i1 %lane, i32 %x, i32 %y
  ret i32 %chosen
}

; Uses lane 3 of the constant mask 21845 (0x5555) as a select condition; the
; expected code simply returns %y.
define i32 @test13(i32 %x, i32 %y) {
; CHECK-LABEL: test13:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
  %bits = bitcast i16 21845 to <16 x i1>
  %lane = extractelement <16 x i1> %bits, i32 3
  %chosen = select i1 %lane, i32 %x, i32 %y
  ret i32 %chosen
}

; Inserts lane 2 of the constant mask 21845 into position 1 of a constant
; <4 x i1> vector and returns it.
; NOTE(review): no autogenerated assertions are present for this function in
; this file, so its generated code is not verified here - confirm whether
; that is intended or whether the update script should be re-run.
define <4 x i1> @test14() {
  %bits = bitcast i16 21845 to <16 x i1>
  %lane = extractelement <16 x i1> %bits, i32 2
  %updated = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %lane, i32 1
  ret <4 x i1> %updated
}

; Selects between two constant <16 x i1> masks (bitcast from 21845 and from 1)
; on a scalar signed compare.
define <16 x i1> @test15(i32 %x, i32 %y) {
; KNL-LABEL: test15:
; KNL:       ## BB#0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    movw $21845, %ax ## imm = 0x5555
; KNL-NEXT:    movw $1, %cx
; KNL-NEXT:    cmovgw %ax, %cx
; KNL-NEXT:    kmovw %ecx, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test15:
; SKX:       ## BB#0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    movw $21845, %ax ## imm = 0x5555
; SKX-NEXT:    movw $1, %cx
; SKX-NEXT:    cmovgw %ax, %cx
; SKX-NEXT:    kmovw %ecx, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    retq
  %alt_bits = bitcast i16 21845 to <16 x i1>
  %one_bit = bitcast i16 1 to <16 x i1>
  %pick_first = icmp sgt i32 %x, %y
  %chosen = select i1 %pick_first, <16 x i1> %alt_bits, <16 x i1> %one_bit
  ret <16 x i1> %chosen
}

; Reinterprets an i64 as <64 x i1>, forces lane 5 to true and sign-extends
; the mask to <64 x i8>.
define <64 x i8> @test16(i64 %x) {
;
; KNL-LABEL: test16:
; KNL:       ## BB#0:
; KNL-NEXT:    pushq %rbp
; KNL-NEXT:  Ltmp0:
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:  Ltmp1:
; KNL-NEXT:    .cfi_offset %rbp, -16
; KNL-NEXT:    movq %rsp, %rbp
; KNL-NEXT:  Ltmp2:
; KNL-NEXT:    .cfi_def_cfa_register %rbp
; KNL-NEXT:    andq $-32, %rsp
; KNL-NEXT:    subq $64, %rsp
; KNL-NEXT:    movl %edi, (%rsp)
; KNL-NEXT:    shrq $32, %rdi
; KNL-NEXT:    movl %edi, {{[0-9]+}}(%rsp)
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT:    kmovw (%rsp), %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm2
; KNL-NEXT:    movl $1, %eax
; KNL-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT:    vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
; KNL-NEXT:    vpsllw $7, %ymm2, %ymm0
; KNL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; KNL-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
; KNL-NEXT:    movq %rbp, %rsp
; KNL-NEXT:    popq %rbp
; KNL-NEXT:    retq
;
; SKX-LABEL: test16:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovq %rdi, %k0
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    kshiftrw $15, %k1, %k1
; SKX-NEXT:    kshiftlq $5, %k1, %k1
; SKX-NEXT:    korq %k1, %k0, %k0
; SKX-NEXT:    vpmovm2b %k0, %zmm0
; SKX-NEXT:    retq
  %lanes = bitcast i64 %x to <64 x i1>
  %updated = insertelement <64 x i1> %lanes, i1 true, i32 5
  %wide = sext <64 x i1> %updated to <64 x i8>
  ret <64 x i8> %wide
}

; Reinterprets an i64 as <64 x i1>, replaces lane 5 with the result of a
; scalar signed compare and sign-extends the mask to <64 x i8>.
define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
;
; KNL-LABEL: test17:
; KNL:       ## BB#0:
; KNL-NEXT:    pushq %rbp
; KNL-NEXT:  Ltmp3:
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:  Ltmp4:
; KNL-NEXT:    .cfi_offset %rbp, -16
; KNL-NEXT:    movq %rsp, %rbp
; KNL-NEXT:  Ltmp5:
; KNL-NEXT:    .cfi_def_cfa_register %rbp
; KNL-NEXT:    andq $-32, %rsp
; KNL-NEXT:    subq $64, %rsp
; KNL-NEXT:    movl %edi, (%rsp)
; KNL-NEXT:    shrq $32, %rdi
; KNL-NEXT:    movl %edi, {{[0-9]+}}(%rsp)
; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
; KNL-NEXT:    kmovw (%rsp), %k1
; KNL-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    cmpl %edx, %esi
; KNL-NEXT:    setg %al
; KNL-NEXT:    vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
; KNL-NEXT:    vpsllw $7, %ymm0, %ymm0
; KNL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT:    vpxor %ymm2, %ymm2, %ymm2
; KNL-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT:    vmovdqa32 %zmm1, %zmm2 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm2, %xmm2
; KNL-NEXT:    kmovw {{[0-9]+}}(%rsp), %k1
; KNL-NEXT:    vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; KNL-NEXT:    movq %rbp, %rsp
; KNL-NEXT:    popq %rbp
; KNL-NEXT:    retq
;
; SKX-LABEL: test17:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovq %rdi, %k0
; SKX-NEXT:    cmpl %edx, %esi
; SKX-NEXT:    setg %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    kshiftlq $5, %k1, %k1
; SKX-NEXT:    korq %k1, %k0, %k0
; SKX-NEXT:    vpmovm2b %k0, %zmm0
; SKX-NEXT:    retq
  %lanes = bitcast i64 %x to <64 x i1>
  %y_gt_z = icmp sgt i32 %y, %z
  %updated = insertelement <64 x i1> %lanes, i1 %y_gt_z, i32 5
  %wide = sext <64 x i1> %updated to <64 x i8>
  ret <64 x i8> %wide
}

; Builds an <8 x i1> from %a, then overwrites lane 7 with lane 8 of %y and
; lane 6 with lane 9 of %y (with %y viewed as <16 x i1>).
define <8 x i1> @test18(i8 %a, i16 %y) {
; KNL-LABEL: test18:
; KNL:       ## BB#0:
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kmovw %esi, %k1
; KNL-NEXT:    kshiftlw $7, %k1, %k2
; KNL-NEXT:    kshiftrw $15, %k2, %k2
; KNL-NEXT:    kshiftlw $6, %k1, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kshiftlw $6, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    kshiftlw $7, %k2, %k1
; KNL-NEXT:    korw %k1, %k0, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovqw %zmm0, %xmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test18:
; SKX:       ## BB#0:
; SKX-NEXT:    kmovb %edi, %k0
; SKX-NEXT:    kmovw %esi, %k1
; SKX-NEXT:    kshiftlw $6, %k1, %k2
; SKX-NEXT:    kshiftrw $15, %k2, %k2
; SKX-NEXT:    kshiftlw $7, %k1, %k1
; SKX-NEXT:    kshiftrw $15, %k1, %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftlb $6, %k2, %k2
; SKX-NEXT:    korb %k2, %k0, %k0
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    vpmovm2w %k0, %xmm0
; SKX-NEXT:    retq
  %base = bitcast i8 %a to <8 x i1>
  %donor = bitcast i16 %y to <16 x i1>
  %bit8 = extractelement <16 x i1> %donor, i32 8
  %bit9 = extractelement <16 x i1> %donor, i32 9
  %with7 = insertelement <8 x i1> %base, i1 %bit8, i32 7
  %with6 = insertelement <8 x i1> %with7, i1 %bit9, i32 6
  ret <8 x i1> %with6
}

; Zero-masks a <32 x i16> vector: lanes whose <32 x i1> mask bit is false are
; replaced with zero.
define <32 x i16> @test21(<32 x i16> %x, <32 x i1> %mask) nounwind readnone {
; KNL-LABEL: test21:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; KNL-NEXT:    vpsllw $15, %ymm3, %ymm3
; KNL-NEXT:    vpsraw $15, %ymm3, %ymm3
; KNL-NEXT:    vpand %ymm0, %ymm3, %ymm0
; KNL-NEXT:    vextracti128 $1, %ymm2, %xmm2
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; KNL-NEXT:    vpsllw $15, %ymm2, %ymm2
; KNL-NEXT:    vpsraw $15, %ymm2, %ymm2
; KNL-NEXT:    vpand %ymm1, %ymm2, %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test21:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
; SKX-NEXT:    vpmovb2m %ymm1, %k1
; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
  %masked = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %masked
}

; Stores a <4 x i1> argument to memory unchanged.
define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
; KNL-LABEL: test22:
; KNL:       ## BB#0:
; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL-NEXT:    vpslld $31, %ymm0, %ymm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: test22:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
  store <4 x i1> %a, <4 x i1>* %addr
  ret void
}

; Stores a <2 x i1> argument to memory unchanged.
define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
; KNL-LABEL: test23:
; KNL:       ## BB#0:
; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: test23:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
  store <2 x i1> %a, <2 x i1>* %addr
  ret void
}

; Inverts a <1 x i1> argument and stores it through %ptr.
define void @store_v1i1(<1 x i1> %c, <1 x i1>* %ptr) {
; KNL-LABEL: store_v1i1:
; KNL:       ## BB#0:
; KNL-NEXT:    andl $1, %edi
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kxnorw %k0, %k0, %k1
; KNL-NEXT:    kshiftrw $15, %k1, %k1
; KNL-NEXT:    kxorw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rsi)
; KNL-NEXT:    retq
;
; SKX-LABEL: store_v1i1:
; SKX:       ## BB#0:
; SKX-NEXT:    andl $1, %edi
; SKX-NEXT:    kmovw %edi, %k0
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    kshiftrw $15, %k1, %k1
; SKX-NEXT:    kxorw %k1, %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rsi)
; SKX-NEXT:    retq
  %inverted = xor <1 x i1> %c, <i1 1>
  store <1 x i1> %inverted, <1 x i1>* %ptr, align 4
  ret void
}

; Inverts a <2 x i1> argument and stores it through %ptr.
define void @store_v2i1(<2 x i1> %c, <2 x i1>* %ptr) {
; KNL-LABEL: store_v2i1:
; KNL:       ## BB#0:
; KNL-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: store_v2i1:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX-NEXT:    vptestmq %xmm0, %xmm0, %k0
; SKX-NEXT:    knotw %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
  %inverted = xor <2 x i1> %c, <i1 1, i1 1>
  store <2 x i1> %inverted, <2 x i1>* %ptr, align 4
  ret void
}

; Inverts a <4 x i1> argument and stores it through %ptr.
define void @store_v4i1(<4 x i1> %c, <4 x i1>* %ptr) {
; KNL-LABEL: store_v4i1:
; KNL:       ## BB#0:
; KNL-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
; KNL-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vpslld $31, %ymm0, %ymm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: store_v4i1:
; SKX:       ## BB#0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vptestmd %xmm0, %xmm0, %k0
; SKX-NEXT:    knotw %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
  %inverted = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
  store <4 x i1> %inverted, <4 x i1>* %ptr, align 4
  ret void
}

; Inverts an <8 x i1> argument and stores it through %ptr.
define void @store_v8i1(<8 x i1> %c, <8 x i1>* %ptr) {
; KNL-LABEL: store_v8i1:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    knotw %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: store_v8i1:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k0
; SKX-NEXT:    knotb %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
  %inverted = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
  store <8 x i1> %inverted, <8 x i1>* %ptr, align 4
  ret void
}

; Inverts a <16 x i1> argument and stores it through %ptr.
define void @store_v16i1(<16 x i1> %c, <16 x i1>* %ptr) {
; KNL-LABEL: store_v16i1:
; KNL:       ## BB#0:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    knotw %k0, %k0
; KNL-NEXT:    kmovw %k0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: store_v16i1:
; SKX:       ## BB#0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k0
; SKX-NEXT:    knotw %k0, %k0
; SKX-NEXT:    kmovw %k0, (%rdi)
; SKX-NEXT:    retq
  %inverted = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
  store <16 x i1> %inverted, <16 x i1>* %ptr, align 4
  ret void
}

;void f2(int);
;void f1(int c)
;{
;  static int v = 0;
;  if (v == 0)
;    v = 1;
;  else
;    v = 0;
;  f2(v);
;}

; i1 global toggled by @f1 below.
@f1.v = internal unnamed_addr global i1 false, align 4

; Loads the i1 global @f1.v, inverts it, stores it back and tail-calls @f2 with
; the zero-extended new value. Both runs expect the inversion on mask registers
; (kxnorw/kshiftrw/kxorw); the KNL run writes the byte back through a GPR, the
; SKX run writes it with kmovb.
define void @f1(i32 %c) {
; KNL-LABEL: f1:
; KNL: ## BB#0: ## %entry
; KNL-NEXT: movzbl {{.*}}(%rip), %edi
; KNL-NEXT: movl %edi, %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: kxnorw %k0, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kxorw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, {{.*}}(%rip)
; KNL-NEXT: xorl $1, %edi
; KNL-NEXT: jmp _f2 ## TAILCALL
;
; SKX-LABEL: f1:
; SKX: ## BB#0: ## %entry
; SKX-NEXT: movzbl {{.*}}(%rip), %edi
; SKX-NEXT: movl %edi, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: kmovw %eax, %k0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: kshiftrw $15, %k1, %k1
; SKX-NEXT: kxorw %k1, %k0, %k0
; SKX-NEXT: kmovb %k0, {{.*}}(%rip)
; SKX-NEXT: xorl $1, %edi
; SKX-NEXT: jmp _f2 ## TAILCALL
entry:
  %.b1 = load i1, i1* @f1.v, align 4
  %not..b1 = xor i1 %.b1, true
  store i1 %not..b1, i1* @f1.v, align 4
  %0 = zext i1 %not..b1 to i32
  tail call void @f2(i32 %0) #2
  ret void
}

; External callee of @f1; only declared in this file.
declare void @f2(i32) #1

; Truncates an i16 to i1 and stores it through %y. Both runs expect a plain
; scalar sequence: mask the low bit with andl and store one byte.
define void @store_i16_i1(i16 %x, i1 *%y) {
; CHECK-LABEL: store_i16_i1:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: movb %dil, (%rsi)
; CHECK-NEXT: retq
  %c = trunc i16 %x to i1
  store i1 %c, i1* %y
  ret void
}

; Truncates an i8 to i1 and stores it through %y. Same expected scalar
; sequence as store_i16_i1: andl with 1, then a byte store.
define void @store_i8_i1(i8 %x, i1 *%y) {
; CHECK-LABEL: store_i8_i1:
; CHECK: ## BB#0:
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: movb %dil, (%rsi)
; CHECK-NEXT: retq
  %c = trunc i8 %x to i1
  store i1 %c, i1* %y
  ret void
}

; Selects lanes of %x with a constant <32 x i1> mask, zeroing the others.
; The SKX run expects the mask to be materialized as a 32-bit immediate
; (0x59455495, i.e. the select constant read with element 0 as bit 0), moved
; into a mask register and applied with a zero-masked vmovdqu16. The KNL run
; expects each ymm half to be masked with a vector built from a constant load.
define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
; KNL-LABEL: test_build_vec_v32i1:
; KNL: ## BB#0:
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
; KNL-NEXT: vpand %ymm0, %ymm2, %ymm0
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; KNL-NEXT: vpsllw $15, %ymm2, %ymm2
; KNL-NEXT: vpsraw $15, %ymm2, %ymm2
; KNL-NEXT: vpand %ymm1, %ymm2, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: test_build_vec_v32i1:
; SKX: ## BB#0:
; SKX-NEXT: movl $1497715861, %eax ## imm = 0x59455495
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: retq
  %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

; Selects lanes of %x with a constant <64 x i1> mask, zeroing the others.
; The SKX run expects the mask as a 64-bit immediate (0x5945594549549544)
; moved into a mask register and applied with a zero-masked vmovdqu8. The KNL
; run expects the select to fold into two vandps with constant-pool operands.
define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
; KNL-LABEL: test_build_vec_v64i1:
; KNL: ## BB#0:
; KNL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; KNL-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: test_build_vec_v64i1:
; SKX: ## BB#0:
; SKX-NEXT: movabsq $6432645796886517060, %rax ## imm = 0x5945594549549544
; SKX-NEXT: kmovq %rax, %k1
; SKX-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT: retq
  %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
  ret <64 x i8> %ret
}

; Branches on whether an <8 x i1> mask (the AND of two fcmp results, bitcast to
; i8) is zero, storing %in to %base or to %base + 1 element accordingly.
; The SKX run expects the zero test to stay on the mask register (ktestb);
; the KNL run expects the mask to be moved to a GPR and tested with testb.
define void @ktest_1(<8 x double> %in, double * %base) {
; KNL-LABEL: ktest_1:
; KNL: ## BB#0:
; KNL-NEXT: vmovupd (%rdi), %zmm1
; KNL-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; KNL-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
; KNL-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: je LBB41_2
; KNL-NEXT: ## BB#1: ## %L1
; KNL-NEXT: vmovapd %zmm0, (%rdi)
; KNL-NEXT: retq
; KNL-NEXT: LBB41_2: ## %L2
; KNL-NEXT: vmovapd %zmm0, 8(%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: ktest_1:
; SKX: ## BB#0:
; SKX-NEXT: vmovupd (%rdi), %zmm1
; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z}
; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
; SKX-NEXT: ktestb %k0, %k0
; SKX-NEXT: je LBB41_2
; SKX-NEXT: ## BB#1: ## %L1
; SKX-NEXT: vmovapd %zmm0, (%rdi)
; SKX-NEXT: retq
; SKX-NEXT: LBB41_2: ## %L2
; SKX-NEXT: vmovapd %zmm0, 8(%rdi)
; SKX-NEXT: retq
  %addr1 = getelementptr double, double * %base, i64 0
  %addr2 = getelementptr double, double * %base, i64 1

  %vaddr1 = bitcast double* %addr1 to <8 x double>*
  %vaddr2 = bitcast double* %addr2 to <8 x double>*

  %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1
  %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1

  %sel1 = fcmp ogt <8 x double>%in, %val1
  %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer
  %sel2 = fcmp olt <8 x double> %in, %val3
  %sel3 = and <8 x i1> %sel1, %sel2

  %int_sel3 = bitcast <8 x i1> %sel3 to i8
  %res = icmp eq i8 %int_sel3, zeroinitializer
  br i1 %res, label %L2, label %L1
L1:
  store <8 x double> %in, <8 x double>* %vaddr1
  br label %End
L2:
  store <8 x double> %in, <8 x double>* %vaddr2
  br label %End
End:
  ret void
}

; 32-lane variant of ktest_1: branches on whether a <32 x i1> mask (the OR of
; two fcmp results, bitcast to i32) is zero.
; The SKX run expects the two 16-bit compare masks to be combined with
; kunpckwd, OR-ed with kord and tested with ktestd. The KNL run expects the
; masks to be expanded bit by bit into byte vectors (kshiftlw/kshiftrw/kmovw/
; vpinsrb), combined in the vector domain, converted back to two 16-bit masks
; spilled to an aligned stack slot, and compared with cmpl.
define void @ktest_2(<32 x float> %in, float * %base) {
;
; KNL-LABEL: ktest_2:
; KNL: ## BB#0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: Ltmp6:
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: Ltmp7:
; KNL-NEXT: .cfi_offset %rbp, -16
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: Ltmp8:
; KNL-NEXT: .cfi_def_cfa_register %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $32, %rsp
; KNL-NEXT: vmovups (%rdi), %zmm2
; KNL-NEXT: vmovups 64(%rdi), %zmm3
; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1
; KNL-NEXT: kshiftlw $14, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $15, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vmovd %ecx, %xmm3
; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $13, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $12, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $11, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $10, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $9, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $8, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $7, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $6, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $5, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $4, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $3, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $2, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $1, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $0, %k1, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2
; KNL-NEXT: kshiftlw $14, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: kshiftlw $15, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %ecx
; KNL-NEXT: vmovd %ecx, %xmm2
; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $13, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $12, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $11, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $10, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $9, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $8, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $7, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $6, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $5, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $4, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $3, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $2, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $1, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; KNL-NEXT: kshiftlw $0, %k2, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
; KNL-NEXT: vpsllw $7, %ymm2, %ymm2
; KNL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; KNL-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm2
; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k2} {z}
; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k1} {z}
; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm4
; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm3
; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
; KNL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
; KNL-NEXT: vpor %ymm3, %ymm2, %ymm2
; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3
; KNL-NEXT: vpmovsxbd %xmm3, %zmm3
; KNL-NEXT: vpslld $31, %zmm3, %zmm3
; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL-NEXT: vpslld $31, %zmm2, %zmm2
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0
; KNL-NEXT: kmovw %k0, (%rsp)
; KNL-NEXT: cmpl $0, (%rsp)
; KNL-NEXT: je LBB42_2
; KNL-NEXT: ## BB#1: ## %L1
; KNL-NEXT: vmovaps %zmm0, (%rdi)
; KNL-NEXT: vmovaps %zmm1, 64(%rdi)
; KNL-NEXT: jmp LBB42_3
; KNL-NEXT: LBB42_2: ## %L2
; KNL-NEXT: vmovaps %zmm0, 4(%rdi)
; KNL-NEXT: vmovaps %zmm1, 68(%rdi)
; KNL-NEXT: LBB42_3: ## %End
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
;
; SKX-LABEL: ktest_2:
; SKX: ## BB#0:
; SKX-NEXT: vmovups 64(%rdi), %zmm2
; SKX-NEXT: vmovups (%rdi), %zmm3
; SKX-NEXT: vcmpltps %zmm0, %zmm3, %k1
; SKX-NEXT: vcmpltps %zmm1, %zmm2, %k2
; SKX-NEXT: kunpckwd %k1, %k2, %k0
; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z}
; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z}
; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1
; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2
; SKX-NEXT: kunpckwd %k1, %k2, %k1
; SKX-NEXT: kord %k1, %k0, %k0
; SKX-NEXT: ktestd %k0, %k0
; SKX-NEXT: je LBB42_2
; SKX-NEXT: ## BB#1: ## %L1
; SKX-NEXT: vmovaps %zmm0, (%rdi)
; SKX-NEXT: vmovaps %zmm1, 64(%rdi)
; SKX-NEXT: retq
; SKX-NEXT: LBB42_2: ## %L2
; SKX-NEXT: vmovaps %zmm0, 4(%rdi)
; SKX-NEXT: vmovaps %zmm1, 68(%rdi)
; SKX-NEXT: retq
  %addr1 = getelementptr float, float * %base, i64 0
  %addr2 = getelementptr float, float * %base, i64 1

  %vaddr1 = bitcast float* %addr1 to <32 x float>*
  %vaddr2 = bitcast float* %addr2 to <32 x float>*

  %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1
  %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1

  %sel1 = fcmp ogt <32 x float>%in, %val1
  %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer
  %sel2 = fcmp olt <32 x float> %in, %val3
  %sel3 = or <32 x i1> %sel1, %sel2

  %int_sel3 = bitcast <32 x i1> %sel3 to i32
  %res = icmp eq i32 %int_sel3, zeroinitializer
  br i1 %res, label %L2, label %L1
L1:
  store <32 x float> %in, <32 x float>* %vaddr1
  br label %End
L2:
  store <32 x float> %in, <32 x float>* %vaddr2
  br label %End
End:
  ret void
}

; Loads an <8 x i1> from memory and sign-extends it to <8 x i64>.
; The SKX run expects a direct byte mask load (kmovb) and vpmovm2q; the KNL
; run expects the byte to go through a GPR and the result to be built with an
; all-ones vpternlogd plus a zero-masked move.
define <8 x i64> @load_8i1(<8 x i1>* %a) {
; KNL-LABEL: load_8i1:
; KNL: ## BB#0:
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: load_8i1:
; SKX: ## BB#0:
; SKX-NEXT: kmovb (%rdi), %k0
; SKX-NEXT: vpmovm2q %k0, %zmm0
; SKX-NEXT: retq
  %b = load <8 x i1>, <8 x i1>* %a
  %c = sext <8 x i1> %b to <8 x i64>
  ret <8 x i64> %c
}

; Loads a <16 x i1> from memory and sign-extends it to <16 x i32>.
; Both runs expect a direct 16-bit mask load (kmovw from memory); the SKX run
; then uses vpmovm2d, the KNL run an all-ones vpternlogd plus a zero-masked move.
define <16 x i32> @load_16i1(<16 x i1>* %a) {
; KNL-LABEL: load_16i1:
; KNL: ## BB#0:
; KNL-NEXT: kmovw (%rdi), %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; SKX-LABEL: load_16i1:
; SKX: ## BB#0:
; SKX-NEXT: kmovw (%rdi), %k0
; SKX-NEXT: vpmovm2d %k0, %zmm0
; SKX-NEXT: retq
  %b = load <16 x i1>, <16 x i1>* %a
  %c = sext <16 x i1> %b to <16 x i32>
  ret <16 x i32> %c
}

; Loads a <2 x i1> from memory and sign-extends it to <2 x i16>.
; The SKX run expects kmovb + vpmovm2q into an xmm register; the KNL run
; expects the 512-bit masked-move sequence with the result taken from xmm0.
define <2 x i16> @load_2i1(<2 x i1>* %a) {
; KNL-LABEL: load_2i1:
; KNL: ## BB#0:
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: load_2i1:
; SKX: ## BB#0:
; SKX-NEXT: kmovb (%rdi), %k0
; SKX-NEXT: vpmovm2q %k0, %xmm0
; SKX-NEXT: retq
  %b = load <2 x i1>, <2 x i1>* %a
  %c = sext <2 x i1> %b to <2 x i16>
  ret <2 x i16> %c
}

; Loads a <4 x i1> from memory and sign-extends it to <4 x i16>.
; The SKX run expects kmovb + vpmovm2d into an xmm register; the KNL run
; expects the 512-bit masked-move sequence followed by a vpmovqd narrowing.
define <4 x i16> @load_4i1(<4 x i1>* %a) {
; KNL-LABEL: load_4i1:
; KNL: ## BB#0:
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; KNL-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: load_4i1:
; SKX: ## BB#0:
; SKX-NEXT: kmovb (%rdi), %k0
; SKX-NEXT: vpmovm2d %k0, %xmm0
; SKX-NEXT: retq
  %b = load <4 x i1>, <4 x i1>* %a
  %c = sext <4 x i1> %b to <4 x i16>
  ret <4 x i16> %c
}

; Loads a <32 x i1> from memory and sign-extends it to <32 x i16>.
; The SKX run expects a single 32-bit mask load (kmovd) and vpmovm2w; the KNL
; run expects two 16-bit mask loads (offsets 0 and 2), each expanded and
; narrowed with vpmovdw into one ymm half of the result.
define <32 x i16> @load_32i1(<32 x i1>* %a) {
; KNL-LABEL: load_32i1:
; KNL: ## BB#0:
; KNL-NEXT: kmovw (%rdi), %k1
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: kmovw 2(%rdi), %k1
; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdw %zmm1, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: load_32i1:
; SKX: ## BB#0:
; SKX-NEXT: kmovd (%rdi), %k0
; SKX-NEXT: vpmovm2w %k0, %zmm0
; SKX-NEXT: retq
  %b = load <32 x i1>, <32 x i1>* %a
  %c = sext <32 x i1> %b to <32 x i16>
  ret <32 x i16> %c
}

; Loads a <64 x i1> from memory and sign-extends it to <64 x i8>.
; The SKX run expects a single 64-bit mask load (kmovq) and vpmovm2b; the KNL
; run expects four 16-bit mask loads (offsets 0, 2, 4, 6), each expanded and
; narrowed with vpmovdb, then paired into two ymm results.
define <64 x i8> @load_64i1(<64 x i1>* %a) {
; KNL-LABEL: load_64i1:
; KNL: ## BB#0:
; KNL-NEXT: kmovw (%rdi), %k1
; KNL-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: kmovw 2(%rdi), %k1
; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; KNL-NEXT: kmovw 4(%rdi), %k1
; KNL-NEXT: vmovdqa32 %zmm1, %zmm2 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm2, %xmm2
; KNL-NEXT: kmovw 6(%rdi), %k1
; KNL-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; KNL-NEXT: retq
;
; SKX-LABEL: load_64i1:
; SKX: ## BB#0:
; SKX-NEXT: kmovq (%rdi), %k0
; SKX-NEXT: vpmovm2b %k0, %zmm0
; SKX-NEXT: retq
  %b = load <64 x i1>, <64 x i1>* %a
  %c = sext <64 x i1> %b to <64 x i8>
  ret <64 x i8> %c
}

; Stores an <8 x i1> argument through %a. The SKX run expects vpmovw2m and a
; direct kmovb store; the KNL run expects the mask to be formed with
; vptestmq and written as a byte through a GPR.
define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
; KNL-LABEL: store_8i1:
; KNL: ## BB#0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: store_8i1:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
  store <8 x i1> %v, <8 x i1>* %a
  ret void
}

; Truncates an <8 x i16> argument to <8 x i1> and stores it through %a.
; The expected code is the same as for store_8i1 in both runs.
define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
; KNL-LABEL: store_8i1_1:
; KNL: ## BB#0:
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movb %al, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: store_8i1_1:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k0
; SKX-NEXT: kmovb %k0, (%rdi)
; SKX-NEXT: retq
  %v1 = trunc <8 x i16> %v to <8 x i1>
  store <8 x i1> %v1, <8 x i1>* %a
  ret void
}

; Stores a <16 x i1> argument through %a. Both runs expect a direct 16-bit
; mask store (kmovw to memory); they differ only in how the mask is formed
; (vptestmd in the KNL run, vpmovb2m in the SKX run).
define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
; KNL-LABEL: store_16i1:
; KNL: ## BB#0:
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: store_16i1:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k0
; SKX-NEXT: kmovw %k0, (%rdi)
; SKX-NEXT: retq
  store <16 x i1> %v, <16 x i1>* %a
  ret void
}

; Stores a <32 x i1> argument through %a. The SKX run expects one vpmovb2m on
; the ymm value and a single kmovd store; the KNL run expects the value to be
; split into two 16-lane halves, each stored with kmovw (offsets 2 and 0).
define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
; KNL-LABEL: store_32i1:
; KNL: ## BB#0:
; KNL-NEXT: vextractf128 $1, %ymm0, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, 2(%rdi)
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: store_32i1:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
; SKX-NEXT: vpmovb2m %ymm0, %k0
; SKX-NEXT: kmovd %k0, (%rdi)
; SKX-NEXT: retq
  store <32 x i1> %v, <32 x i1>* %a
  ret void
}

; Truncates a <32 x i16> argument to <32 x i1> and stores it through %a.
; The SKX run expects vpmovw2m on the zmm value and a single kmovd store; the
; KNL run expects each ymm half to be narrowed to bytes first and then stored
; as two 16-bit masks (offsets 2 and 0).
define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
; KNL-LABEL: store_32i1_1:
; KNL: ## BB#0:
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
; KNL-NEXT: vpmovdb %zmm1, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, 2(%rdi)
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: store_32i1_1:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $15, %zmm0, %zmm0
; SKX-NEXT: vpmovw2m %zmm0, %k0
; SKX-NEXT: kmovd %k0, (%rdi)
; SKX-NEXT: retq
  %v1 = trunc <32 x i16> %v to <32 x i1>
  store <32 x i1> %v1, <32 x i1>* %a
  ret void
}

1613 define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { 1614 ; 1615 ; KNL-LABEL: store_64i1: 1616 ; KNL: ## BB#0: 1617 ; KNL-NEXT: pushq %rbp 1618 ; KNL-NEXT: Ltmp9: 1619 ; KNL-NEXT: .cfi_def_cfa_offset 16 1620 ; KNL-NEXT: pushq %r15 1621 ; KNL-NEXT: Ltmp10: 1622 ; KNL-NEXT: .cfi_def_cfa_offset 24 1623 ; KNL-NEXT: pushq %r14 1624 ; KNL-NEXT: Ltmp11: 1625 ; KNL-NEXT: .cfi_def_cfa_offset 32 1626 ; KNL-NEXT: pushq %r13 1627 ; KNL-NEXT: Ltmp12: 1628 ; KNL-NEXT: .cfi_def_cfa_offset 40 1629 ; KNL-NEXT: pushq %r12 1630 ; KNL-NEXT: Ltmp13: 1631 ; KNL-NEXT: .cfi_def_cfa_offset 48 1632 ; KNL-NEXT: pushq %rbx 1633 ; KNL-NEXT: Ltmp14: 1634 ; KNL-NEXT: .cfi_def_cfa_offset 56 1635 ; KNL-NEXT: Ltmp15: 1636 ; KNL-NEXT: .cfi_offset %rbx, -56 1637 ; KNL-NEXT: Ltmp16: 1638 ; KNL-NEXT: .cfi_offset %r12, -48 1639 ; KNL-NEXT: Ltmp17: 1640 ; KNL-NEXT: .cfi_offset %r13, -40 1641 ; KNL-NEXT: Ltmp18: 1642 ; KNL-NEXT: .cfi_offset %r14, -32 1643 ; KNL-NEXT: Ltmp19: 1644 ; KNL-NEXT: .cfi_offset %r15, -24 1645 ; KNL-NEXT: Ltmp20: 1646 ; KNL-NEXT: .cfi_offset %rbp, -16 1647 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 1648 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1649 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 1650 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 1651 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 1652 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2 1653 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm3 1654 ; KNL-NEXT: vpslld $31, %zmm3, %zmm3 1655 ; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 1656 ; KNL-NEXT: kshiftlw $14, %k0, %k1 1657 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1658 ; KNL-NEXT: kmovw %k1, %r8d 1659 ; KNL-NEXT: kshiftlw $15, %k0, %k1 1660 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1661 ; KNL-NEXT: kmovw %k1, %r9d 1662 ; KNL-NEXT: kshiftlw $13, %k0, %k1 1663 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1664 ; KNL-NEXT: kmovw %k1, %r10d 1665 ; KNL-NEXT: kshiftlw $12, %k0, %k1 1666 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1667 ; KNL-NEXT: kmovw %k1, %r11d 1668 ; KNL-NEXT: kshiftlw $11, %k0, %k1 1669 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1670 ; KNL-NEXT: kmovw %k1, %r14d 
1671 ; KNL-NEXT: kshiftlw $10, %k0, %k1 1672 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1673 ; KNL-NEXT: kmovw %k1, %r15d 1674 ; KNL-NEXT: kshiftlw $9, %k0, %k1 1675 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1676 ; KNL-NEXT: kmovw %k1, %r12d 1677 ; KNL-NEXT: kshiftlw $8, %k0, %k1 1678 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1679 ; KNL-NEXT: kmovw %k1, %r13d 1680 ; KNL-NEXT: kshiftlw $7, %k0, %k1 1681 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1682 ; KNL-NEXT: kmovw %k1, %ebx 1683 ; KNL-NEXT: kshiftlw $6, %k0, %k1 1684 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1685 ; KNL-NEXT: kmovw %k1, %ebp 1686 ; KNL-NEXT: kshiftlw $5, %k0, %k1 1687 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1688 ; KNL-NEXT: kmovw %k1, %eax 1689 ; KNL-NEXT: kshiftlw $4, %k0, %k1 1690 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1691 ; KNL-NEXT: kmovw %k1, %ecx 1692 ; KNL-NEXT: kshiftlw $3, %k0, %k1 1693 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1694 ; KNL-NEXT: kmovw %k1, %edx 1695 ; KNL-NEXT: kshiftlw $2, %k0, %k1 1696 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1697 ; KNL-NEXT: kmovw %k1, %esi 1698 ; KNL-NEXT: kshiftlw $1, %k0, %k1 1699 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1700 ; KNL-NEXT: vmovd %r9d, %xmm3 1701 ; KNL-NEXT: kmovw %k1, %r9d 1702 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2 1703 ; KNL-NEXT: kshiftlw $0, %k0, %k0 1704 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1705 ; KNL-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2 1706 ; KNL-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2 1707 ; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2 1708 ; KNL-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2 1709 ; KNL-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2 1710 ; KNL-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2 1711 ; KNL-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2 1712 ; KNL-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2 1713 ; KNL-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2 1714 ; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 1715 ; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2 1716 ; KNL-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2 1717 ; KNL-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2 1718 ; KNL-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2 1719 ; KNL-NEXT: kmovw %k0, %eax 
1720 ; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 1721 ; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 1722 ; KNL-NEXT: vpslld $31, %zmm2, %zmm2 1723 ; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 1724 ; KNL-NEXT: kmovw %k0, 6(%rdi) 1725 ; KNL-NEXT: kshiftlw $14, %k2, %k0 1726 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1727 ; KNL-NEXT: kmovw %k0, %r8d 1728 ; KNL-NEXT: kshiftlw $15, %k2, %k0 1729 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1730 ; KNL-NEXT: kmovw %k0, %r10d 1731 ; KNL-NEXT: kshiftlw $13, %k2, %k0 1732 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1733 ; KNL-NEXT: kmovw %k0, %r9d 1734 ; KNL-NEXT: kshiftlw $12, %k2, %k0 1735 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1736 ; KNL-NEXT: kmovw %k0, %r11d 1737 ; KNL-NEXT: kshiftlw $11, %k2, %k0 1738 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1739 ; KNL-NEXT: kmovw %k0, %r14d 1740 ; KNL-NEXT: kshiftlw $10, %k2, %k0 1741 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1742 ; KNL-NEXT: kmovw %k0, %r15d 1743 ; KNL-NEXT: kshiftlw $9, %k2, %k0 1744 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1745 ; KNL-NEXT: kmovw %k0, %r12d 1746 ; KNL-NEXT: kshiftlw $8, %k2, %k0 1747 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1748 ; KNL-NEXT: kmovw %k0, %r13d 1749 ; KNL-NEXT: kshiftlw $7, %k2, %k0 1750 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1751 ; KNL-NEXT: kmovw %k0, %edx 1752 ; KNL-NEXT: kshiftlw $6, %k2, %k0 1753 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1754 ; KNL-NEXT: kmovw %k0, %esi 1755 ; KNL-NEXT: kshiftlw $5, %k2, %k0 1756 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1757 ; KNL-NEXT: kmovw %k0, %ebp 1758 ; KNL-NEXT: kshiftlw $4, %k2, %k0 1759 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1760 ; KNL-NEXT: kmovw %k0, %ebx 1761 ; KNL-NEXT: kshiftlw $3, %k2, %k0 1762 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1763 ; KNL-NEXT: kmovw %k0, %eax 1764 ; KNL-NEXT: kshiftlw $2, %k2, %k0 1765 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1766 ; KNL-NEXT: kmovw %k0, %ecx 1767 ; KNL-NEXT: kshiftlw $1, %k2, %k0 1768 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1769 ; KNL-NEXT: vmovd %r10d, %xmm2 1770 ; KNL-NEXT: kmovw %k0, %r10d 1771 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 1772 ; 
KNL-NEXT: kshiftlw $0, %k2, %k0 1773 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1774 ; KNL-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1 1775 ; KNL-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1 1776 ; KNL-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1 1777 ; KNL-NEXT: vpinsrb $4, %r14d, %xmm1, %xmm1 1778 ; KNL-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1 1779 ; KNL-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1 1780 ; KNL-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1 1781 ; KNL-NEXT: vpinsrb $8, %edx, %xmm1, %xmm1 1782 ; KNL-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1 1783 ; KNL-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1 1784 ; KNL-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1 1785 ; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 1786 ; KNL-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 1787 ; KNL-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1 1788 ; KNL-NEXT: kmovw %k0, %eax 1789 ; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 1790 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 1791 ; KNL-NEXT: vpslld $31, %zmm1, %zmm1 1792 ; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 1793 ; KNL-NEXT: kmovw %k0, 4(%rdi) 1794 ; KNL-NEXT: kshiftlw $14, %k1, %k0 1795 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1796 ; KNL-NEXT: kmovw %k0, %r8d 1797 ; KNL-NEXT: kshiftlw $15, %k1, %k0 1798 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1799 ; KNL-NEXT: kmovw %k0, %r10d 1800 ; KNL-NEXT: kshiftlw $13, %k1, %k0 1801 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1802 ; KNL-NEXT: kmovw %k0, %r9d 1803 ; KNL-NEXT: kshiftlw $12, %k1, %k0 1804 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1805 ; KNL-NEXT: kmovw %k0, %r11d 1806 ; KNL-NEXT: kshiftlw $11, %k1, %k0 1807 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1808 ; KNL-NEXT: kmovw %k0, %r14d 1809 ; KNL-NEXT: kshiftlw $10, %k1, %k0 1810 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1811 ; KNL-NEXT: kmovw %k0, %r15d 1812 ; KNL-NEXT: kshiftlw $9, %k1, %k0 1813 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1814 ; KNL-NEXT: kmovw %k0, %r12d 1815 ; KNL-NEXT: kshiftlw $8, %k1, %k0 1816 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1817 ; KNL-NEXT: kmovw %k0, %r13d 1818 ; KNL-NEXT: kshiftlw $7, %k1, %k0 1819 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1820 ; 
KNL-NEXT: kmovw %k0, %edx 1821 ; KNL-NEXT: kshiftlw $6, %k1, %k0 1822 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1823 ; KNL-NEXT: kmovw %k0, %esi 1824 ; KNL-NEXT: kshiftlw $5, %k1, %k0 1825 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1826 ; KNL-NEXT: kmovw %k0, %ebp 1827 ; KNL-NEXT: kshiftlw $4, %k1, %k0 1828 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1829 ; KNL-NEXT: kmovw %k0, %ebx 1830 ; KNL-NEXT: kshiftlw $3, %k1, %k0 1831 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1832 ; KNL-NEXT: kmovw %k0, %eax 1833 ; KNL-NEXT: kshiftlw $2, %k1, %k0 1834 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1835 ; KNL-NEXT: kmovw %k0, %ecx 1836 ; KNL-NEXT: kshiftlw $1, %k1, %k0 1837 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1838 ; KNL-NEXT: vmovd %r10d, %xmm1 1839 ; KNL-NEXT: kmovw %k0, %r10d 1840 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1841 ; KNL-NEXT: kshiftlw $0, %k1, %k1 1842 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1843 ; KNL-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0 1844 ; KNL-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 1845 ; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 1846 ; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 1847 ; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 1848 ; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 1849 ; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 1850 ; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0 1851 ; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0 1852 ; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 1853 ; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 1854 ; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1855 ; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 1856 ; KNL-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0 1857 ; KNL-NEXT: kmovw %k1, %eax 1858 ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 1859 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 1860 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1861 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 1862 ; KNL-NEXT: kmovw %k1, 2(%rdi) 1863 ; KNL-NEXT: kshiftlw $14, %k0, %k1 1864 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1865 ; KNL-NEXT: kmovw %k1, %r8d 1866 ; KNL-NEXT: kshiftlw $15, %k0, %k1 1867 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1868 ; KNL-NEXT: 
kmovw %k1, %r9d 1869 ; KNL-NEXT: kshiftlw $13, %k0, %k1 1870 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1871 ; KNL-NEXT: kmovw %k1, %r10d 1872 ; KNL-NEXT: kshiftlw $12, %k0, %k1 1873 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1874 ; KNL-NEXT: kmovw %k1, %r11d 1875 ; KNL-NEXT: kshiftlw $11, %k0, %k1 1876 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1877 ; KNL-NEXT: kmovw %k1, %r14d 1878 ; KNL-NEXT: kshiftlw $10, %k0, %k1 1879 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1880 ; KNL-NEXT: kmovw %k1, %r15d 1881 ; KNL-NEXT: kshiftlw $9, %k0, %k1 1882 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1883 ; KNL-NEXT: kmovw %k1, %r12d 1884 ; KNL-NEXT: kshiftlw $8, %k0, %k1 1885 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1886 ; KNL-NEXT: kmovw %k1, %r13d 1887 ; KNL-NEXT: kshiftlw $7, %k0, %k1 1888 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1889 ; KNL-NEXT: kmovw %k1, %edx 1890 ; KNL-NEXT: kshiftlw $6, %k0, %k1 1891 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1892 ; KNL-NEXT: kmovw %k1, %esi 1893 ; KNL-NEXT: kshiftlw $5, %k0, %k1 1894 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1895 ; KNL-NEXT: kmovw %k1, %ebp 1896 ; KNL-NEXT: kshiftlw $4, %k0, %k1 1897 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1898 ; KNL-NEXT: kmovw %k1, %ebx 1899 ; KNL-NEXT: kshiftlw $3, %k0, %k1 1900 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1901 ; KNL-NEXT: kmovw %k1, %eax 1902 ; KNL-NEXT: kshiftlw $2, %k0, %k1 1903 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1904 ; KNL-NEXT: kmovw %k1, %ecx 1905 ; KNL-NEXT: kshiftlw $1, %k0, %k1 1906 ; KNL-NEXT: kshiftrw $15, %k1, %k1 1907 ; KNL-NEXT: vmovd %r9d, %xmm0 1908 ; KNL-NEXT: kmovw %k1, %r9d 1909 ; KNL-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 1910 ; KNL-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0 1911 ; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 1912 ; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 1913 ; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 1914 ; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 1915 ; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 1916 ; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0 1917 ; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0 1918 ; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, 
%xmm0 1919 ; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 1920 ; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 1921 ; KNL-NEXT: kshiftlw $0, %k0, %k0 1922 ; KNL-NEXT: kshiftrw $15, %k0, %k0 1923 ; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 1924 ; KNL-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0 1925 ; KNL-NEXT: kmovw %k0, %eax 1926 ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 1927 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 1928 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1929 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1930 ; KNL-NEXT: kmovw %k0, (%rdi) 1931 ; KNL-NEXT: popq %rbx 1932 ; KNL-NEXT: popq %r12 1933 ; KNL-NEXT: popq %r13 1934 ; KNL-NEXT: popq %r14 1935 ; KNL-NEXT: popq %r15 1936 ; KNL-NEXT: popq %rbp 1937 ; KNL-NEXT: retq 1938 ; 1939 ; SKX-LABEL: store_64i1: 1940 ; SKX: ## BB#0: 1941 ; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 1942 ; SKX-NEXT: vpmovb2m %zmm0, %k0 1943 ; SKX-NEXT: kmovq %k0, (%rdi) 1944 ; SKX-NEXT: retq 1945 store <64 x i1> %v, <64 x i1>* %a 1946 ret void 1947 } 1948