; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

; Codegen test for 128-bit and 256-bit vector logic operations (and, andn, or,
; xor) when AVX512VL is enabled. It is run for two CPUs (knl and skx):
;   * Assertions under the common prefix must hold for both CPUs.
;   * Assertions under the per-CPU prefixes cover the masked floating-point
;     tests, where the expected instruction differs between the two runs
;     (integer vpand*/vpandn*/vpor*/vpxor* for knl, vand*/vandn*/vor*/vxor*
;     for skx).

; 256-bit unmasked integer logic ops.

define <8 x i32> @vpandd256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpandd256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0
; CHECK-NEXT:    vpand %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  %x = and <8 x i32> %a2, %b
  ret <8 x i32> %x
}

define <8 x i32> @vpandnd256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpandnd256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0
; CHECK-NEXT:    vpandn %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <8 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ; Negate %b, matching the other andn tests in this file, so that both
  ; arguments feed the and-not.
  %b2 = xor <8 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %x = and <8 x i32> %a2, %b2
  ret <8 x i32> %x
}

define <8 x i32> @vpord256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpord256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0
; CHECK-NEXT:    vpor %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <8 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  %x = or <8 x i32> %a2, %b
  ret <8 x i32> %x
}

define <8 x i32> @vpxord256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpxord256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0
; CHECK-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  %x = xor <8 x i32> %a2, %b
  ret <8 x i32> %x
}

define <4 x i64> @vpandq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpandq256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to4}, %ymm0, %ymm0
; CHECK-NEXT:    vpand %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 6, i64 6, i64 6, i64 6>
  %x = and <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <4 x i64> @vpandnq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpandnq256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to4}, %ymm0, %ymm0
; CHECK-NEXT:    vpandn %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
  %b2 = xor <4 x i64> %b, <i64 -1, i64 -1, i64 -1, i64 -1>
  %x = and <4 x i64> %a2, %b2
  ret <4 x i64> %x
}

define <4 x i64> @vporq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vporq256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to4}, %ymm0, %ymm0
; CHECK-NEXT:    vpor %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 21, i64 21, i64 21, i64 21>
  %x = or <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <4 x i64> @vpxorq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpxorq256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to4}, %ymm0, %ymm0
; CHECK-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 22, i64 22, i64 22, i64 22>
  %x = xor <4 x i64> %a2, %b
  ret <4 x i64> %x
}

; 128-bit unmasked integer logic ops.

define <4 x i32> @vpandd128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpandd128:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <4 x i32> %a, <i32 8, i32 8, i32 8, i32 8>
  %x = and <4 x i32> %a2, %b
  ret <4 x i32> %x
}

define <4 x i32> @vpandnd128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpandnd128:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %b2 = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
  %x = and <4 x i32> %a2, %b2
  ret <4 x i32> %x
}

define <4 x i32> @vpord128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpord128:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <4 x i32> %a, <i32 10, i32 10, i32 10, i32 10>
  %x = or <4 x i32> %a2, %b
  ret <4 x i32> %x
}

define <4 x i32> @vpxord128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpxord128:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <4 x i32> %a, <i32 11, i32 11, i32 11, i32 11>
  %x = xor <4 x i32> %a2, %b
  ret <4 x i32> %x
}

define <2 x i64> @vpandq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpandq128:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 12, i64 12>
  %x = and <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <2 x i64> @vpandnq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpandnq128:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    vpandn %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 13, i64 13>
  %b2 = xor <2 x i64> %b, <i64 -1, i64 -1>
  %x = and <2 x i64> %a2, %b2
  ret <2 x i64> %x
}

define <2 x i64> @vporq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vporq128:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 14, i64 14>
  %x = or <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <2 x i64> @vpxorq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
; CHECK-LABEL: vpxorq128:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 15, i64 15>
  %x = xor <2 x i64> %a2, %b
  ret <2 x i64> %x
}

; Masked and-not on floating-point vectors. Each test bitcasts the operands to
; integers, computes ~A & B, bitcasts back, and selects per lane on the bits
; of the i8 mask %__U: the "mask" variants fall back to %__W, the "maskz"
; variants fall back to zero.

define <4 x double> @test_mm256_mask_andnot_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; KNL-LABEL: test_mm256_mask_andnot_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandnq %ymm2, %ymm1, %ymm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm256_mask_andnot_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandnpd %ymm2, %ymm1, %ymm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %__A to <4 x i64>
  %neg.i.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %1 = bitcast <4 x double> %__B to <4 x i64>
  %and.i.i = and <4 x i64> %1, %neg.i.i
  %2 = bitcast <4 x i64> %and.i.i to <4 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__W
  ret <4 x double> %4
}

define <4 x double> @test_mm256_maskz_andnot_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; KNL-LABEL: test_mm256_maskz_andnot_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm256_maskz_andnot_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandnpd %ymm1, %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %__A to <4 x i64>
  %neg.i.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
  %1 = bitcast <4 x double> %__B to <4 x i64>
  %and.i.i = and <4 x i64> %1, %neg.i.i
  %2 = bitcast <4 x i64> %and.i.i to <4 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer
  ret <4 x double> %4
}

define <2 x double> @test_mm_mask_andnot_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; KNL-LABEL: test_mm_mask_andnot_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandnq %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm_mask_andnot_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandnpd %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <2 x double> %__A to <2 x i64>
  %neg.i.i = xor <2 x i64> %0, <i64 -1, i64 -1>
  %1 = bitcast <2 x double> %__B to <2 x i64>
  %and.i.i = and <2 x i64> %1, %neg.i.i
  %2 = bitcast <2 x i64> %and.i.i to <2 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__W
  ret <2 x double> %4
}

define <2 x double> @test_mm_maskz_andnot_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; KNL-LABEL: test_mm_maskz_andnot_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm_maskz_andnot_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandnpd %xmm1, %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <2 x double> %__A to <2 x i64>
  %neg.i.i = xor <2 x i64> %0, <i64 -1, i64 -1>
  %1 = bitcast <2 x double> %__B to <2 x i64>
  %and.i.i = and <2 x i64> %1, %neg.i.i
  %2 = bitcast <2 x i64> %and.i.i to <2 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer
  ret <2 x double> %4
}

define <8 x float> @test_mm256_mask_andnot_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; KNL-LABEL: test_mm256_mask_andnot_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandnd %ymm2, %ymm1, %ymm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm256_mask_andnot_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandnps %ymm2, %ymm1, %ymm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %__A to <8 x i32>
  %neg.i.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = bitcast <8 x float> %__B to <8 x i32>
  %and.i.i = and <8 x i32> %1, %neg.i.i
  %2 = bitcast <8 x i32> %and.i.i to <8 x float>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__W
  ret <8 x float> %4
}

define <8 x float> @test_mm256_maskz_andnot_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; KNL-LABEL: test_mm256_maskz_andnot_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm256_maskz_andnot_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandnps %ymm1, %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %__A to <8 x i32>
  %neg.i.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = bitcast <8 x float> %__B to <8 x i32>
  %and.i.i = and <8 x i32> %1, %neg.i.i
  %2 = bitcast <8 x i32> %and.i.i to <8 x float>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer
  ret <8 x float> %4
}

define <4 x float> @test_mm_mask_andnot_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; KNL-LABEL: test_mm_mask_andnot_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandnd %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm_mask_andnot_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandnps %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x float> %__A to <4 x i32>
  %neg.i.i = xor <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = bitcast <4 x float> %__B to <4 x i32>
  %and.i.i = and <4 x i32> %1, %neg.i.i
  %2 = bitcast <4 x i32> %and.i.i to <4 x float>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__W
  ret <4 x float> %4
}

define <4 x float> @test_mm_maskz_andnot_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; KNL-LABEL: test_mm_maskz_andnot_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm_maskz_andnot_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandnps %xmm1, %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x float> %__A to <4 x i32>
  %neg.i.i = xor <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = bitcast <4 x float> %__B to <4 x i32>
  %and.i.i = and <4 x i32> %1, %neg.i.i
  %2 = bitcast <4 x i32> %and.i.i to <4 x float>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer
  ret <4 x float> %4
}

; Masked and on floating-point vectors (same bitcast/select shape as above).

define <4 x double> @test_mm256_mask_and_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; KNL-LABEL: test_mm256_mask_and_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandq %ymm1, %ymm2, %ymm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm256_mask_and_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandpd %ymm1, %ymm2, %ymm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %__A to <4 x i64>
  %1 = bitcast <4 x double> %__B to <4 x i64>
  %and.i.i = and <4 x i64> %1, %0
  %2 = bitcast <4 x i64> %and.i.i to <4 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__W
  ret <4 x double> %4
}

define <4 x double> @test_mm256_maskz_and_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; KNL-LABEL: test_mm256_maskz_and_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandq %ymm0, %ymm1, %ymm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm256_maskz_and_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandpd %ymm0, %ymm1, %ymm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %__A to <4 x i64>
  %1 = bitcast <4 x double> %__B to <4 x i64>
  %and.i.i = and <4 x i64> %1, %0
  %2 = bitcast <4 x i64> %and.i.i to <4 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer
  ret <4 x double> %4
}

define <2 x double> @test_mm_mask_and_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; KNL-LABEL: test_mm_mask_and_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandq %xmm1, %xmm2, %xmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm_mask_and_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandpd %xmm1, %xmm2, %xmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <2 x double> %__A to <2 x i64>
  %1 = bitcast <2 x double> %__B to <2 x i64>
  %and.i.i = and <2 x i64> %1, %0
  %2 = bitcast <2 x i64> %and.i.i to <2 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__W
  ret <2 x double> %4
}

define <2 x double> @test_mm_maskz_and_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; KNL-LABEL: test_mm_maskz_and_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandq %xmm0, %xmm1, %xmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm_maskz_and_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandpd %xmm0, %xmm1, %xmm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <2 x double> %__A to <2 x i64>
  %1 = bitcast <2 x double> %__B to <2 x i64>
  %and.i.i = and <2 x i64> %1, %0
  %2 = bitcast <2 x i64> %and.i.i to <2 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer
  ret <2 x double> %4
}

define <8 x float> @test_mm256_mask_and_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; KNL-LABEL: test_mm256_mask_and_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandd %ymm1, %ymm2, %ymm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm256_mask_and_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandps %ymm1, %ymm2, %ymm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %__A to <8 x i32>
  %1 = bitcast <8 x float> %__B to <8 x i32>
  %and.i.i = and <8 x i32> %1, %0
  %2 = bitcast <8 x i32> %and.i.i to <8 x float>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__W
  ret <8 x float> %4
}

define <8 x float> @test_mm256_maskz_and_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; KNL-LABEL: test_mm256_maskz_and_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandd %ymm0, %ymm1, %ymm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm256_maskz_and_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandps %ymm0, %ymm1, %ymm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %__A to <8 x i32>
  %1 = bitcast <8 x float> %__B to <8 x i32>
  %and.i.i = and <8 x i32> %1, %0
  %2 = bitcast <8 x i32> %and.i.i to <8 x float>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer
  ret <8 x float> %4
}

define <4 x float> @test_mm_mask_and_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; KNL-LABEL: test_mm_mask_and_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandd %xmm1, %xmm2, %xmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm_mask_and_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandps %xmm1, %xmm2, %xmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x float> %__A to <4 x i32>
  %1 = bitcast <4 x float> %__B to <4 x i32>
  %and.i.i = and <4 x i32> %1, %0
  %2 = bitcast <4 x i32> %and.i.i to <4 x float>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__W
  ret <4 x float> %4
}

define <4 x float> @test_mm_maskz_and_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; KNL-LABEL: test_mm_maskz_and_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpandd %xmm0, %xmm1, %xmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm_maskz_and_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vandps %xmm0, %xmm1, %xmm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x float> %__A to <4 x i32>
  %1 = bitcast <4 x float> %__B to <4 x i32>
  %and.i.i = and <4 x i32> %1, %0
  %2 = bitcast <4 x i32> %and.i.i to <4 x float>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer
  ret <4 x float> %4
}

; Masked xor on floating-point vectors.

define <4 x double> @test_mm256_mask_xor_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; KNL-LABEL: test_mm256_mask_xor_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpxorq %ymm2, %ymm1, %ymm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm256_mask_xor_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vxorpd %ymm2, %ymm1, %ymm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %__A to <4 x i64>
  %1 = bitcast <4 x double> %__B to <4 x i64>
  %xor.i.i = xor <4 x i64> %0, %1
  %2 = bitcast <4 x i64> %xor.i.i to <4 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__W
  ret <4 x double> %4
}

define <4 x double> @test_mm256_maskz_xor_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; KNL-LABEL: test_mm256_maskz_xor_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpxorq %ymm1, %ymm0, %ymm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm256_maskz_xor_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vxorpd %ymm1, %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %__A to <4 x i64>
  %1 = bitcast <4 x double> %__B to <4 x i64>
  %xor.i.i = xor <4 x i64> %0, %1
  %2 = bitcast <4 x i64> %xor.i.i to <4 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer
  ret <4 x double> %4
}

define <2 x double> @test_mm_mask_xor_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; KNL-LABEL: test_mm_mask_xor_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpxorq %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm_mask_xor_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vxorpd %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <2 x double> %__A to <2 x i64>
  %1 = bitcast <2 x double> %__B to <2 x i64>
  %xor.i.i = xor <2 x i64> %0, %1
  %2 = bitcast <2 x i64> %xor.i.i to <2 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__W
  ret <2 x double> %4
}

define <2 x double> @test_mm_maskz_xor_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; KNL-LABEL: test_mm_maskz_xor_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpxorq %xmm1, %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm_maskz_xor_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vxorpd %xmm1, %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <2 x double> %__A to <2 x i64>
  %1 = bitcast <2 x double> %__B to <2 x i64>
  %xor.i.i = xor <2 x i64> %0, %1
  %2 = bitcast <2 x i64> %xor.i.i to <2 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer
  ret <2 x double> %4
}

define <8 x float> @test_mm256_mask_xor_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; KNL-LABEL: test_mm256_mask_xor_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpxord %ymm2, %ymm1, %ymm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm256_mask_xor_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vxorps %ymm2, %ymm1, %ymm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %__A to <8 x i32>
  %1 = bitcast <8 x float> %__B to <8 x i32>
  %xor.i.i = xor <8 x i32> %0, %1
  %2 = bitcast <8 x i32> %xor.i.i to <8 x float>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__W
  ret <8 x float> %4
}

define <8 x float> @test_mm256_maskz_xor_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; KNL-LABEL: test_mm256_maskz_xor_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpxord %ymm1, %ymm0, %ymm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm256_maskz_xor_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vxorps %ymm1, %ymm0, %ymm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %__A to <8 x i32>
  %1 = bitcast <8 x float> %__B to <8 x i32>
  %xor.i.i = xor <8 x i32> %0, %1
  %2 = bitcast <8 x i32> %xor.i.i to <8 x float>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer
  ret <8 x float> %4
}

define <4 x float> @test_mm_mask_xor_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; KNL-LABEL: test_mm_mask_xor_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpxord %xmm2, %xmm1, %xmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm_mask_xor_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vxorps %xmm2, %xmm1, %xmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x float> %__A to <4 x i32>
  %1 = bitcast <4 x float> %__B to <4 x i32>
  %xor.i.i = xor <4 x i32> %0, %1
  %2 = bitcast <4 x i32> %xor.i.i to <4 x float>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__W
  ret <4 x float> %4
}

define <4 x float> @test_mm_maskz_xor_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; KNL-LABEL: test_mm_maskz_xor_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpxord %xmm1, %xmm0, %xmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm_maskz_xor_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vxorps %xmm1, %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x float> %__A to <4 x i32>
  %1 = bitcast <4 x float> %__B to <4 x i32>
  %xor.i.i = xor <4 x i32> %0, %1
  %2 = bitcast <4 x i32> %xor.i.i to <4 x float>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer
  ret <4 x float> %4
}

; Masked or on floating-point vectors.

define <4 x double> @test_mm256_mask_or_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; KNL-LABEL: test_mm256_mask_or_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vporq %ymm1, %ymm2, %ymm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm256_mask_or_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vorpd %ymm1, %ymm2, %ymm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %__A to <4 x i64>
  %1 = bitcast <4 x double> %__B to <4 x i64>
  %or.i.i = or <4 x i64> %1, %0
  %2 = bitcast <4 x i64> %or.i.i to <4 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__W
  ret <4 x double> %4
}

define <4 x double> @test_mm256_maskz_or_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
; KNL-LABEL: test_mm256_maskz_or_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vporq %ymm0, %ymm1, %ymm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm256_maskz_or_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vorpd %ymm0, %ymm1, %ymm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x double> %__A to <4 x i64>
  %1 = bitcast <4 x double> %__B to <4 x i64>
  %or.i.i = or <4 x i64> %1, %0
  %2 = bitcast <4 x i64> %or.i.i to <4 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer
  ret <4 x double> %4
}

define <2 x double> @test_mm_mask_or_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; KNL-LABEL: test_mm_mask_or_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vporq %xmm1, %xmm2, %xmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm_mask_or_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vorpd %xmm1, %xmm2, %xmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <2 x double> %__A to <2 x i64>
  %1 = bitcast <2 x double> %__B to <2 x i64>
  %or.i.i = or <2 x i64> %1, %0
  %2 = bitcast <2 x i64> %or.i.i to <2 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__W
  ret <2 x double> %4
}

define <2 x double> @test_mm_maskz_or_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; KNL-LABEL: test_mm_maskz_or_pd:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vporq %xmm0, %xmm1, %xmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm_maskz_or_pd:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vorpd %xmm0, %xmm1, %xmm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <2 x double> %__A to <2 x i64>
  %1 = bitcast <2 x double> %__B to <2 x i64>
  %or.i.i = or <2 x i64> %1, %0
  %2 = bitcast <2 x i64> %or.i.i to <2 x double>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer
  ret <2 x double> %4
}

define <8 x float> @test_mm256_mask_or_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; KNL-LABEL: test_mm256_mask_or_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpord %ymm1, %ymm2, %ymm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm256_mask_or_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vorps %ymm1, %ymm2, %ymm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %__A to <8 x i32>
  %1 = bitcast <8 x float> %__B to <8 x i32>
  %or.i.i = or <8 x i32> %1, %0
  %2 = bitcast <8 x i32> %or.i.i to <8 x float>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__W
  ret <8 x float> %4
}

define <8 x float> @test_mm256_maskz_or_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
; KNL-LABEL: test_mm256_maskz_or_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpord %ymm0, %ymm1, %ymm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm256_maskz_or_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vorps %ymm0, %ymm1, %ymm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <8 x float> %__A to <8 x i32>
  %1 = bitcast <8 x float> %__B to <8 x i32>
  %or.i.i = or <8 x i32> %1, %0
  %2 = bitcast <8 x i32> %or.i.i to <8 x float>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer
  ret <8 x float> %4
}

define <4 x float> @test_mm_mask_or_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; KNL-LABEL: test_mm_mask_or_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpord %xmm1, %xmm2, %xmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm_mask_or_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vorps %xmm1, %xmm2, %xmm0 {%k1}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x float> %__A to <4 x i32>
  %1 = bitcast <4 x float> %__B to <4 x i32>
  %or.i.i = or <4 x i32> %1, %0
  %2 = bitcast <4 x i32> %or.i.i to <4 x float>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__W
  ret <4 x float> %4
}

define <4 x float> @test_mm_maskz_or_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; KNL-LABEL: test_mm_maskz_or_ps:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    vpord %xmm0, %xmm1, %xmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: test_mm_maskz_or_ps:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    vorps %xmm0, %xmm1, %xmm0 {%k1} {z}
; SKX-NEXT:    retq
entry:
  %0 = bitcast <4 x float> %__A to <4 x i32>
  %1 = bitcast <4 x float> %__B to <4 x i32>
  %or.i.i = or <4 x i32> %1, %0
  %2 = bitcast <4 x i32> %or.i.i to <4 x float>
  %3 = bitcast i8 %__U to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer
  ret <4 x float> %4
}