; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s

define i32 @test1(float %x) {
; CHECK-LABEL: test1:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %xmm0, %eax ## encoding: [0x62,0xf1,0x7d,0x08,0x7e,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = bitcast float %x to i32
  ret i32 %res
}

define <4 x i32> @test2(i32 %x) {
; CHECK-LABEL: test2:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = insertelement <4 x i32>undef, i32 %x, i32 0
  ret <4 x i32>%res
}

define <2 x i64> @test3(i64 %x) {
; CHECK-LABEL: test3:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovq %rdi, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6e,0xc7]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = insertelement <2 x i64>undef, i64 %x, i32 0
  ret <2 x i64>%res
}

define <4 x i32> @test4(i32* %x) {
; CHECK-LABEL: test4:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %y = load i32, i32* %x
  %res = insertelement <4 x i32>undef, i32 %y, i32 0
  ret <4 x i32>%res
}

define void @test5(float %x, float* %y) {
; CHECK-LABEL: test5:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovss %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x08,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store float %x, float* %y, align 4
  ret void
}

define void @test6(double %x, double* %y) {
; CHECK-LABEL: test6:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovsd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xff,0x08,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  store double %x, double* %y, align 8
  ret void
}

define float @test7(i32* %x) {
; CHECK-LABEL: test7:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %y = load i32, i32* %x
  %res = bitcast i32 %y to float
  ret float %res
}

define i32 @test8(<4 x i32> %x) {
; CHECK-LABEL: test8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %xmm0, %eax ## encoding: [0x62,0xf1,0x7d,0x08,0x7e,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = extractelement <4 x i32> %x, i32 0
  ret i32 %res
}

define i64 @test9(<2 x i64> %x) {
; CHECK-LABEL: test9:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovq %xmm0, %rax ## encoding: [0x62,0xf1,0xfd,0x08,0x7e,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = extractelement <2 x i64> %x, i32 0
  ret i64 %res
}

define <4 x i32> @test10(i32* %x) {
; CHECK-LABEL: test10:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %y = load i32, i32* %x, align 4
  %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
  ret <4 x i32>%res
}

define <4 x float> @test11(float* %x) {
; CHECK-LABEL: test11:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %y = load float, float* %x, align 4
  %res = insertelement <4 x float>zeroinitializer, float %y, i32 0
  ret <4 x float>%res
}

define <2 x double> @test12(double* %x) {
; CHECK-LABEL: test12:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovsd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x10,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %y = load double, double* %x, align 8
  %res = insertelement <2 x double>zeroinitializer, double %y, i32 0
  ret <2 x double>%res
}

define <2 x i64> @test13(i64 %x) {
; CHECK-LABEL: test13:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovq %rdi, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6e,0xc7]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0
  ret <2 x i64>%res
}

define <4 x i32> @test14(i32 %x) {
; CHECK-LABEL: test14:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0
  ret <4 x i32>%res
}

define <4 x i32> @test15(i32* %x) {
; CHECK-LABEL: test15:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %y = load i32, i32* %x, align 4
  %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
  ret <4 x i32>%res
}

define <16 x i32> @test16(i8 * %addr) {
; CHECK-LABEL: test16:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %res = load <16 x i32>, <16 x i32>* %vaddr, align 1
  ret <16 x i32>%res
}

define <16 x i32> @test17(i8 * %addr) {
; CHECK-LABEL: test17:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %res = load <16 x i32>, <16 x i32>* %vaddr, align 64
  ret <16 x i32>%res
}

define void @test18(i8 * %addr, <8 x i64> %data) {
; CHECK-LABEL: test18:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i64>*
  store <8 x i64>%data, <8 x i64>* %vaddr, align 64
  ret void
}

define void @test19(i8 * %addr, <16 x i32> %data) {
; CHECK-LABEL: test19:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x48,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i32>*
  store <16 x i32>%data, <16 x i32>* %vaddr, align 1
  ret void
}

define void @test20(i8 * %addr, <16 x i32> %data) {
; CHECK-LABEL: test20:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x48,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x i32>*
  store <16 x i32>%data, <16 x i32>* %vaddr, align 64
  ret void
}

define <8 x i64> @test21(i8 * %addr) {
; CHECK-LABEL: test21:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %res = load <8 x i64>, <8 x i64>* %vaddr, align 64
  ret <8 x i64>%res
}

define void @test22(i8 * %addr, <8 x i64> %data) {
; CHECK-LABEL: test22:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i64>*
  store <8 x i64>%data, <8 x i64>* %vaddr, align 1
  ret void
}

define <8 x i64> @test23(i8 * %addr) {
; CHECK-LABEL: test23:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %res = load <8 x i64>, <8 x i64>* %vaddr, align 1
  ret <8 x i64>%res
}

define void @test24(i8 * %addr, <8 x double> %data) {
; CHECK-LABEL: test24:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x double>*
  store <8 x double>%data, <8 x double>* %vaddr, align 64
  ret void
}

define <8 x double> @test25(i8 * %addr) {
; CHECK-LABEL: test25:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovapd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x double>*
  %res = load <8 x double>, <8 x double>* %vaddr, align 64
  ret <8 x double>%res
}

define void @test26(i8 * %addr, <16 x float> %data) {
; CHECK-LABEL: test26:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x float>*
  store <16 x float>%data, <16 x float>* %vaddr, align 64
  ret void
}

define <16 x float> @test27(i8 * %addr) {
; CHECK-LABEL: test27:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x float>*
  %res = load <16 x float>, <16 x float>* %vaddr, align 64
  ret <16 x float>%res
}

define void @test28(i8 * %addr, <8 x double> %data) {
; CHECK-LABEL: test28:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x double>*
  store <8 x double>%data, <8 x double>* %vaddr, align 1
  ret void
}

define <8 x double> @test29(i8 * %addr) {
; CHECK-LABEL: test29:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovupd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <8 x double>*
  %res = load <8 x double>, <8 x double>* %vaddr, align 1
  ret <8 x double>%res
}

define void @test30(i8 * %addr, <16 x float> %data) {
; CHECK-LABEL: test30:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x float>*
  store <16 x float>%data, <16 x float>* %vaddr, align 1
  ret void
}

define <16 x float> @test31(i8 * %addr) {
; CHECK-LABEL: test31:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %vaddr = bitcast i8* %addr to <16 x float>*
  %res = load <16 x float>, <16 x float>* %vaddr, align 1
  ret <16 x float>%res
}

define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
; CHECK-LABEL: test32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
  ret <16 x i32>%res
}

define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
; CHECK-LABEL: test33:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
  ret <16 x i32>%res
}

define <16 x i32> @test34(i8 * %addr, <16 x i32> %mask1) {
; CHECK-LABEL: test34:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
  ret <16 x i32>%res
}

define <16 x i32> @test35(i8 * %addr, <16 x i32> %mask1) {
; CHECK-LABEL: test35:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
  ret <16 x i32>%res
}

define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
; CHECK-LABEL: test36:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
  ret <8 x i64>%res
}

define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
; CHECK-LABEL: test37:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04]
; CHECK-NEXT:    vpblendmq (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x64,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
  ret <8 x i64>%res
}

define <8 x i64> @test38(i8 * %addr, <8 x i64> %mask1) {
; CHECK-LABEL: test38:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
  ret <8 x i64>%res
}

define <8 x i64> @test39(i8 * %addr, <8 x i64> %mask1) {
; CHECK-LABEL: test39:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
  ret <8 x i64>%res
}

define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
; CHECK-LABEL: test40:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
  ret <16 x float>%res
}

define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
; CHECK-LABEL: test41:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
  ret <16 x float>%res
}

define <16 x float> @test42(i8 * %addr, <16 x float> %mask1) {
; CHECK-LABEL: test42:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovaps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
  ret <16 x float>%res
}

define <16 x float> @test43(i8 * %addr, <16 x float> %mask1) {
; CHECK-LABEL: test43:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovups (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
  ret <16 x float>%res
}

define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
; CHECK-LABEL: test44:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
  ret <8 x double>%res
}

define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
; CHECK-LABEL: test45:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
; CHECK-NEXT:    vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
; CHECK-NEXT:    vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
  ret <8 x double>%res
}

define <8 x double> @test46(i8 * %addr, <8 x double> %mask1) {
; CHECK-LABEL: test46:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovapd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
  ret <8 x double>%res
}

define <8 x double> @test47(i8 * %addr, <8 x double> %mask1) {
; CHECK-LABEL: test47:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
; CHECK-NEXT:    vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07]
; CHECK-NEXT:    vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04]
; CHECK-NEXT:    vmovupd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
  ret <8 x double>%res
}