; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s

; Instruction-selection tests for AVX-512 move instructions on the KNL CPU.
;
; Layout of this file:
;   test1  - test15 : scalar and 128-bit moves (GPR <-> XMM, memory <-> XMM).
;                     Each pattern also pins the first encoding byte to 0x62
;                     (the EVEX prefix byte), so a VEX-encoded move fails.
;   test16 - test31 : unmasked 512-bit loads and stores; the IR alignment
;                     (64 vs 1) selects the aligned or unaligned mnemonic.
;   test32 - test47 : 512-bit loads feeding a select on a compare result;
;                     the expected move carries a {%k1}..{%k7} mask operand,
;                     plus {z} when the other select operand is zero.

;===----------------------------------------------------------------------===;
; Scalar / 128-bit moves, EVEX encoded.
;===----------------------------------------------------------------------===;

; float -> i32 bitcast: XMM to GPR.
; CHECK-LABEL: @test1
; CHECK: vmovd %xmm0, %eax ## encoding: [0x62
; CHECK: ret
define i32 @test1(float %x) {
  %res = bitcast float %x to i32
  ret i32 %res
}

; i32 inserted into lane 0 of an undef vector: GPR to XMM.
; CHECK-LABEL: @test2
; CHECK: vmovd %edi, %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test2(i32 %x) {
  %res = insertelement <4 x i32> undef, i32 %x, i32 0
  ret <4 x i32> %res
}

; i64 inserted into lane 0 of an undef vector: 64-bit GPR to XMM.
; CHECK-LABEL: @test3
; CHECK: vmovq %rdi, %xmm0 ## encoding: [0x62
; CHECK: ret
define <2 x i64> @test3(i64 %x) {
  %res = insertelement <2 x i64> undef, i64 %x, i32 0
  ret <2 x i64> %res
}

; Loaded i32 inserted into lane 0 of an undef vector: memory to XMM.
; CHECK-LABEL: @test4
; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test4(i32* %x) {
  %y = load i32, i32* %x
  %res = insertelement <4 x i32> undef, i32 %y, i32 0
  ret <4 x i32> %res
}

; Scalar float store.
; CHECK-LABEL: @test5
; CHECK: vmovss %xmm0, (%rdi) ## encoding: [0x62
; CHECK: ret
define void @test5(float %x, float* %y) {
  store float %x, float* %y, align 4
  ret void
}

; Scalar double store.
; CHECK-LABEL: @test6
; CHECK: vmovsd %xmm0, (%rdi) ## encoding: [0x62
; CHECK: ret
define void @test6(double %x, double* %y) {
  store double %x, double* %y, align 8
  ret void
}

; i32 load whose only use is a bitcast to float: expected as a direct
; scalar float load.
; CHECK-LABEL: @test7
; CHECK: vmovss (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define float @test7(i32* %x) {
  %y = load i32, i32* %x
  %res = bitcast i32 %y to float
  ret float %res
}

; Extract lane 0 of <4 x i32>: XMM to GPR.
; CHECK-LABEL: @test8
; CHECK: vmovd %xmm0, %eax ## encoding: [0x62
; CHECK: ret
define i32 @test8(<4 x i32> %x) {
  %res = extractelement <4 x i32> %x, i32 0
  ret i32 %res
}

; Extract lane 0 of <2 x i64>: XMM to 64-bit GPR.
; CHECK-LABEL: @test9
; CHECK: vmovq %xmm0, %rax ## encoding: [0x62
; CHECK: ret
define i64 @test9(<2 x i64> %x) {
  %res = extractelement <2 x i64> %x, i32 0
  ret i64 %res
}

; Loaded i32 inserted into lane 0 of a zero vector.
; CHECK-LABEL: @test10
; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test10(i32* %x) {
  %y = load i32, i32* %x, align 4
  %res = insertelement <4 x i32> zeroinitializer, i32 %y, i32 0
  ret <4 x i32> %res
}

; Loaded float inserted into lane 0 of a zero vector.
; CHECK-LABEL: @test11
; CHECK: vmovss (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x float> @test11(float* %x) {
  %y = load float, float* %x, align 4
  %res = insertelement <4 x float> zeroinitializer, float %y, i32 0
  ret <4 x float> %res
}

; Loaded double inserted into lane 0 of a zero vector.
; CHECK-LABEL: @test12
; CHECK: vmovsd (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <2 x double> @test12(double* %x) {
  %y = load double, double* %x, align 8
  %res = insertelement <2 x double> zeroinitializer, double %y, i32 0
  ret <2 x double> %res
}

; i64 argument inserted into lane 0 of a zero vector.
; CHECK-LABEL: @test13
; CHECK: vmovq %rdi, %xmm0 ## encoding: [0x62
; CHECK: ret
define <2 x i64> @test13(i64 %x) {
  %res = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
  ret <2 x i64> %res
}

; i32 argument inserted into lane 0 of a zero vector.
; CHECK-LABEL: @test14
; CHECK: vmovd %edi, %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test14(i32 %x) {
  %res = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
  ret <4 x i32> %res
}

; Same IR body and expectation as test10.
; CHECK-LABEL: @test15
; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
; CHECK: ret
define <4 x i32> @test15(i32* %x) {
  %y = load i32, i32* %x, align 4
  %res = insertelement <4 x i32> zeroinitializer, i32 %y, i32 0
  ret <4 x i32> %res
}

;===----------------------------------------------------------------------===;
; Unmasked 512-bit integer loads and stores.
;===----------------------------------------------------------------------===;

; <16 x i32> load, align 1.
; CHECK-LABEL: test16
; CHECK: vmovdqu32
; CHECK: ret
define <16 x i32> @test16(i8* %addr) {
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %res = load <16 x i32>, <16 x i32>* %vaddr, align 1
  ret <16 x i32> %res
}

; <16 x i32> load, align 64.
; CHECK-LABEL: test17
; CHECK: vmovdqa32
; CHECK: ret
define <16 x i32> @test17(i8* %addr) {
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %res = load <16 x i32>, <16 x i32>* %vaddr, align 64
  ret <16 x i32> %res
}

; <8 x i64> store, align 64.
; CHECK-LABEL: test18
; CHECK: vmovdqa64
; CHECK: ret
define void @test18(i8* %addr, <8 x i64> %data) {
  %vaddr = bitcast i8* %addr to <8 x i64>*
  store <8 x i64> %data, <8 x i64>* %vaddr, align 64
  ret void
}

; <16 x i32> store, align 1.
; CHECK-LABEL: test19
; CHECK: vmovdqu32
; CHECK: ret
define void @test19(i8* %addr, <16 x i32> %data) {
  %vaddr = bitcast i8* %addr to <16 x i32>*
  store <16 x i32> %data, <16 x i32>* %vaddr, align 1
  ret void
}

; <16 x i32> store, align 64.
; CHECK-LABEL: test20
; CHECK: vmovdqa32
; CHECK: ret
define void @test20(i8* %addr, <16 x i32> %data) {
  %vaddr = bitcast i8* %addr to <16 x i32>*
  store <16 x i32> %data, <16 x i32>* %vaddr, align 64
  ret void
}

; <8 x i64> load, align 64.
; CHECK-LABEL: test21
; CHECK: vmovdqa64
; CHECK: ret
define <8 x i64> @test21(i8* %addr) {
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %res = load <8 x i64>, <8 x i64>* %vaddr, align 64
  ret <8 x i64> %res
}

; <8 x i64> store, align 1.
; CHECK-LABEL: test22
; CHECK: vmovdqu64
; CHECK: ret
define void @test22(i8* %addr, <8 x i64> %data) {
  %vaddr = bitcast i8* %addr to <8 x i64>*
  store <8 x i64> %data, <8 x i64>* %vaddr, align 1
  ret void
}

; <8 x i64> load, align 1.
; CHECK-LABEL: test23
; CHECK: vmovdqu64
; CHECK: ret
define <8 x i64> @test23(i8* %addr) {
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %res = load <8 x i64>, <8 x i64>* %vaddr, align 1
  ret <8 x i64> %res
}

;===----------------------------------------------------------------------===;
; Unmasked 512-bit floating-point loads and stores.
;===----------------------------------------------------------------------===;

; <8 x double> store, align 64.
; CHECK-LABEL: test24
; CHECK: vmovapd
; CHECK: ret
define void @test24(i8* %addr, <8 x double> %data) {
  %vaddr = bitcast i8* %addr to <8 x double>*
  store <8 x double> %data, <8 x double>* %vaddr, align 64
  ret void
}

; <8 x double> load, align 64.
; CHECK-LABEL: test25
; CHECK: vmovapd
; CHECK: ret
define <8 x double> @test25(i8* %addr) {
  %vaddr = bitcast i8* %addr to <8 x double>*
  %res = load <8 x double>, <8 x double>* %vaddr, align 64
  ret <8 x double> %res
}

; <16 x float> store, align 64.
; CHECK-LABEL: test26
; CHECK: vmovaps
; CHECK: ret
define void @test26(i8* %addr, <16 x float> %data) {
  %vaddr = bitcast i8* %addr to <16 x float>*
  store <16 x float> %data, <16 x float>* %vaddr, align 64
  ret void
}

; <16 x float> load, align 64.
; CHECK-LABEL: test27
; CHECK: vmovaps
; CHECK: ret
define <16 x float> @test27(i8* %addr) {
  %vaddr = bitcast i8* %addr to <16 x float>*
  %res = load <16 x float>, <16 x float>* %vaddr, align 64
  ret <16 x float> %res
}

; <8 x double> store, align 1.
; CHECK-LABEL: test28
; CHECK: vmovupd
; CHECK: ret
define void @test28(i8* %addr, <8 x double> %data) {
  %vaddr = bitcast i8* %addr to <8 x double>*
  store <8 x double> %data, <8 x double>* %vaddr, align 1
  ret void
}

; <8 x double> load, align 1.
; CHECK-LABEL: test29
; CHECK: vmovupd
; CHECK: ret
define <8 x double> @test29(i8* %addr) {
  %vaddr = bitcast i8* %addr to <8 x double>*
  %res = load <8 x double>, <8 x double>* %vaddr, align 1
  ret <8 x double> %res
}

; <16 x float> store, align 1.
; CHECK-LABEL: test30
; CHECK: vmovups
; CHECK: ret
define void @test30(i8* %addr, <16 x float> %data) {
  %vaddr = bitcast i8* %addr to <16 x float>*
  store <16 x float> %data, <16 x float>* %vaddr, align 1
  ret void
}

; <16 x float> load, align 1.
; CHECK-LABEL: test31
; CHECK: vmovups
; CHECK: ret
define <16 x float> @test31(i8* %addr) {
  %vaddr = bitcast i8* %addr to <16 x float>*
  %res = load <16 x float>, <16 x float>* %vaddr, align 1
  ret <16 x float> %res
}

;===----------------------------------------------------------------------===;
; Masked 512-bit integer loads. The mask is `icmp ne %mask1, 0`; the select
; falls back to %old (merge form) or to zero ({z} form).
;===----------------------------------------------------------------------===;

; <16 x i32>, align 64, merge with %old.
; CHECK-LABEL: test32
; CHECK: vmovdqa32{{.*{%k[1-7]} }}
; CHECK: ret
define <16 x i32> @test32(i8* %addr, <16 x i32> %old, <16 x i32> %mask1) {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
  ret <16 x i32> %res
}

; <16 x i32>, align 1, merge with %old.
; CHECK-LABEL: test33
; CHECK: vmovdqu32{{.*{%k[1-7]} }}
; CHECK: ret
define <16 x i32> @test33(i8* %addr, <16 x i32> %old, <16 x i32> %mask1) {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
  ret <16 x i32> %res
}

; <16 x i32>, align 64, zeroing.
; CHECK-LABEL: test34
; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <16 x i32> @test34(i8* %addr, <16 x i32> %mask1) {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}

; <16 x i32>, align 1, zeroing.
; CHECK-LABEL: test35
; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <16 x i32> @test35(i8* %addr, <16 x i32> %mask1) {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x i32>*
  %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}

; <8 x i64>, align 64, merge with %old.
; CHECK-LABEL: test36
; CHECK: vmovdqa64{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x i64> @test36(i8* %addr, <8 x i64> %old, <8 x i64> %mask1) {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
  ret <8 x i64> %res
}

; <8 x i64>, align 1, merge with %old.
; CHECK-LABEL: test37
; CHECK: vmovdqu64{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x i64> @test37(i8* %addr, <8 x i64> %old, <8 x i64> %mask1) {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
  ret <8 x i64> %res
}

; <8 x i64>, align 64, zeroing.
; CHECK-LABEL: test38
; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x i64> @test38(i8* %addr, <8 x i64> %mask1) {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

; <8 x i64>, align 1, zeroing.
; CHECK-LABEL: test39
; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x i64> @test39(i8* %addr, <8 x i64> %mask1) {
  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x i64>*
  %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
  ret <8 x i64> %res
}

;===----------------------------------------------------------------------===;
; Masked 512-bit floating-point loads. The mask is `fcmp one %mask1, 0`; the
; select falls back to %old (merge form) or to zero ({z} form).
;===----------------------------------------------------------------------===;

; <16 x float>, align 64, merge with %old.
; CHECK-LABEL: test40
; CHECK: vmovaps{{.*{%k[1-7]} }}
; CHECK: ret
define <16 x float> @test40(i8* %addr, <16 x float> %old, <16 x float> %mask1) {
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
  ret <16 x float> %res
}

; <16 x float>, align 1, merge with %old.
; CHECK-LABEL: test41
; CHECK: vmovups{{.*{%k[1-7]} }}
; CHECK: ret
define <16 x float> @test41(i8* %addr, <16 x float> %old, <16 x float> %mask1) {
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
  ret <16 x float> %res
}

; <16 x float>, align 64, zeroing.
; CHECK-LABEL: test42
; CHECK: vmovaps{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <16 x float> @test42(i8* %addr, <16 x float> %mask1) {
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 64
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; <16 x float>, align 1, zeroing.
; CHECK-LABEL: test43
; CHECK: vmovups{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <16 x float> @test43(i8* %addr, <16 x float> %mask1) {
  %mask = fcmp one <16 x float> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <16 x float>*
  %r = load <16 x float>, <16 x float>* %vaddr, align 1
  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
  ret <16 x float> %res
}

; <8 x double>, align 64, merge with %old.
; CHECK-LABEL: test44
; CHECK: vmovapd{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x double> @test44(i8* %addr, <8 x double> %old, <8 x double> %mask1) {
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
  ret <8 x double> %res
}

; <8 x double>, align 1, merge with %old.
; CHECK-LABEL: test45
; CHECK: vmovupd{{.*{%k[1-7]} }}
; CHECK: ret
define <8 x double> @test45(i8* %addr, <8 x double> %old, <8 x double> %mask1) {
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
  ret <8 x double> %res
}

; <8 x double>, align 64, zeroing.
; CHECK-LABEL: test46
; CHECK: vmovapd{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x double> @test46(i8* %addr, <8 x double> %mask1) {
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 64
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
  ret <8 x double> %res
}

; <8 x double>, align 1, zeroing.
; CHECK-LABEL: test47
; CHECK: vmovupd{{.*{%k[1-7]} {z} }}
; CHECK: ret
define <8 x double> @test47(i8* %addr, <8 x double> %mask1) {
  %mask = fcmp one <8 x double> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <8 x double>*
  %r = load <8 x double>, <8 x double>* %vaddr, align 1
  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
  ret <8 x double> %res
}