Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s
        ; The target is selected by -mtriple alone; a separate -march=x86-64 flag
        ; would be redundant next to an x86_64 triple. -mcpu=knl enables the
        ; AVX-512 feature set the assertions in this file depend on.
      3 
      4 define i32 @test1(float %x) {
        ; Bitcasts the float argument to i32. The assertions below expect a single
        ; vmovd from %xmm0 to %eax followed by the return.
      5 ; CHECK-LABEL: test1:
      6 ; CHECK:       ## BB#0:
      7 ; CHECK-NEXT:    vmovd %xmm0, %eax ## encoding: [0x62,0xf1,0x7d,0x08,0x7e,0xc0]
      8 ; CHECK-NEXT:    retq ## encoding: [0xc3]
      9    %res = bitcast float %x to i32
     10    ret i32 %res
     11 }
     12 
     13 define <4 x i32> @test2(i32 %x) {
        ; Inserts the i32 argument into element 0 of an undef <4 x i32>. The
        ; assertions below expect a single vmovd from %edi to %xmm0.
     14 ; CHECK-LABEL: test2:
     15 ; CHECK:       ## BB#0:
     16 ; CHECK-NEXT:    vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7]
     17 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     18    %res = insertelement <4 x i32>undef, i32 %x, i32 0
     19    ret <4 x i32>%res
     20 }
     21 
     22 define <2 x i64> @test3(i64 %x) {
        ; Inserts the i64 argument into element 0 of an undef <2 x i64>. The
        ; assertions below expect a single vmovq from %rdi to %xmm0.
     23 ; CHECK-LABEL: test3:
     24 ; CHECK:       ## BB#0:
     25 ; CHECK-NEXT:    vmovq %rdi, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6e,0xc7]
     26 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     27    %res = insertelement <2 x i64>undef, i64 %x, i32 0
     28    ret <2 x i64>%res
     29 }
     30 
     31 define <4 x i32> @test4(i32* %x) {
        ; Loads an i32 from %x and inserts it into element 0 of an undef <4 x i32>.
        ; The assertions below expect the load to be folded into a vmovd from
        ; memory, with the asm comment showing the upper elements as zero.
     32 ; CHECK-LABEL: test4:
     33 ; CHECK:       ## BB#0:
     34 ; CHECK-NEXT:    vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
     35 ; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
     36 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     37    %y = load i32, i32* %x
     38    %res = insertelement <4 x i32>undef, i32 %y, i32 0
     39    ret <4 x i32>%res
     40 }
     41 
     42 define void @test5(float %x, float* %y) {
        ; Stores the float argument to %y with 4-byte alignment. The assertions
        ; below expect a single vmovss store from %xmm0 to (%rdi).
     43 ; CHECK-LABEL: test5:
     44 ; CHECK:       ## BB#0:
     45 ; CHECK-NEXT:    vmovss %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x08,0x11,0x07]
     46 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     47    store float %x, float* %y, align 4
     48    ret void
     49 }
     50 
     51 define void @test6(double %x, double* %y) {
        ; Stores the double argument to %y with 8-byte alignment. The assertions
        ; below expect a single vmovsd store from %xmm0 to (%rdi).
     52 ; CHECK-LABEL: test6:
     53 ; CHECK:       ## BB#0:
     54 ; CHECK-NEXT:    vmovsd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xff,0x08,0x11,0x07]
     55 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     56    store double %x, double* %y, align 8
     57    ret void
     58 }
     59 
     60 define float @test7(i32* %x) {
        ; Loads an i32 from %x and bitcasts it to float. The assertions below
        ; expect the load to be emitted directly as a vmovss from memory into
        ; %xmm0 rather than going through a general-purpose register.
     61 ; CHECK-LABEL: test7:
     62 ; CHECK:       ## BB#0:
     63 ; CHECK-NEXT:    vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07]
     64 ; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
     65 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     66    %y = load i32, i32* %x
     67    %res = bitcast i32 %y to float
     68    ret float %res
     69 }
     70 
     71 define i32 @test8(<4 x i32> %x) {
        ; Extracts element 0 of the <4 x i32> argument. The assertions below
        ; expect a single vmovd from %xmm0 to %eax.
     72 ; CHECK-LABEL: test8:
     73 ; CHECK:       ## BB#0:
     74 ; CHECK-NEXT:    vmovd %xmm0, %eax ## encoding: [0x62,0xf1,0x7d,0x08,0x7e,0xc0]
     75 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     76    %res = extractelement <4 x i32> %x, i32 0
     77    ret i32 %res
     78 }
     79 
     80 define i64 @test9(<2 x i64> %x) {
        ; Extracts element 0 of the <2 x i64> argument. The assertions below
        ; expect a single vmovq from %xmm0 to %rax.
     81 ; CHECK-LABEL: test9:
     82 ; CHECK:       ## BB#0:
     83 ; CHECK-NEXT:    vmovq %xmm0, %rax ## encoding: [0x62,0xf1,0xfd,0x08,0x7e,0xc0]
     84 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     85    %res = extractelement <2 x i64> %x, i32 0
     86    ret i64 %res
     87 }
     88 
     89 define <4 x i32> @test10(i32* %x) {
        ; Loads an i32 (align 4) from %x and inserts it into element 0 of an
        ; all-zero <4 x i32>. The assertions below expect a single vmovd from
        ; memory, whose asm comment shows the remaining elements as zero.
     90 ; CHECK-LABEL: test10:
     91 ; CHECK:       ## BB#0:
     92 ; CHECK-NEXT:    vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
     93 ; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
     94 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     95    %y = load i32, i32* %x, align 4
     96    %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
     97    ret <4 x i32>%res
     98 }
     99 
    100 define <4 x float> @test11(float* %x) {
        ; Loads a float (align 4) from %x and inserts it into element 0 of an
        ; all-zero <4 x float>. The assertions below expect a single vmovss from
        ; memory, whose asm comment shows the remaining elements as zero.
    101 ; CHECK-LABEL: test11:
    102 ; CHECK:       ## BB#0:
    103 ; CHECK-NEXT:    vmovss (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x10,0x07]
    104 ; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
    105 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    106    %y = load float, float* %x, align 4
    107    %res = insertelement <4 x float>zeroinitializer, float %y, i32 0
    108    ret <4 x float>%res
    109 }
    110 
    111 define <2 x double> @test12(double* %x) {
        ; Loads a double (align 8) from %x and inserts it into element 0 of an
        ; all-zero <2 x double>. The assertions below expect a single vmovsd from
        ; memory, whose asm comment shows the upper element as zero.
    112 ; CHECK-LABEL: test12:
    113 ; CHECK:       ## BB#0:
    114 ; CHECK-NEXT:    vmovsd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x10,0x07]
    115 ; CHECK-NEXT:    ## xmm0 = mem[0],zero
    116 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    117    %y = load double, double* %x, align 8
    118    %res = insertelement <2 x double>zeroinitializer, double %y, i32 0
    119    ret <2 x double>%res
    120 }
    121 
    122 define <2 x i64> @test13(i64 %x) {
        ; Inserts the i64 argument into element 0 of an all-zero <2 x i64>. The
        ; assertions below expect a single vmovq from %rdi to %xmm0 with no
        ; separate zeroing instruction.
    123 ; CHECK-LABEL: test13:
    124 ; CHECK:       ## BB#0:
    125 ; CHECK-NEXT:    vmovq %rdi, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6e,0xc7]
    126 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    127    %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0
    128    ret <2 x i64>%res
    129 }
    130 
    131 define <4 x i32> @test14(i32 %x) {
        ; Inserts the i32 argument into element 0 of an all-zero <4 x i32>. The
        ; assertions below expect a single vmovd from %edi to %xmm0 with no
        ; separate zeroing instruction.
    132 ; CHECK-LABEL: test14:
    133 ; CHECK:       ## BB#0:
    134 ; CHECK-NEXT:    vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7]
    135 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    136    %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0
    137    ret <4 x i32>%res
    138 }
    139 
    140 define <4 x i32> @test15(i32* %x) {
        ; Loads an i32 (align 4) from %x and inserts it into element 0 of an
        ; all-zero <4 x i32>; expected output is a single vmovd from memory.
        ; NOTE(review): the IR body and expected output are the same as test10's;
        ; this looks like a duplicate case - confirm whether a variant was intended.
    141 ; CHECK-LABEL: test15:
    142 ; CHECK:       ## BB#0:
    143 ; CHECK-NEXT:    vmovd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0x07]
    144 ; CHECK-NEXT:    ## xmm0 = mem[0],zero,zero,zero
    145 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    146    %y = load i32, i32* %x, align 4
    147    %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
    148    ret <4 x i32>%res
    149 }
    150 
    151 define <16 x i32> @test16(i8 * %addr) {
        ; Loads a <16 x i32> from %addr with alignment 1. The assertions below
        ; expect the unaligned form, vmovdqu32, into %zmm0.
    152 ; CHECK-LABEL: test16:
    153 ; CHECK:       ## BB#0:
    154 ; CHECK-NEXT:    vmovdqu32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0x6f,0x07]
    155 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    156   %vaddr = bitcast i8* %addr to <16 x i32>*
    157   %res = load <16 x i32>, <16 x i32>* %vaddr, align 1
    158   ret <16 x i32>%res
    159 }
    160 
    161 define <16 x i32> @test17(i8 * %addr) {
        ; Loads a <16 x i32> from %addr with alignment 64. The assertions below
        ; expect the aligned form, vmovdqa32, into %zmm0.
    162 ; CHECK-LABEL: test17:
    163 ; CHECK:       ## BB#0:
    164 ; CHECK-NEXT:    vmovdqa32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6f,0x07]
    165 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    166   %vaddr = bitcast i8* %addr to <16 x i32>*
    167   %res = load <16 x i32>, <16 x i32>* %vaddr, align 64
    168   ret <16 x i32>%res
    169 }
    170 
    171 define void @test18(i8 * %addr, <8 x i64> %data) {
        ; Stores a <8 x i64> to %addr with alignment 64. The assertions below
        ; expect the aligned form, vmovdqa64, from %zmm0.
    172 ; CHECK-LABEL: test18:
    173 ; CHECK:       ## BB#0:
    174 ; CHECK-NEXT:    vmovdqa64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x07]
    175 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    176   %vaddr = bitcast i8* %addr to <8 x i64>*
    177   store <8 x i64>%data, <8 x i64>* %vaddr, align 64
    178   ret void
    179 }
    180 
    181 define void @test19(i8 * %addr, <16 x i32> %data) {
        ; Stores a <16 x i32> to %addr with alignment 1. The assertions below
        ; expect the unaligned form, vmovdqu32, from %zmm0.
    182 ; CHECK-LABEL: test19:
    183 ; CHECK:       ## BB#0:
    184 ; CHECK-NEXT:    vmovdqu32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x48,0x7f,0x07]
    185 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    186   %vaddr = bitcast i8* %addr to <16 x i32>*
    187   store <16 x i32>%data, <16 x i32>* %vaddr, align 1
    188   ret void
    189 }
    190 
    191 define void @test20(i8 * %addr, <16 x i32> %data) {
        ; Stores a <16 x i32> to %addr with alignment 64. The assertions below
        ; expect the aligned form, vmovdqa32, from %zmm0.
    192 ; CHECK-LABEL: test20:
    193 ; CHECK:       ## BB#0:
    194 ; CHECK-NEXT:    vmovdqa32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x48,0x7f,0x07]
    195 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    196   %vaddr = bitcast i8* %addr to <16 x i32>*
    197   store <16 x i32>%data, <16 x i32>* %vaddr, align 64
    198   ret void
    199 }
    200 
    201 define  <8 x i64> @test21(i8 * %addr) {
        ; Loads a <8 x i64> from %addr with alignment 64. The assertions below
        ; expect the aligned form, vmovdqa64, into %zmm0.
    202 ; CHECK-LABEL: test21:
    203 ; CHECK:       ## BB#0:
    204 ; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
    205 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    206   %vaddr = bitcast i8* %addr to <8 x i64>*
    207   %res = load <8 x i64>, <8 x i64>* %vaddr, align 64
    208   ret <8 x i64>%res
    209 }
    210 
    211 define void @test22(i8 * %addr, <8 x i64> %data) {
        ; Stores a <8 x i64> to %addr with alignment 1. The assertions below
        ; expect the unaligned form, vmovdqu64, from %zmm0.
    212 ; CHECK-LABEL: test22:
    213 ; CHECK:       ## BB#0:
    214 ; CHECK-NEXT:    vmovdqu64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x07]
    215 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    216   %vaddr = bitcast i8* %addr to <8 x i64>*
    217   store <8 x i64>%data, <8 x i64>* %vaddr, align 1
    218   ret void
    219 }
    220 
    221 define <8 x i64> @test23(i8 * %addr) {
        ; Loads a <8 x i64> from %addr with alignment 1. The assertions below
        ; expect the unaligned form, vmovdqu64, into %zmm0.
    222 ; CHECK-LABEL: test23:
    223 ; CHECK:       ## BB#0:
    224 ; CHECK-NEXT:    vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
    225 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    226   %vaddr = bitcast i8* %addr to <8 x i64>*
    227   %res = load <8 x i64>, <8 x i64>* %vaddr, align 1
    228   ret <8 x i64>%res
    229 }
    230 
    231 define void @test24(i8 * %addr, <8 x double> %data) {
        ; Stores a <8 x double> to %addr with alignment 64. The assertions below
        ; expect the aligned form, vmovapd, from %zmm0.
    232 ; CHECK-LABEL: test24:
    233 ; CHECK:       ## BB#0:
    234 ; CHECK-NEXT:    vmovapd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x07]
    235 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    236   %vaddr = bitcast i8* %addr to <8 x double>*
    237   store <8 x double>%data, <8 x double>* %vaddr, align 64
    238   ret void
    239 }
    240 
    241 define <8 x double> @test25(i8 * %addr) {
        ; Loads a <8 x double> from %addr with alignment 64. The assertions below
        ; expect the aligned form, vmovapd, into %zmm0.
    242 ; CHECK-LABEL: test25:
    243 ; CHECK:       ## BB#0:
    244 ; CHECK-NEXT:    vmovapd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x07]
    245 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    246   %vaddr = bitcast i8* %addr to <8 x double>*
    247   %res = load <8 x double>, <8 x double>* %vaddr, align 64
    248   ret <8 x double>%res
    249 }
    250 
    251 define void @test26(i8 * %addr, <16 x float> %data) {
        ; Stores a <16 x float> to %addr with alignment 64. The assertions below
        ; expect the aligned form, vmovaps, from %zmm0.
    252 ; CHECK-LABEL: test26:
    253 ; CHECK:       ## BB#0:
    254 ; CHECK-NEXT:    vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07]
    255 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    256   %vaddr = bitcast i8* %addr to <16 x float>*
    257   store <16 x float>%data, <16 x float>* %vaddr, align 64
    258   ret void
    259 }
    260 
    261 define <16 x float> @test27(i8 * %addr) {
        ; Loads a <16 x float> from %addr with alignment 64. The assertions below
        ; expect the aligned form, vmovaps, into %zmm0.
    262 ; CHECK-LABEL: test27:
    263 ; CHECK:       ## BB#0:
    264 ; CHECK-NEXT:    vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
    265 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    266   %vaddr = bitcast i8* %addr to <16 x float>*
    267   %res = load <16 x float>, <16 x float>* %vaddr, align 64
    268   ret <16 x float>%res
    269 }
    270 
    271 define void @test28(i8 * %addr, <8 x double> %data) {
        ; Stores a <8 x double> to %addr with alignment 1. The assertions below
        ; expect the unaligned form, vmovupd, from %zmm0.
    272 ; CHECK-LABEL: test28:
    273 ; CHECK:       ## BB#0:
    274 ; CHECK-NEXT:    vmovupd %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x07]
    275 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    276   %vaddr = bitcast i8* %addr to <8 x double>*
    277   store <8 x double>%data, <8 x double>* %vaddr, align 1
    278   ret void
    279 }
    280 
    281 define <8 x double> @test29(i8 * %addr) {
        ; Loads a <8 x double> from %addr with alignment 1. The assertions below
        ; expect the unaligned form, vmovupd, into %zmm0.
    282 ; CHECK-LABEL: test29:
    283 ; CHECK:       ## BB#0:
    284 ; CHECK-NEXT:    vmovupd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x07]
    285 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    286   %vaddr = bitcast i8* %addr to <8 x double>*
    287   %res = load <8 x double>, <8 x double>* %vaddr, align 1
    288   ret <8 x double>%res
    289 }
    290 
    291 define void @test30(i8 * %addr, <16 x float> %data) {
        ; Stores a <16 x float> to %addr with alignment 1. The assertions below
        ; expect the unaligned form, vmovups, from %zmm0.
    292 ; CHECK-LABEL: test30:
    293 ; CHECK:       ## BB#0:
    294 ; CHECK-NEXT:    vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
    295 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    296   %vaddr = bitcast i8* %addr to <16 x float>*
    297   store <16 x float>%data, <16 x float>* %vaddr, align 1
    298   ret void
    299 }
    300 
    301 define <16 x float> @test31(i8 * %addr) {
        ; Loads a <16 x float> from %addr with alignment 1. The assertions below
        ; expect the unaligned form, vmovups, into %zmm0.
    302 ; CHECK-LABEL: test31:
    303 ; CHECK:       ## BB#0:
    304 ; CHECK-NEXT:    vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
    305 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    306   %vaddr = bitcast i8* %addr to <16 x float>*
    307   %res = load <16 x float>, <16 x float>* %vaddr, align 1
    308   ret <16 x float>%res
    309 }
    310 
    311 define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
        ; Merge-masked load of <16 x i32> (align 64): lanes where %mask1 != 0 take
        ; the loaded value, the other lanes keep %old. The assertions below expect
        ; the mask to be built with vpxord + vpcmpneqd into %k1 and the load to be
        ; folded into a masked vpblendmd with a memory operand.
    312 ; CHECK-LABEL: test32:
    313 ; CHECK:       ## BB#0:
    314 ; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
    315 ; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04]
    316 ; CHECK-NEXT:    vpblendmd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x64,0x07]
    317 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    318   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
    319   %vaddr = bitcast i8* %addr to <16 x i32>*
    320   %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
    321   %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
    322   ret <16 x i32>%res
    323 }
    324 
    325 define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
        ; Merge-masked load of <16 x i32> with alignment 1: lanes where
        ; %mask1 != 0 take the loaded value, the other lanes keep %old. The
        ; assertions below expect vpxord + vpcmpneqd into %k1 and the unaligned
        ; load folded into a masked vpblendmd with a memory operand.
    326 ; CHECK-LABEL: test33:
    327 ; CHECK:       ## BB#0:
    328 ; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
    329 ; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x48,0x1f,0xca,0x04]
    330 ; CHECK-NEXT:    vpblendmd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x64,0x07]
    331 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    332   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
    333   %vaddr = bitcast i8* %addr to <16 x i32>*
    334   %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
    335   %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
    336   ret <16 x i32>%res
    337 }
    338 
    339 define <16 x i32> @test34(i8 * %addr, <16 x i32> %mask1) {
        ; Zero-masked load of <16 x i32> (align 64): lanes where %mask1 != 0 take
        ; the loaded value, the other lanes are zero. The assertions below expect
        ; vpxord + vpcmpneqd into %k1 and an aligned vmovdqa32 load with
        ; {%k1} {z} masking.
    340 ; CHECK-LABEL: test34:
    341 ; CHECK:       ## BB#0:
    342 ; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
    343 ; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x04]
    344 ; CHECK-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x07]
    345 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    346   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
    347   %vaddr = bitcast i8* %addr to <16 x i32>*
    348   %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
    349   %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
    350   ret <16 x i32>%res
    351 }
    352 
    353 define <16 x i32> @test35(i8 * %addr, <16 x i32> %mask1) {
        ; Zero-masked load of <16 x i32> with alignment 1: lanes where
        ; %mask1 != 0 take the loaded value, the other lanes are zero. The
        ; assertions below expect vpxord + vpcmpneqd into %k1 and an unaligned
        ; vmovdqu32 load with {%k1} {z} masking.
    354 ; CHECK-LABEL: test35:
    355 ; CHECK:       ## BB#0:
    356 ; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
    357 ; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc9,0x04]
    358 ; CHECK-NEXT:    vmovdqu32 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x07]
    359 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    360   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
    361   %vaddr = bitcast i8* %addr to <16 x i32>*
    362   %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
    363   %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
    364   ret <16 x i32>%res
    365 }
    366 
    367 define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
        ; Merge-masked load of <8 x i64> (align 64): lanes where %mask1 != 0 take
        ; the loaded value, the other lanes keep %old. The assertions below expect
        ; vpxord + vpcmpneqq into %k1 and the load folded into a masked vpblendmq
        ; with a memory operand.
    368 ; CHECK-LABEL: test36:
    369 ; CHECK:       ## BB#0:
    370 ; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
    371 ; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04]
    372 ; CHECK-NEXT:    vpblendmq (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x64,0x07]
    373 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    374   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
    375   %vaddr = bitcast i8* %addr to <8 x i64>*
    376   %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
    377   %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
    378   ret <8 x i64>%res
    379 }
    380 
    381 define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
        ; Merge-masked load of <8 x i64> with alignment 1: lanes where
        ; %mask1 != 0 take the loaded value, the other lanes keep %old. The
        ; assertions below expect vpxord + vpcmpneqq into %k1 and the unaligned
        ; load folded into a masked vpblendmq with a memory operand.
    382 ; CHECK-LABEL: test37:
    383 ; CHECK:       ## BB#0:
    384 ; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
    385 ; CHECK-NEXT:    vpcmpneqq %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x48,0x1f,0xca,0x04]
    386 ; CHECK-NEXT:    vpblendmq (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x64,0x07]
    387 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    388   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
    389   %vaddr = bitcast i8* %addr to <8 x i64>*
    390   %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
    391   %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
    392   ret <8 x i64>%res
    393 }
    394 
    395 define <8 x i64> @test38(i8 * %addr, <8 x i64> %mask1) {
        ; Zero-masked load of <8 x i64> (align 64): lanes where %mask1 != 0 take
        ; the loaded value, the other lanes are zero. The assertions below expect
        ; vpxord + vpcmpneqq into %k1 and an aligned vmovdqa64 load with
        ; {%k1} {z} masking.
    396 ; CHECK-LABEL: test38:
    397 ; CHECK:       ## BB#0:
    398 ; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
    399 ; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x04]
    400 ; CHECK-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x07]
    401 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    402   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
    403   %vaddr = bitcast i8* %addr to <8 x i64>*
    404   %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
    405   %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
    406   ret <8 x i64>%res
    407 }
    408 
    409 define <8 x i64> @test39(i8 * %addr, <8 x i64> %mask1) {
        ; Zero-masked load of <8 x i64> with alignment 1: lanes where
        ; %mask1 != 0 take the loaded value, the other lanes are zero. The
        ; assertions below expect vpxord + vpcmpneqq into %k1 and an unaligned
        ; vmovdqu64 load with {%k1} {z} masking.
    410 ; CHECK-LABEL: test39:
    411 ; CHECK:       ## BB#0:
    412 ; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
    413 ; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xc9,0x04]
    414 ; CHECK-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x07]
    415 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    416   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
    417   %vaddr = bitcast i8* %addr to <8 x i64>*
    418   %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
    419   %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
    420   ret <8 x i64>%res
    421 }
    422 
    423 define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
        ; Merge-masked load of <16 x float> (align 64): lanes where %mask1
        ; compares "one" (ordered and not equal) against zero take the loaded
        ; value, the other lanes keep %old. The assertions below expect the
        ; predicate as vcmpordps followed by vcmpneqps masked by %k1, and the
        ; load folded into a masked vblendmps with a memory operand.
    424 ; CHECK-LABEL: test40:
    425 ; CHECK:       ## BB#0:
    426 ; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
    427 ; CHECK-NEXT:    vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
    428 ; CHECK-NEXT:    vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
    429 ; CHECK-NEXT:    vblendmps (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x65,0x07]
    430 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    431   %mask = fcmp one <16 x float> %mask1, zeroinitializer
    432   %vaddr = bitcast i8* %addr to <16 x float>*
    433   %r = load <16 x float>, <16 x float>* %vaddr, align 64
    434   %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
    435   ret <16 x float>%res
    436 }
    437 
    438 define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
        ; Merge-masked load of <16 x float> with alignment 1: lanes where %mask1
        ; compares "one" (ordered and not equal) against zero take the loaded
        ; value, the other lanes keep %old. The assertions below expect
        ; vcmpordps followed by vcmpneqps masked by %k1, and the unaligned load
        ; folded into a masked vblendmps with a memory operand.
    439 ; CHECK-LABEL: test41:
    440 ; CHECK:       ## BB#0:
    441 ; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
    442 ; CHECK-NEXT:    vcmpordps %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0x74,0x48,0xc2,0xca,0x07]
    443 ; CHECK-NEXT:    vcmpneqps %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x49,0xc2,0xca,0x04]
    444 ; CHECK-NEXT:    vblendmps (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x65,0x07]
    445 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    446   %mask = fcmp one <16 x float> %mask1, zeroinitializer
    447   %vaddr = bitcast i8* %addr to <16 x float>*
    448   %r = load <16 x float>, <16 x float>* %vaddr, align 1
    449   %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
    450   ret <16 x float>%res
    451 }
    452 
    453 define <16 x float> @test42(i8 * %addr, <16 x float> %mask1) {
        ; Zero-masked load of <16 x float> (align 64): lanes where %mask1
        ; compares "one" (ordered and not equal) against zero take the loaded
        ; value, the other lanes are zero. The assertions below expect
        ; vcmpordps followed by vcmpneqps masked by %k1, then an aligned vmovaps
        ; load with {%k1} {z} masking.
    454 ; CHECK-LABEL: test42:
    455 ; CHECK:       ## BB#0:
    456 ; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
    457 ; CHECK-NEXT:    vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07]
    458 ; CHECK-NEXT:    vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04]
    459 ; CHECK-NEXT:    vmovaps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x07]
    460 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    461   %mask = fcmp one <16 x float> %mask1, zeroinitializer
    462   %vaddr = bitcast i8* %addr to <16 x float>*
    463   %r = load <16 x float>, <16 x float>* %vaddr, align 64
    464   %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
    465   ret <16 x float>%res
    466 }
    467 
    468 define <16 x float> @test43(i8 * %addr, <16 x float> %mask1) {
        ; Zero-masked load of <16 x float> with alignment 1: lanes where %mask1
        ; compares "one" (ordered and not equal) against zero take the loaded
        ; value, the other lanes are zero. The assertions below expect
        ; vcmpordps followed by vcmpneqps masked by %k1, then an unaligned
        ; vmovups load with {%k1} {z} masking.
    469 ; CHECK-LABEL: test43:
    470 ; CHECK:       ## BB#0:
    471 ; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
    472 ; CHECK-NEXT:    vcmpordps %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xc9,0x07]
    473 ; CHECK-NEXT:    vcmpneqps %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc2,0xc9,0x04]
    474 ; CHECK-NEXT:    vmovups (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x07]
    475 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    476   %mask = fcmp one <16 x float> %mask1, zeroinitializer
    477   %vaddr = bitcast i8* %addr to <16 x float>*
    478   %r = load <16 x float>, <16 x float>* %vaddr, align 1
    479   %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
    480   ret <16 x float>%res
    481 }
    482 
    483 define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
        ; Merge-masked load of <8 x double> (align 64): lanes where %mask1
        ; compares "one" (ordered and not equal) against zero take the loaded
        ; value, the other lanes keep %old. The assertions below expect
        ; vcmpordpd followed by vcmpneqpd masked by %k1, and the load folded
        ; into a masked vblendmpd with a memory operand.
    484 ; CHECK-LABEL: test44:
    485 ; CHECK:       ## BB#0:
    486 ; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
    487 ; CHECK-NEXT:    vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
    488 ; CHECK-NEXT:    vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
    489 ; CHECK-NEXT:    vblendmpd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x65,0x07]
    490 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    491   %mask = fcmp one <8 x double> %mask1, zeroinitializer
    492   %vaddr = bitcast i8* %addr to <8 x double>*
    493   %r = load <8 x double>, <8 x double>* %vaddr, align 64
    494   %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
    495   ret <8 x double>%res
    496 }
    497 
    498 define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
        ; Merge-masked load of <8 x double> with alignment 1: lanes where %mask1
        ; compares "one" (ordered and not equal) against zero take the loaded
        ; value, the other lanes keep %old. The assertions below expect
        ; vcmpordpd followed by vcmpneqpd masked by %k1, and the unaligned load
        ; folded into a masked vblendmpd with a memory operand.
    499 ; CHECK-LABEL: test45:
    500 ; CHECK:       ## BB#0:
    501 ; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0xef,0xd2]
    502 ; CHECK-NEXT:    vcmpordpd %zmm2, %zmm1, %k1 ## encoding: [0x62,0xf1,0xf5,0x48,0xc2,0xca,0x07]
    503 ; CHECK-NEXT:    vcmpneqpd %zmm2, %zmm1, %k1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0xc2,0xca,0x04]
    504 ; CHECK-NEXT:    vblendmpd (%rdi), %zmm0, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x65,0x07]
    505 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    506   %mask = fcmp one <8 x double> %mask1, zeroinitializer
    507   %vaddr = bitcast i8* %addr to <8 x double>*
    508   %r = load <8 x double>, <8 x double>* %vaddr, align 1
    509   %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
    510   ret <8 x double>%res
    511 }
    512 
    513 define <8 x double> @test46(i8 * %addr, <8 x double> %mask1) {
        ; Zero-masked load of <8 x double> (align 64): lanes where %mask1
        ; compares "one" (ordered and not equal) against zero take the loaded
        ; value, the other lanes are zero. The assertions below expect
        ; vcmpordpd followed by vcmpneqpd masked by %k1, then an aligned vmovapd
        ; load with {%k1} {z} masking.
    514 ; CHECK-LABEL: test46:
    515 ; CHECK:       ## BB#0:
    516 ; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
    517 ; CHECK-NEXT:    vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07]
    518 ; CHECK-NEXT:    vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04]
    519 ; CHECK-NEXT:    vmovapd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x07]
    520 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    521   %mask = fcmp one <8 x double> %mask1, zeroinitializer
    522   %vaddr = bitcast i8* %addr to <8 x double>*
    523   %r = load <8 x double>, <8 x double>* %vaddr, align 64
    524   %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
    525   ret <8 x double>%res
    526 }
    527 
    528 define <8 x double> @test47(i8 * %addr, <8 x double> %mask1) {
        ; Zero-masked load of <8 x double> with alignment 1: lanes where %mask1
        ; compares "one" (ordered and not equal) against zero take the loaded
        ; value, the other lanes are zero. The assertions below expect
        ; vcmpordpd followed by vcmpneqpd masked by %k1, then an unaligned
        ; vmovupd load with {%k1} {z} masking.
    529 ; CHECK-LABEL: test47:
    530 ; CHECK:       ## BB#0:
    531 ; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xef,0xc9]
    532 ; CHECK-NEXT:    vcmpordpd %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc9,0x07]
    533 ; CHECK-NEXT:    vcmpneqpd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc2,0xc9,0x04]
    534 ; CHECK-NEXT:    vmovupd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x07]
    535 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    536   %mask = fcmp one <8 x double> %mask1, zeroinitializer
    537   %vaddr = bitcast i8* %addr to <8 x double>*
    538   %r = load <8 x double>, <8 x double>* %vaddr, align 1
    539   %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
    540   ret <8 x double>%res
    541 }
    542