; (page header from code-browser export: Home | History | Annotate | Download | only in X86)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
      3 
      4 define <8 x i32> @test_256_1(i8 * %addr) {
      5 ; CHECK-LABEL: test_256_1:
      6 ; CHECK:       ## %bb.0:
      7 ; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
      8 ; CHECK-NEXT:    retq ## encoding: [0xc3]
      9   %vaddr = bitcast i8* %addr to <8 x i32>*
     10   %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
     11   ret <8 x i32>%res
     12 }
     13 
     14 define <8 x i32> @test_256_2(i8 * %addr) {
     15 ; CHECK-LABEL: test_256_2:
     16 ; CHECK:       ## %bb.0:
     17 ; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
     18 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     19   %vaddr = bitcast i8* %addr to <8 x i32>*
     20   %res = load <8 x i32>, <8 x i32>* %vaddr, align 32
     21   ret <8 x i32>%res
     22 }
     23 
     24 define void @test_256_3(i8 * %addr, <4 x i64> %data) {
     25 ; CHECK-LABEL: test_256_3:
     26 ; CHECK:       ## %bb.0:
     27 ; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
     28 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     29   %vaddr = bitcast i8* %addr to <4 x i64>*
     30   store <4 x i64>%data, <4 x i64>* %vaddr, align 32
     31   ret void
     32 }
     33 
     34 define void @test_256_4(i8 * %addr, <8 x i32> %data) {
     35 ; CHECK-LABEL: test_256_4:
     36 ; CHECK:       ## %bb.0:
     37 ; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
     38 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     39   %vaddr = bitcast i8* %addr to <8 x i32>*
     40   store <8 x i32>%data, <8 x i32>* %vaddr, align 1
     41   ret void
     42 }
     43 
     44 define void @test_256_5(i8 * %addr, <8 x i32> %data) {
     45 ; CHECK-LABEL: test_256_5:
     46 ; CHECK:       ## %bb.0:
     47 ; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
     48 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     49   %vaddr = bitcast i8* %addr to <8 x i32>*
     50   store <8 x i32>%data, <8 x i32>* %vaddr, align 32
     51   ret void
     52 }
     53 
     54 define  <4 x i64> @test_256_6(i8 * %addr) {
     55 ; CHECK-LABEL: test_256_6:
     56 ; CHECK:       ## %bb.0:
     57 ; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
     58 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     59   %vaddr = bitcast i8* %addr to <4 x i64>*
     60   %res = load <4 x i64>, <4 x i64>* %vaddr, align 32
     61   ret <4 x i64>%res
     62 }
     63 
     64 define void @test_256_7(i8 * %addr, <4 x i64> %data) {
     65 ; CHECK-LABEL: test_256_7:
     66 ; CHECK:       ## %bb.0:
     67 ; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
     68 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     69   %vaddr = bitcast i8* %addr to <4 x i64>*
     70   store <4 x i64>%data, <4 x i64>* %vaddr, align 1
     71   ret void
     72 }
     73 
     74 define <4 x i64> @test_256_8(i8 * %addr) {
     75 ; CHECK-LABEL: test_256_8:
     76 ; CHECK:       ## %bb.0:
     77 ; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
     78 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     79   %vaddr = bitcast i8* %addr to <4 x i64>*
     80   %res = load <4 x i64>, <4 x i64>* %vaddr, align 1
     81   ret <4 x i64>%res
     82 }
     83 
     84 define void @test_256_9(i8 * %addr, <4 x double> %data) {
     85 ; CHECK-LABEL: test_256_9:
     86 ; CHECK:       ## %bb.0:
     87 ; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
     88 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     89   %vaddr = bitcast i8* %addr to <4 x double>*
     90   store <4 x double>%data, <4 x double>* %vaddr, align 32
     91   ret void
     92 }
     93 
     94 define <4 x double> @test_256_10(i8 * %addr) {
     95 ; CHECK-LABEL: test_256_10:
     96 ; CHECK:       ## %bb.0:
     97 ; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
     98 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     99   %vaddr = bitcast i8* %addr to <4 x double>*
    100   %res = load <4 x double>, <4 x double>* %vaddr, align 32
    101   ret <4 x double>%res
    102 }
    103 
    104 define void @test_256_11(i8 * %addr, <8 x float> %data) {
    105 ; CHECK-LABEL: test_256_11:
    106 ; CHECK:       ## %bb.0:
    107 ; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x07]
    108 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    109   %vaddr = bitcast i8* %addr to <8 x float>*
    110   store <8 x float>%data, <8 x float>* %vaddr, align 32
    111   ret void
    112 }
    113 
    114 define <8 x float> @test_256_12(i8 * %addr) {
    115 ; CHECK-LABEL: test_256_12:
    116 ; CHECK:       ## %bb.0:
    117 ; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
    118 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    119   %vaddr = bitcast i8* %addr to <8 x float>*
    120   %res = load <8 x float>, <8 x float>* %vaddr, align 32
    121   ret <8 x float>%res
    122 }
    123 
    124 define void @test_256_13(i8 * %addr, <4 x double> %data) {
    125 ; CHECK-LABEL: test_256_13:
    126 ; CHECK:       ## %bb.0:
    127 ; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
    128 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    129   %vaddr = bitcast i8* %addr to <4 x double>*
    130   store <4 x double>%data, <4 x double>* %vaddr, align 1
    131   ret void
    132 }
    133 
    134 define <4 x double> @test_256_14(i8 * %addr) {
    135 ; CHECK-LABEL: test_256_14:
    136 ; CHECK:       ## %bb.0:
    137 ; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
    138 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    139   %vaddr = bitcast i8* %addr to <4 x double>*
    140   %res = load <4 x double>, <4 x double>* %vaddr, align 1
    141   ret <4 x double>%res
    142 }
    143 
    144 define void @test_256_15(i8 * %addr, <8 x float> %data) {
    145 ; CHECK-LABEL: test_256_15:
    146 ; CHECK:       ## %bb.0:
    147 ; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
    148 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    149   %vaddr = bitcast i8* %addr to <8 x float>*
    150   store <8 x float>%data, <8 x float>* %vaddr, align 1
    151   ret void
    152 }
    153 
    154 define <8 x float> @test_256_16(i8 * %addr) {
    155 ; CHECK-LABEL: test_256_16:
    156 ; CHECK:       ## %bb.0:
    157 ; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
    158 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    159   %vaddr = bitcast i8* %addr to <8 x float>*
    160   %res = load <8 x float>, <8 x float>* %vaddr, align 1
    161   ret <8 x float>%res
    162 }
    163 
    164 define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
    165 ; CHECK-LABEL: test_256_17:
    166 ; CHECK:       ## %bb.0:
    167 ; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
    168 ; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
    169 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    170   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
    171   %vaddr = bitcast i8* %addr to <8 x i32>*
    172   %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
    173   %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
    174   ret <8 x i32>%res
    175 }
    176 
    177 define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
    178 ; CHECK-LABEL: test_256_18:
    179 ; CHECK:       ## %bb.0:
    180 ; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
    181 ; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x07]
    182 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    183   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
    184   %vaddr = bitcast i8* %addr to <8 x i32>*
    185   %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
    186   %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
    187   ret <8 x i32>%res
    188 }
    189 
    190 define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) {
    191 ; CHECK-LABEL: test_256_19:
    192 ; CHECK:       ## %bb.0:
    193 ; CHECK-NEXT:    vptestmd %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc8]
    194 ; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x07]
    195 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    196   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
    197   %vaddr = bitcast i8* %addr to <8 x i32>*
    198   %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
    199   %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
    200   ret <8 x i32>%res
    201 }
    202 
    203 define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) {
    204 ; CHECK-LABEL: test_256_20:
    205 ; CHECK:       ## %bb.0:
    206 ; CHECK-NEXT:    vptestmd %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc8]
    207 ; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x07]
    208 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    209   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
    210   %vaddr = bitcast i8* %addr to <8 x i32>*
    211   %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
    212   %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
    213   ret <8 x i32>%res
    214 }
    215 
    216 define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
    217 ; CHECK-LABEL: test_256_21:
    218 ; CHECK:       ## %bb.0:
    219 ; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
    220 ; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
    221 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    222   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    223   %vaddr = bitcast i8* %addr to <4 x i64>*
    224   %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
    225   %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
    226   ret <4 x i64>%res
    227 }
    228 
    229 define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
    230 ; CHECK-LABEL: test_256_22:
    231 ; CHECK:       ## %bb.0:
    232 ; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
    233 ; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x07]
    234 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    235   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    236   %vaddr = bitcast i8* %addr to <4 x i64>*
    237   %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
    238   %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
    239   ret <4 x i64>%res
    240 }
    241 
    242 define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) {
    243 ; CHECK-LABEL: test_256_23:
    244 ; CHECK:       ## %bb.0:
    245 ; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
    246 ; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x07]
    247 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    248   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    249   %vaddr = bitcast i8* %addr to <4 x i64>*
    250   %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
    251   %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
    252   ret <4 x i64>%res
    253 }
    254 
    255 define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) {
    256 ; CHECK-LABEL: test_256_24:
    257 ; CHECK:       ## %bb.0:
    258 ; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
    259 ; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x07]
    260 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    261   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    262   %vaddr = bitcast i8* %addr to <4 x i64>*
    263   %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
    264   %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
    265   ret <4 x i64>%res
    266 }
    267 
    268 define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
    269 ; CHECK-LABEL: test_256_25:
    270 ; CHECK:       ## %bb.0:
    271 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
    272 ; CHECK-NEXT:    vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
    273 ; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
    274 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    275   %mask = fcmp one <8 x float> %mask1, zeroinitializer
    276   %vaddr = bitcast i8* %addr to <8 x float>*
    277   %r = load <8 x float>, <8 x float>* %vaddr, align 32
    278   %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
    279   ret <8 x float>%res
    280 }
    281 
    282 define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
    283 ; CHECK-LABEL: test_256_26:
    284 ; CHECK:       ## %bb.0:
    285 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
    286 ; CHECK-NEXT:    vcmpneq_oqps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x0c]
    287 ; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
    288 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    289   %mask = fcmp one <8 x float> %mask1, zeroinitializer
    290   %vaddr = bitcast i8* %addr to <8 x float>*
    291   %r = load <8 x float>, <8 x float>* %vaddr, align 1
    292   %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
    293   ret <8 x float>%res
    294 }
    295 
    296 define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) {
    297 ; CHECK-LABEL: test_256_27:
    298 ; CHECK:       ## %bb.0:
    299 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
    300 ; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
    301 ; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07]
    302 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    303   %mask = fcmp one <8 x float> %mask1, zeroinitializer
    304   %vaddr = bitcast i8* %addr to <8 x float>*
    305   %r = load <8 x float>, <8 x float>* %vaddr, align 32
    306   %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
    307   ret <8 x float>%res
    308 }
    309 
    310 define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) {
    311 ; CHECK-LABEL: test_256_28:
    312 ; CHECK:       ## %bb.0:
    313 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
    314 ; CHECK-NEXT:    vcmpneq_oqps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x0c]
    315 ; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07]
    316 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    317   %mask = fcmp one <8 x float> %mask1, zeroinitializer
    318   %vaddr = bitcast i8* %addr to <8 x float>*
    319   %r = load <8 x float>, <8 x float>* %vaddr, align 1
    320   %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
    321   ret <8 x float>%res
    322 }
    323 
    324 define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
    325 ; CHECK-LABEL: test_256_29:
    326 ; CHECK:       ## %bb.0:
    327 ; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
    328 ; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
    329 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    330   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    331   %vaddr = bitcast i8* %addr to <4 x double>*
    332   %r = load <4 x double>, <4 x double>* %vaddr, align 32
    333   %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
    334   ret <4 x double>%res
    335 }
    336 
    337 define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
    338 ; CHECK-LABEL: test_256_30:
    339 ; CHECK:       ## %bb.0:
    340 ; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
    341 ; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
    342 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    343   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    344   %vaddr = bitcast i8* %addr to <4 x double>*
    345   %r = load <4 x double>, <4 x double>* %vaddr, align 1
    346   %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
    347   ret <4 x double>%res
    348 }
    349 
    350 define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) {
    351 ; CHECK-LABEL: test_256_31:
    352 ; CHECK:       ## %bb.0:
    353 ; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
    354 ; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x07]
    355 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    356   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    357   %vaddr = bitcast i8* %addr to <4 x double>*
    358   %r = load <4 x double>, <4 x double>* %vaddr, align 32
    359   %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
    360   ret <4 x double>%res
    361 }
    362 
    363 define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
    364 ; CHECK-LABEL: test_256_32:
    365 ; CHECK:       ## %bb.0:
    366 ; CHECK-NEXT:    vptestmq %ymm0, %ymm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc8]
    367 ; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x07]
    368 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    369   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    370   %vaddr = bitcast i8* %addr to <4 x double>*
    371   %r = load <4 x double>, <4 x double>* %vaddr, align 1
    372   %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
    373   ret <4 x double>%res
    374 }
    375 
    376 define <4 x i32> @test_128_1(i8 * %addr) {
    377 ; CHECK-LABEL: test_128_1:
    378 ; CHECK:       ## %bb.0:
    379 ; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
    380 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    381   %vaddr = bitcast i8* %addr to <4 x i32>*
    382   %res = load <4 x i32>, <4 x i32>* %vaddr, align 1
    383   ret <4 x i32>%res
    384 }
    385 
    386 define <4 x i32> @test_128_2(i8 * %addr) {
    387 ; CHECK-LABEL: test_128_2:
    388 ; CHECK:       ## %bb.0:
    389 ; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
    390 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    391   %vaddr = bitcast i8* %addr to <4 x i32>*
    392   %res = load <4 x i32>, <4 x i32>* %vaddr, align 16
    393   ret <4 x i32>%res
    394 }
    395 
    396 define void @test_128_3(i8 * %addr, <2 x i64> %data) {
    397 ; CHECK-LABEL: test_128_3:
    398 ; CHECK:       ## %bb.0:
    399 ; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
    400 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    401   %vaddr = bitcast i8* %addr to <2 x i64>*
    402   store <2 x i64>%data, <2 x i64>* %vaddr, align 16
    403   ret void
    404 }
    405 
    406 define void @test_128_4(i8 * %addr, <4 x i32> %data) {
    407 ; CHECK-LABEL: test_128_4:
    408 ; CHECK:       ## %bb.0:
    409 ; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
    410 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    411   %vaddr = bitcast i8* %addr to <4 x i32>*
    412   store <4 x i32>%data, <4 x i32>* %vaddr, align 1
    413   ret void
    414 }
    415 
    416 define void @test_128_5(i8 * %addr, <4 x i32> %data) {
    417 ; CHECK-LABEL: test_128_5:
    418 ; CHECK:       ## %bb.0:
    419 ; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
    420 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    421   %vaddr = bitcast i8* %addr to <4 x i32>*
    422   store <4 x i32>%data, <4 x i32>* %vaddr, align 16
    423   ret void
    424 }
    425 
    426 define  <2 x i64> @test_128_6(i8 * %addr) {
    427 ; CHECK-LABEL: test_128_6:
    428 ; CHECK:       ## %bb.0:
    429 ; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
    430 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    431   %vaddr = bitcast i8* %addr to <2 x i64>*
    432   %res = load <2 x i64>, <2 x i64>* %vaddr, align 16
    433   ret <2 x i64>%res
    434 }
    435 
    436 define void @test_128_7(i8 * %addr, <2 x i64> %data) {
    437 ; CHECK-LABEL: test_128_7:
    438 ; CHECK:       ## %bb.0:
    439 ; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
    440 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    441   %vaddr = bitcast i8* %addr to <2 x i64>*
    442   store <2 x i64>%data, <2 x i64>* %vaddr, align 1
    443   ret void
    444 }
    445 
    446 define <2 x i64> @test_128_8(i8 * %addr) {
    447 ; CHECK-LABEL: test_128_8:
    448 ; CHECK:       ## %bb.0:
    449 ; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
    450 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    451   %vaddr = bitcast i8* %addr to <2 x i64>*
    452   %res = load <2 x i64>, <2 x i64>* %vaddr, align 1
    453   ret <2 x i64>%res
    454 }
    455 
    456 define void @test_128_9(i8 * %addr, <2 x double> %data) {
    457 ; CHECK-LABEL: test_128_9:
    458 ; CHECK:       ## %bb.0:
    459 ; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
    460 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    461   %vaddr = bitcast i8* %addr to <2 x double>*
    462   store <2 x double>%data, <2 x double>* %vaddr, align 16
    463   ret void
    464 }
    465 
    466 define <2 x double> @test_128_10(i8 * %addr) {
    467 ; CHECK-LABEL: test_128_10:
    468 ; CHECK:       ## %bb.0:
    469 ; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
    470 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    471   %vaddr = bitcast i8* %addr to <2 x double>*
    472   %res = load <2 x double>, <2 x double>* %vaddr, align 16
    473   ret <2 x double>%res
    474 }
    475 
    476 define void @test_128_11(i8 * %addr, <4 x float> %data) {
    477 ; CHECK-LABEL: test_128_11:
    478 ; CHECK:       ## %bb.0:
    479 ; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07]
    480 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    481   %vaddr = bitcast i8* %addr to <4 x float>*
    482   store <4 x float>%data, <4 x float>* %vaddr, align 16
    483   ret void
    484 }
    485 
    486 define <4 x float> @test_128_12(i8 * %addr) {
    487 ; CHECK-LABEL: test_128_12:
    488 ; CHECK:       ## %bb.0:
    489 ; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
    490 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    491   %vaddr = bitcast i8* %addr to <4 x float>*
    492   %res = load <4 x float>, <4 x float>* %vaddr, align 16
    493   ret <4 x float>%res
    494 }
    495 
    496 define void @test_128_13(i8 * %addr, <2 x double> %data) {
    497 ; CHECK-LABEL: test_128_13:
    498 ; CHECK:       ## %bb.0:
    499 ; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
    500 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    501   %vaddr = bitcast i8* %addr to <2 x double>*
    502   store <2 x double>%data, <2 x double>* %vaddr, align 1
    503   ret void
    504 }
    505 
    506 define <2 x double> @test_128_14(i8 * %addr) {
    507 ; CHECK-LABEL: test_128_14:
    508 ; CHECK:       ## %bb.0:
    509 ; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
    510 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    511   %vaddr = bitcast i8* %addr to <2 x double>*
    512   %res = load <2 x double>, <2 x double>* %vaddr, align 1
    513   ret <2 x double>%res
    514 }
    515 
    516 define void @test_128_15(i8 * %addr, <4 x float> %data) {
    517 ; CHECK-LABEL: test_128_15:
    518 ; CHECK:       ## %bb.0:
    519 ; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
    520 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    521   %vaddr = bitcast i8* %addr to <4 x float>*
    522   store <4 x float>%data, <4 x float>* %vaddr, align 1
    523   ret void
    524 }
    525 
    526 define <4 x float> @test_128_16(i8 * %addr) {
    527 ; CHECK-LABEL: test_128_16:
    528 ; CHECK:       ## %bb.0:
    529 ; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
    530 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    531   %vaddr = bitcast i8* %addr to <4 x float>*
    532   %res = load <4 x float>, <4 x float>* %vaddr, align 1
    533   ret <4 x float>%res
    534 }
    535 
    536 define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
    537 ; CHECK-LABEL: test_128_17:
    538 ; CHECK:       ## %bb.0:
    539 ; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
    540 ; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07]
    541 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    542   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    543   %vaddr = bitcast i8* %addr to <4 x i32>*
    544   %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
    545   %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
    546   ret <4 x i32>%res
    547 }
    548 
    549 define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
    550 ; CHECK-LABEL: test_128_18:
    551 ; CHECK:       ## %bb.0:
    552 ; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
    553 ; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x07]
    554 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    555   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    556   %vaddr = bitcast i8* %addr to <4 x i32>*
    557   %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
    558   %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
    559   ret <4 x i32>%res
    560 }
    561 
    562 define <4 x i32> @test_128_19(i8 * %addr, <4 x i32> %mask1) {
    563 ; CHECK-LABEL: test_128_19:
    564 ; CHECK:       ## %bb.0:
    565 ; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
    566 ; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x07]
    567 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    568   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    569   %vaddr = bitcast i8* %addr to <4 x i32>*
    570   %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
    571   %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
    572   ret <4 x i32>%res
    573 }
    574 
    575 define <4 x i32> @test_128_20(i8 * %addr, <4 x i32> %mask1) {
    576 ; CHECK-LABEL: test_128_20:
    577 ; CHECK:       ## %bb.0:
    578 ; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
    579 ; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x07]
    580 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    581   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    582   %vaddr = bitcast i8* %addr to <4 x i32>*
    583   %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
    584   %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
    585   ret <4 x i32>%res
    586 }
    587 
    588 define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
    589 ; CHECK-LABEL: test_128_21:
    590 ; CHECK:       ## %bb.0:
    591 ; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
    592 ; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07]
    593 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    594   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
    595   %vaddr = bitcast i8* %addr to <2 x i64>*
    596   %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
    597   %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
    598   ret <2 x i64>%res
    599 }
    600 
    601 define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
    602 ; CHECK-LABEL: test_128_22:
    603 ; CHECK:       ## %bb.0:
    604 ; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
    605 ; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x07]
    606 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    607   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
    608   %vaddr = bitcast i8* %addr to <2 x i64>*
    609   %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
    610   %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
    611   ret <2 x i64>%res
    612 }
    613 
    614 define <2 x i64> @test_128_23(i8 * %addr, <2 x i64> %mask1) {
    615 ; CHECK-LABEL: test_128_23:
    616 ; CHECK:       ## %bb.0:
    617 ; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
    618 ; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x07]
    619 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    620   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
    621   %vaddr = bitcast i8* %addr to <2 x i64>*
    622   %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
    623   %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
    624   ret <2 x i64>%res
    625 }
    626 
    627 define <2 x i64> @test_128_24(i8 * %addr, <2 x i64> %mask1) {
    628 ; CHECK-LABEL: test_128_24:
    629 ; CHECK:       ## %bb.0:
    630 ; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
    631 ; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x07]
    632 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    633   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
    634   %vaddr = bitcast i8* %addr to <2 x i64>*
    635   %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
    636   %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
    637   ret <2 x i64>%res
    638 }
    639 
; Masked float load, aligned, merge semantics: the select's false-arm is the
; pass-through %old (already in %xmm0), so this folds to a merge-masking
; vmovaps (%rdi), %xmm0 {%k1} with no {z} qualifier.
define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_25:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float>%res
}
    652 
; Same as test_128_25 but align 1: unaligned masked merge load must use
; vmovups {%k1} instead of the aligned vmovaps form.
define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_26:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float>%res
}
    665 
; Masked float load, aligned, zeroing semantics: zeroinitializer false-arm
; selects the {z} zero-masking variant, vmovaps (%rdi), %xmm0 {%k1} {z}.
define <4 x float> @test_128_27(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_27:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float>%res
}
    678 
; Same as test_128_27 but align 1: unaligned zero-masked load must emit
; vmovups {%k1} {z} rather than the aligned vmovaps form.
define <4 x float> @test_128_28(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_28:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float>%res
}
    691 
; Masked double load, aligned, merge semantics: pass-through %old selects the
; merge-masking vmovapd (%rdi), %xmm0 {%k1} (mask from vptestmq on the i64 mask).
define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_29:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double>%res
}
    704 
; Same as test_128_29 but align 1: unaligned masked merge load must use
; vmovupd {%k1} instead of the aligned vmovapd form.
define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_30:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double>%res
}
    717 
; Masked double load, aligned, zeroing semantics: zeroinitializer false-arm
; selects the {z} zero-masking variant, vmovapd (%rdi), %xmm0 {%k1} {z}.
define <2 x double> @test_128_31(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_31:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double>%res
}
    730 
; Same as test_128_31 but align 1: unaligned zero-masked load must emit
; vmovupd {%k1} {z} rather than the aligned vmovapd form.
define <2 x double> @test_128_32(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc8]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double>%res
}
    743 
    744