; X86 AVX-512VL tests: aligned and unaligned 256-bit/128-bit vector loads and stores.
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s

      4 define <8 x i32> @test_256_1(i8 * %addr) {
      5 ; CHECK-LABEL: test_256_1:
      6 ; CHECK:       ## BB#0:
      7 ; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x6f,0x07]
      8 ; CHECK-NEXT:    retq ## encoding: [0xc3]
      9   %vaddr = bitcast i8* %addr to <8 x i32>*
     10   %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
     11   ret <8 x i32>%res
     12 }
     13 
     14 define <8 x i32> @test_256_2(i8 * %addr) {
     15 ; CHECK-LABEL: test_256_2:
     16 ; CHECK:       ## BB#0:
     17 ; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x07]
     18 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     19   %vaddr = bitcast i8* %addr to <8 x i32>*
     20   %res = load <8 x i32>, <8 x i32>* %vaddr, align 32
     21   ret <8 x i32>%res
     22 }
     23 
     24 define void @test_256_3(i8 * %addr, <4 x i64> %data) {
     25 ; CHECK-LABEL: test_256_3:
     26 ; CHECK:       ## BB#0:
     27 ; CHECK-NEXT:    vmovdqa64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x07]
     28 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     29   %vaddr = bitcast i8* %addr to <4 x i64>*
     30   store <4 x i64>%data, <4 x i64>* %vaddr, align 32
     31   ret void
     32 }
     33 
     34 define void @test_256_4(i8 * %addr, <8 x i32> %data) {
     35 ; CHECK-LABEL: test_256_4:
     36 ; CHECK:       ## BB#0:
     37 ; CHECK-NEXT:    vmovdqu32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x07]
     38 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     39   %vaddr = bitcast i8* %addr to <8 x i32>*
     40   store <8 x i32>%data, <8 x i32>* %vaddr, align 1
     41   ret void
     42 }
     43 
     44 define void @test_256_5(i8 * %addr, <8 x i32> %data) {
     45 ; CHECK-LABEL: test_256_5:
     46 ; CHECK:       ## BB#0:
     47 ; CHECK-NEXT:    vmovdqa32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x07]
     48 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     49   %vaddr = bitcast i8* %addr to <8 x i32>*
     50   store <8 x i32>%data, <8 x i32>* %vaddr, align 32
     51   ret void
     52 }
     53 
     54 define  <4 x i64> @test_256_6(i8 * %addr) {
     55 ; CHECK-LABEL: test_256_6:
     56 ; CHECK:       ## BB#0:
     57 ; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0x07]
     58 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     59   %vaddr = bitcast i8* %addr to <4 x i64>*
     60   %res = load <4 x i64>, <4 x i64>* %vaddr, align 32
     61   ret <4 x i64>%res
     62 }
     63 
     64 define void @test_256_7(i8 * %addr, <4 x i64> %data) {
     65 ; CHECK-LABEL: test_256_7:
     66 ; CHECK:       ## BB#0:
     67 ; CHECK-NEXT:    vmovdqu64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x07]
     68 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     69   %vaddr = bitcast i8* %addr to <4 x i64>*
     70   store <4 x i64>%data, <4 x i64>* %vaddr, align 1
     71   ret void
     72 }
     73 
     74 define <4 x i64> @test_256_8(i8 * %addr) {
     75 ; CHECK-LABEL: test_256_8:
     76 ; CHECK:       ## BB#0:
     77 ; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x6f,0x07]
     78 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     79   %vaddr = bitcast i8* %addr to <4 x i64>*
     80   %res = load <4 x i64>, <4 x i64>* %vaddr, align 1
     81   ret <4 x i64>%res
     82 }
     83 
     84 define void @test_256_9(i8 * %addr, <4 x double> %data) {
     85 ; CHECK-LABEL: test_256_9:
     86 ; CHECK:       ## BB#0:
     87 ; CHECK-NEXT:    vmovapd %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x29,0x07]
     88 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     89   %vaddr = bitcast i8* %addr to <4 x double>*
     90   store <4 x double>%data, <4 x double>* %vaddr, align 32
     91   ret void
     92 }
     93 
     94 define <4 x double> @test_256_10(i8 * %addr) {
     95 ; CHECK-LABEL: test_256_10:
     96 ; CHECK:       ## BB#0:
     97 ; CHECK-NEXT:    vmovapd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0x07]
     98 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     99   %vaddr = bitcast i8* %addr to <4 x double>*
    100   %res = load <4 x double>, <4 x double>* %vaddr, align 32
    101   ret <4 x double>%res
    102 }
    103 
    104 define void @test_256_11(i8 * %addr, <8 x float> %data) {
    105 ; CHECK-LABEL: test_256_11:
    106 ; CHECK:       ## BB#0:
    107 ; CHECK-NEXT:    vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07]
    108 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    109   %vaddr = bitcast i8* %addr to <8 x float>*
    110   store <8 x float>%data, <8 x float>* %vaddr, align 32
    111   ret void
    112 }
    113 
    114 define <8 x float> @test_256_12(i8 * %addr) {
    115 ; CHECK-LABEL: test_256_12:
    116 ; CHECK:       ## BB#0:
    117 ; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
    118 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    119   %vaddr = bitcast i8* %addr to <8 x float>*
    120   %res = load <8 x float>, <8 x float>* %vaddr, align 32
    121   ret <8 x float>%res
    122 }
    123 
    124 define void @test_256_13(i8 * %addr, <4 x double> %data) {
    125 ; CHECK-LABEL: test_256_13:
    126 ; CHECK:       ## BB#0:
    127 ; CHECK-NEXT:    vmovupd %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x11,0x07]
    128 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    129   %vaddr = bitcast i8* %addr to <4 x double>*
    130   store <4 x double>%data, <4 x double>* %vaddr, align 1
    131   ret void
    132 }
    133 
    134 define <4 x double> @test_256_14(i8 * %addr) {
    135 ; CHECK-LABEL: test_256_14:
    136 ; CHECK:       ## BB#0:
    137 ; CHECK-NEXT:    vmovupd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x10,0x07]
    138 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    139   %vaddr = bitcast i8* %addr to <4 x double>*
    140   %res = load <4 x double>, <4 x double>* %vaddr, align 1
    141   ret <4 x double>%res
    142 }
    143 
    144 define void @test_256_15(i8 * %addr, <8 x float> %data) {
    145 ; CHECK-LABEL: test_256_15:
    146 ; CHECK:       ## BB#0:
    147 ; CHECK-NEXT:    vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07]
    148 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    149   %vaddr = bitcast i8* %addr to <8 x float>*
    150   store <8 x float>%data, <8 x float>* %vaddr, align 1
    151   ret void
    152 }
    153 
    154 define <8 x float> @test_256_16(i8 * %addr) {
    155 ; CHECK-LABEL: test_256_16:
    156 ; CHECK:       ## BB#0:
    157 ; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
    158 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    159   %vaddr = bitcast i8* %addr to <8 x float>*
    160   %res = load <8 x float>, <8 x float>* %vaddr, align 1
    161   ret <8 x float>%res
    162 }
    163 
    164 define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
    165 ; CHECK-LABEL: test_256_17:
    166 ; CHECK:       ## BB#0:
    167 ; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
    168 ; CHECK-NEXT:    vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
    169 ; CHECK-NEXT:    vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07]
    170 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    171   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
    172   %vaddr = bitcast i8* %addr to <8 x i32>*
    173   %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
    174   %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
    175   ret <8 x i32>%res
    176 }
    177 
    178 define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
    179 ; CHECK-LABEL: test_256_18:
    180 ; CHECK:       ## BB#0:
    181 ; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
    182 ; CHECK-NEXT:    vpcmpneqd %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0x75,0x28,0x1f,0xca,0x04]
    183 ; CHECK-NEXT:    vpblendmd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x64,0x07]
    184 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    185   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
    186   %vaddr = bitcast i8* %addr to <8 x i32>*
    187   %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
    188   %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
    189   ret <8 x i32>%res
    190 }
    191 
    192 define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) {
    193 ; CHECK-LABEL: test_256_19:
    194 ; CHECK:       ## BB#0:
    195 ; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
    196 ; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04]
    197 ; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x07]
    198 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    199   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
    200   %vaddr = bitcast i8* %addr to <8 x i32>*
    201   %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
    202   %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
    203   ret <8 x i32>%res
    204 }
    205 
    206 define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) {
    207 ; CHECK-LABEL: test_256_20:
    208 ; CHECK:       ## BB#0:
    209 ; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
    210 ; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x04]
    211 ; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x07]
    212 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    213   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
    214   %vaddr = bitcast i8* %addr to <8 x i32>*
    215   %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
    216   %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
    217   ret <8 x i32>%res
    218 }
    219 
    220 define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
    221 ; CHECK-LABEL: test_256_21:
    222 ; CHECK:       ## BB#0:
    223 ; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
    224 ; CHECK-NEXT:    vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
    225 ; CHECK-NEXT:    vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07]
    226 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    227   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    228   %vaddr = bitcast i8* %addr to <4 x i64>*
    229   %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
    230   %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
    231   ret <4 x i64>%res
    232 }
    233 
    234 define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
    235 ; CHECK-LABEL: test_256_22:
    236 ; CHECK:       ## BB#0:
    237 ; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
    238 ; CHECK-NEXT:    vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
    239 ; CHECK-NEXT:    vpblendmq (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x64,0x07]
    240 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    241   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    242   %vaddr = bitcast i8* %addr to <4 x i64>*
    243   %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
    244   %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
    245   ret <4 x i64>%res
    246 }
    247 
    248 define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) {
    249 ; CHECK-LABEL: test_256_23:
    250 ; CHECK:       ## BB#0:
    251 ; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
    252 ; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
    253 ; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x07]
    254 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    255   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    256   %vaddr = bitcast i8* %addr to <4 x i64>*
    257   %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
    258   %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
    259   ret <4 x i64>%res
    260 }
    261 
    262 define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) {
    263 ; CHECK-LABEL: test_256_24:
    264 ; CHECK:       ## BB#0:
    265 ; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
    266 ; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
    267 ; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x07]
    268 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    269   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    270   %vaddr = bitcast i8* %addr to <4 x i64>*
    271   %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
    272   %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
    273   ret <4 x i64>%res
    274 }
    275 
    276 define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
    277 ; CHECK-LABEL: test_256_25:
    278 ; CHECK:       ## BB#0:
    279 ; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
    280 ; CHECK-NEXT:    vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
    281 ; CHECK-NEXT:    vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
    282 ; CHECK-NEXT:    vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07]
    283 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    284   %mask = fcmp one <8 x float> %mask1, zeroinitializer
    285   %vaddr = bitcast i8* %addr to <8 x float>*
    286   %r = load <8 x float>, <8 x float>* %vaddr, align 32
    287   %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
    288   ret <8 x float>%res
    289 }
    290 
    291 define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
    292 ; CHECK-LABEL: test_256_26:
    293 ; CHECK:       ## BB#0:
    294 ; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
    295 ; CHECK-NEXT:    vcmpordps %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf1,0x74,0x28,0xc2,0xca,0x07]
    296 ; CHECK-NEXT:    vcmpneqps %ymm2, %ymm1, %k1 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0xc2,0xca,0x04]
    297 ; CHECK-NEXT:    vblendmps (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x65,0x07]
    298 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    299   %mask = fcmp one <8 x float> %mask1, zeroinitializer
    300   %vaddr = bitcast i8* %addr to <8 x float>*
    301   %r = load <8 x float>, <8 x float>* %vaddr, align 1
    302   %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
    303   ret <8 x float>%res
    304 }
    305 
    306 define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) {
    307 ; CHECK-LABEL: test_256_27:
    308 ; CHECK:       ## BB#0:
    309 ; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
    310 ; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
    311 ; CHECK-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
    312 ; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x07]
    313 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    314   %mask = fcmp one <8 x float> %mask1, zeroinitializer
    315   %vaddr = bitcast i8* %addr to <8 x float>*
    316   %r = load <8 x float>, <8 x float>* %vaddr, align 32
    317   %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
    318   ret <8 x float>%res
    319 }
    320 
    321 define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) {
    322 ; CHECK-LABEL: test_256_28:
    323 ; CHECK:       ## BB#0:
    324 ; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
    325 ; CHECK-NEXT:    vcmpordps %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc9,0x07]
    326 ; CHECK-NEXT:    vcmpneqps %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0xc2,0xc9,0x04]
    327 ; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x07]
    328 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    329   %mask = fcmp one <8 x float> %mask1, zeroinitializer
    330   %vaddr = bitcast i8* %addr to <8 x float>*
    331   %r = load <8 x float>, <8 x float>* %vaddr, align 1
    332   %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
    333   ret <8 x float>%res
    334 }
    335 
    336 define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
    337 ; CHECK-LABEL: test_256_29:
    338 ; CHECK:       ## BB#0:
    339 ; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
    340 ; CHECK-NEXT:    vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
    341 ; CHECK-NEXT:    vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07]
    342 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    343   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    344   %vaddr = bitcast i8* %addr to <4 x double>*
    345   %r = load <4 x double>, <4 x double>* %vaddr, align 32
    346   %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
    347   ret <4 x double>%res
    348 }
    349 
    350 define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
    351 ; CHECK-LABEL: test_256_30:
    352 ; CHECK:       ## BB#0:
    353 ; CHECK-NEXT:    vpxord %ymm2, %ymm2, %ymm2 ## encoding: [0x62,0xf1,0x6d,0x28,0xef,0xd2]
    354 ; CHECK-NEXT:    vpcmpneqq %ymm2, %ymm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x28,0x1f,0xca,0x04]
    355 ; CHECK-NEXT:    vblendmpd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x65,0x07]
    356 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    357   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    358   %vaddr = bitcast i8* %addr to <4 x double>*
    359   %r = load <4 x double>, <4 x double>* %vaddr, align 1
    360   %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
    361   ret <4 x double>%res
    362 }
    363 
    364 define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) {
    365 ; CHECK-LABEL: test_256_31:
    366 ; CHECK:       ## BB#0:
    367 ; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
    368 ; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
    369 ; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x07]
    370 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    371   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    372   %vaddr = bitcast i8* %addr to <4 x double>*
    373   %r = load <4 x double>, <4 x double>* %vaddr, align 32
    374   %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
    375   ret <4 x double>%res
    376 }
    377 
    378 define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
    379 ; CHECK-LABEL: test_256_32:
    380 ; CHECK:       ## BB#0:
    381 ; CHECK-NEXT:    vpxord %ymm1, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xef,0xc9]
    382 ; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x04]
    383 ; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x07]
    384 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    385   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    386   %vaddr = bitcast i8* %addr to <4 x double>*
    387   %r = load <4 x double>, <4 x double>* %vaddr, align 1
    388   %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
    389   ret <4 x double>%res
    390 }
    391 
    392 define <4 x i32> @test_128_1(i8 * %addr) {
    393 ; CHECK-LABEL: test_128_1:
    394 ; CHECK:       ## BB#0:
    395 ; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x6f,0x07]
    396 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    397   %vaddr = bitcast i8* %addr to <4 x i32>*
    398   %res = load <4 x i32>, <4 x i32>* %vaddr, align 1
    399   ret <4 x i32>%res
    400 }
    401 
    402 define <4 x i32> @test_128_2(i8 * %addr) {
    403 ; CHECK-LABEL: test_128_2:
    404 ; CHECK:       ## BB#0:
    405 ; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x07]
    406 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    407   %vaddr = bitcast i8* %addr to <4 x i32>*
    408   %res = load <4 x i32>, <4 x i32>* %vaddr, align 16
    409   ret <4 x i32>%res
    410 }
    411 
    412 define void @test_128_3(i8 * %addr, <2 x i64> %data) {
    413 ; CHECK-LABEL: test_128_3:
    414 ; CHECK:       ## BB#0:
    415 ; CHECK-NEXT:    vmovdqa64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x07]
    416 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    417   %vaddr = bitcast i8* %addr to <2 x i64>*
    418   store <2 x i64>%data, <2 x i64>* %vaddr, align 16
    419   ret void
    420 }
    421 
    422 define void @test_128_4(i8 * %addr, <4 x i32> %data) {
    423 ; CHECK-LABEL: test_128_4:
    424 ; CHECK:       ## BB#0:
    425 ; CHECK-NEXT:    vmovdqu32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x07]
    426 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    427   %vaddr = bitcast i8* %addr to <4 x i32>*
    428   store <4 x i32>%data, <4 x i32>* %vaddr, align 1
    429   ret void
    430 }
    431 
    432 define void @test_128_5(i8 * %addr, <4 x i32> %data) {
    433 ; CHECK-LABEL: test_128_5:
    434 ; CHECK:       ## BB#0:
    435 ; CHECK-NEXT:    vmovdqa32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x07]
    436 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    437   %vaddr = bitcast i8* %addr to <4 x i32>*
    438   store <4 x i32>%data, <4 x i32>* %vaddr, align 16
    439   ret void
    440 }
    441 
    442 define  <2 x i64> @test_128_6(i8 * %addr) {
    443 ; CHECK-LABEL: test_128_6:
    444 ; CHECK:       ## BB#0:
    445 ; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x07]
    446 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    447   %vaddr = bitcast i8* %addr to <2 x i64>*
    448   %res = load <2 x i64>, <2 x i64>* %vaddr, align 16
    449   ret <2 x i64>%res
    450 }
    451 
    452 define void @test_128_7(i8 * %addr, <2 x i64> %data) {
    453 ; CHECK-LABEL: test_128_7:
    454 ; CHECK:       ## BB#0:
    455 ; CHECK-NEXT:    vmovdqu64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x07]
    456 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    457   %vaddr = bitcast i8* %addr to <2 x i64>*
    458   store <2 x i64>%data, <2 x i64>* %vaddr, align 1
    459   ret void
    460 }
    461 
    462 define <2 x i64> @test_128_8(i8 * %addr) {
    463 ; CHECK-LABEL: test_128_8:
    464 ; CHECK:       ## BB#0:
    465 ; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x6f,0x07]
    466 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    467   %vaddr = bitcast i8* %addr to <2 x i64>*
    468   %res = load <2 x i64>, <2 x i64>* %vaddr, align 1
    469   ret <2 x i64>%res
    470 }
    471 
    472 define void @test_128_9(i8 * %addr, <2 x double> %data) {
    473 ; CHECK-LABEL: test_128_9:
    474 ; CHECK:       ## BB#0:
    475 ; CHECK-NEXT:    vmovapd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x29,0x07]
    476 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    477   %vaddr = bitcast i8* %addr to <2 x double>*
    478   store <2 x double>%data, <2 x double>* %vaddr, align 16
    479   ret void
    480 }
    481 
    482 define <2 x double> @test_128_10(i8 * %addr) {
    483 ; CHECK-LABEL: test_128_10:
    484 ; CHECK:       ## BB#0:
    485 ; CHECK-NEXT:    vmovapd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0x07]
    486 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    487   %vaddr = bitcast i8* %addr to <2 x double>*
    488   %res = load <2 x double>, <2 x double>* %vaddr, align 16
    489   ret <2 x double>%res
    490 }
    491 
    492 define void @test_128_11(i8 * %addr, <4 x float> %data) {
    493 ; CHECK-LABEL: test_128_11:
    494 ; CHECK:       ## BB#0:
    495 ; CHECK-NEXT:    vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07]
    496 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    497   %vaddr = bitcast i8* %addr to <4 x float>*
    498   store <4 x float>%data, <4 x float>* %vaddr, align 16
    499   ret void
    500 }
    501 
    502 define <4 x float> @test_128_12(i8 * %addr) {
    503 ; CHECK-LABEL: test_128_12:
    504 ; CHECK:       ## BB#0:
    505 ; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
    506 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    507   %vaddr = bitcast i8* %addr to <4 x float>*
    508   %res = load <4 x float>, <4 x float>* %vaddr, align 16
    509   ret <4 x float>%res
    510 }
    511 
    512 define void @test_128_13(i8 * %addr, <2 x double> %data) {
    513 ; CHECK-LABEL: test_128_13:
    514 ; CHECK:       ## BB#0:
    515 ; CHECK-NEXT:    vmovupd %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x11,0x07]
    516 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    517   %vaddr = bitcast i8* %addr to <2 x double>*
    518   store <2 x double>%data, <2 x double>* %vaddr, align 1
    519   ret void
    520 }
    521 
    522 define <2 x double> @test_128_14(i8 * %addr) {
    523 ; CHECK-LABEL: test_128_14:
    524 ; CHECK:       ## BB#0:
    525 ; CHECK-NEXT:    vmovupd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x10,0x07]
    526 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    527   %vaddr = bitcast i8* %addr to <2 x double>*
    528   %res = load <2 x double>, <2 x double>* %vaddr, align 1
    529   ret <2 x double>%res
    530 }
    531 
    532 define void @test_128_15(i8 * %addr, <4 x float> %data) {
    533 ; CHECK-LABEL: test_128_15:
    534 ; CHECK:       ## BB#0:
    535 ; CHECK-NEXT:    vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07]
    536 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    537   %vaddr = bitcast i8* %addr to <4 x float>*
    538   store <4 x float>%data, <4 x float>* %vaddr, align 1
    539   ret void
    540 }
    541 
    542 define <4 x float> @test_128_16(i8 * %addr) {
    543 ; CHECK-LABEL: test_128_16:
    544 ; CHECK:       ## BB#0:
    545 ; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
    546 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    547   %vaddr = bitcast i8* %addr to <4 x float>*
    548   %res = load <4 x float>, <4 x float>* %vaddr, align 1
    549   ret <4 x float>%res
    550 }
    551 
    552 define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
    553 ; CHECK-LABEL: test_128_17:
    554 ; CHECK:       ## BB#0:
    555 ; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
    556 ; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
    557 ; CHECK-NEXT:    vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07]
    558 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    559   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    560   %vaddr = bitcast i8* %addr to <4 x i32>*
    561   %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
    562   %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
    563   ret <4 x i32>%res
    564 }
    565 
    566 define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
    567 ; CHECK-LABEL: test_128_18:
    568 ; CHECK:       ## BB#0:
    569 ; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
    570 ; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
    571 ; CHECK-NEXT:    vpblendmd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x64,0x07]
    572 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    573   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    574   %vaddr = bitcast i8* %addr to <4 x i32>*
    575   %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
    576   %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
    577   ret <4 x i32>%res
    578 }
    579 
    580 define <4 x i32> @test_128_19(i8 * %addr, <4 x i32> %mask1) {
    581 ; CHECK-LABEL: test_128_19:
    582 ; CHECK:       ## BB#0:
    583 ; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
    584 ; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
    585 ; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x07]
    586 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    587   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    588   %vaddr = bitcast i8* %addr to <4 x i32>*
    589   %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
    590   %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
    591   ret <4 x i32>%res
    592 }
    593 
    594 define <4 x i32> @test_128_20(i8 * %addr, <4 x i32> %mask1) {
    595 ; CHECK-LABEL: test_128_20:
    596 ; CHECK:       ## BB#0:
    597 ; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
    598 ; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
    599 ; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x07]
    600 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    601   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    602   %vaddr = bitcast i8* %addr to <4 x i32>*
    603   %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
    604   %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
    605   ret <4 x i32>%res
    606 }
    607 
    608 define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
    609 ; CHECK-LABEL: test_128_21:
    610 ; CHECK:       ## BB#0:
    611 ; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
    612 ; CHECK-NEXT:    vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
    613 ; CHECK-NEXT:    vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07]
    614 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    615   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
    616   %vaddr = bitcast i8* %addr to <2 x i64>*
    617   %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
    618   %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
    619   ret <2 x i64>%res
    620 }
    621 
    622 define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
    623 ; CHECK-LABEL: test_128_22:
    624 ; CHECK:       ## BB#0:
    625 ; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
    626 ; CHECK-NEXT:    vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
    627 ; CHECK-NEXT:    vpblendmq (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x64,0x07]
    628 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    629   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
    630   %vaddr = bitcast i8* %addr to <2 x i64>*
    631   %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
    632   %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
    633   ret <2 x i64>%res
    634 }
    635 
; Aligned (align 16) masked load of <2 x i64> with zeroing: masked-off lanes
; become zero (select against zeroinitializer). Expected to select the aligned
; zero-masking move, vmovdqa64 {%k1} {z}.
define <2 x i64> @test_128_23(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_23:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64>%res
}
    649 
; Unaligned (align 1) masked load of <2 x i64> with zeroing. Expected to select
; the unaligned zero-masking move, vmovdqu64 {%k1} {z}, instead of vmovdqa64.
define <2 x i64> @test_128_24(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_24:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x i64>*
  %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
  ret <2 x i64>%res
}
    663 
; Aligned (align 16) masked load of <4 x float> merged with pass-through %old.
; Expected to fold the load into the FP blend form, vblendmps from memory.
define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_25:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float>%res
}
    677 
; Unaligned (align 1) masked load of <4 x float> merged with pass-through %old.
; Same expected codegen as the aligned case (vblendmps from memory).
define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_26:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqd %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0x75,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmps (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
  ret <4 x float>%res
}
    691 
; Aligned (align 16) masked load of <4 x float> with zeroing. Expected to
; select the aligned zero-masking FP move, vmovaps {%k1} {z}.
define <4 x float> @test_128_27(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_27:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 16
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float>%res
}
    705 
; Unaligned (align 1) masked load of <4 x float> with zeroing. Expected to
; select the unaligned zero-masking FP move, vmovups {%k1} {z}.
define <4 x float> @test_128_28(i8 * %addr, <4 x i32> %mask1) {
; CHECK-LABEL: test_128_28:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <4 x float>*
  %r = load <4 x float>, <4 x float>* %vaddr, align 1
  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
  ret <4 x float>%res
}
    719 
; Aligned (align 16) masked load of <2 x double> merged with pass-through %old.
; Expected to fold the load into vblendmpd with a memory operand.
define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_29:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double>%res
}
    733 
; Unaligned (align 1) masked load of <2 x double> merged with pass-through %old.
; Same expected codegen as the aligned case (vblendmpd from memory).
define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_30:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm2, %xmm2, %xmm2 ## encoding: [0x62,0xf1,0x6d,0x08,0xef,0xd2]
; CHECK-NEXT:    vpcmpneqq %xmm2, %xmm1, %k1 ## encoding: [0x62,0xf3,0xf5,0x08,0x1f,0xca,0x04]
; CHECK-NEXT:    vblendmpd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x65,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
  ret <2 x double>%res
}
    747 
; Aligned (align 16) masked load of <2 x double> with zeroing. Expected to
; select the aligned zero-masking FP move, vmovapd {%k1} {z}.
define <2 x double> @test_128_31(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_31:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 16
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double>%res
}
    761 
; Unaligned (align 1) masked load of <2 x double> with zeroing. Expected to
; select the unaligned zero-masking FP move, vmovupd {%k1} {z}.
define <2 x double> @test_128_32(i8 * %addr, <2 x i64> %mask1) {
; CHECK-LABEL: test_128_32:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x04]
; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %vaddr = bitcast i8* %addr to <2 x double>*
  %r = load <2 x double>, <2 x double>* %vaddr, align 1
  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
  ret <2 x double>%res
}
    775 
    776