Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
      2 
      3 ; CHECK-LABEL: test_256_1
      4 ; CHECK: vmovdqu32
      5 ; CHECK: ret
      6 define <8 x i32> @test_256_1(i8 * %addr) {
      7   %vaddr = bitcast i8* %addr to <8 x i32>*
      8   %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
      9   ret <8 x i32>%res
     10 }
     11 
     12 ; CHECK-LABEL: test_256_2
     13 ; CHECK: vmovdqa32
     14 ; CHECK: ret
     15 define <8 x i32> @test_256_2(i8 * %addr) {
     16   %vaddr = bitcast i8* %addr to <8 x i32>*
     17   %res = load <8 x i32>, <8 x i32>* %vaddr, align 32
     18   ret <8 x i32>%res
     19 }
     20 
     21 ; CHECK-LABEL: test_256_3
     22 ; CHECK: vmovdqa64
     23 ; CHECK: ret
     24 define void @test_256_3(i8 * %addr, <4 x i64> %data) {
     25   %vaddr = bitcast i8* %addr to <4 x i64>*
     26   store <4 x i64>%data, <4 x i64>* %vaddr, align 32
     27   ret void
     28 }
     29 
     30 ; CHECK-LABEL: test_256_4
     31 ; CHECK: vmovdqu32
     32 ; CHECK: ret
     33 define void @test_256_4(i8 * %addr, <8 x i32> %data) {
     34   %vaddr = bitcast i8* %addr to <8 x i32>*
     35   store <8 x i32>%data, <8 x i32>* %vaddr, align 1
     36   ret void
     37 }
     38 
     39 ; CHECK-LABEL: test_256_5
     40 ; CHECK: vmovdqa32
     41 ; CHECK: ret
     42 define void @test_256_5(i8 * %addr, <8 x i32> %data) {
     43   %vaddr = bitcast i8* %addr to <8 x i32>*
     44   store <8 x i32>%data, <8 x i32>* %vaddr, align 32
     45   ret void
     46 }
     47 
     48 ; CHECK-LABEL: test_256_6
     49 ; CHECK: vmovdqa64
     50 ; CHECK: ret
     51 define  <4 x i64> @test_256_6(i8 * %addr) {
     52   %vaddr = bitcast i8* %addr to <4 x i64>*
     53   %res = load <4 x i64>, <4 x i64>* %vaddr, align 32
     54   ret <4 x i64>%res
     55 }
     56 
     57 ; CHECK-LABEL: test_256_7
     58 ; CHECK: vmovdqu64
     59 ; CHECK: ret
     60 define void @test_256_7(i8 * %addr, <4 x i64> %data) {
     61   %vaddr = bitcast i8* %addr to <4 x i64>*
     62   store <4 x i64>%data, <4 x i64>* %vaddr, align 1
     63   ret void
     64 }
     65 
     66 ; CHECK-LABEL: test_256_8
     67 ; CHECK: vmovdqu64
     68 ; CHECK: ret
     69 define <4 x i64> @test_256_8(i8 * %addr) {
     70   %vaddr = bitcast i8* %addr to <4 x i64>*
     71   %res = load <4 x i64>, <4 x i64>* %vaddr, align 1
     72   ret <4 x i64>%res
     73 }
     74 
     75 ; CHECK-LABEL: test_256_9
     76 ; CHECK: vmovapd {{.*}} ## encoding: [0x62
     77 ; CHECK: ret
     78 define void @test_256_9(i8 * %addr, <4 x double> %data) {
     79   %vaddr = bitcast i8* %addr to <4 x double>*
     80   store <4 x double>%data, <4 x double>* %vaddr, align 32
     81   ret void
     82 }
     83 
     84 ; CHECK-LABEL: test_256_10
     85 ; CHECK: vmovapd {{.*}} ## encoding: [0x62
     86 ; CHECK: ret
     87 define <4 x double> @test_256_10(i8 * %addr) {
     88   %vaddr = bitcast i8* %addr to <4 x double>*
     89   %res = load <4 x double>, <4 x double>* %vaddr, align 32
     90   ret <4 x double>%res
     91 }
     92 
     93 ; CHECK-LABEL: test_256_11
     94 ; CHECK: vmovaps {{.*}} ## encoding: [0x62
     95 ; CHECK: ret
     96 define void @test_256_11(i8 * %addr, <8 x float> %data) {
     97   %vaddr = bitcast i8* %addr to <8 x float>*
     98   store <8 x float>%data, <8 x float>* %vaddr, align 32
     99   ret void
    100 }
    101 
    102 ; CHECK-LABEL: test_256_12
    103 ; CHECK: vmovaps {{.*}} ## encoding: [0x62
    104 ; CHECK: ret
    105 define <8 x float> @test_256_12(i8 * %addr) {
    106   %vaddr = bitcast i8* %addr to <8 x float>*
    107   %res = load <8 x float>, <8 x float>* %vaddr, align 32
    108   ret <8 x float>%res
    109 }
    110 
    111 ; CHECK-LABEL: test_256_13
    112 ; CHECK: vmovupd {{.*}} ## encoding: [0x62
    113 ; CHECK: ret
    114 define void @test_256_13(i8 * %addr, <4 x double> %data) {
    115   %vaddr = bitcast i8* %addr to <4 x double>*
    116   store <4 x double>%data, <4 x double>* %vaddr, align 1
    117   ret void
    118 }
    119 
    120 ; CHECK-LABEL: test_256_14
    121 ; CHECK: vmovupd {{.*}} ## encoding: [0x62
    122 ; CHECK: ret
    123 define <4 x double> @test_256_14(i8 * %addr) {
    124   %vaddr = bitcast i8* %addr to <4 x double>*
    125   %res = load <4 x double>, <4 x double>* %vaddr, align 1
    126   ret <4 x double>%res
    127 }
    128 
    129 ; CHECK-LABEL: test_256_15
    130 ; CHECK: vmovups {{.*}} ## encoding: [0x62
    131 ; CHECK: ret
    132 define void @test_256_15(i8 * %addr, <8 x float> %data) {
    133   %vaddr = bitcast i8* %addr to <8 x float>*
    134   store <8 x float>%data, <8 x float>* %vaddr, align 1
    135   ret void
    136 }
    137 
    138 ; CHECK-LABEL: test_256_16
    139 ; CHECK: vmovups {{.*}} ## encoding: [0x62
    140 ; CHECK: ret
    141 define <8 x float> @test_256_16(i8 * %addr) {
    142   %vaddr = bitcast i8* %addr to <8 x float>*
    143   %res = load <8 x float>, <8 x float>* %vaddr, align 1
    144   ret <8 x float>%res
    145 }
    146 
    147 ; CHECK-LABEL: test_256_17
    148 ; CHECK: vmovdqa32{{.*{%k[1-7]} }}
    149 ; CHECK: ret
    150 define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
    151   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
    152   %vaddr = bitcast i8* %addr to <8 x i32>*
    153   %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
    154   %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
    155   ret <8 x i32>%res
    156 }
    157 
    158 ; CHECK-LABEL: test_256_18
    159 ; CHECK: vmovdqu32{{.*{%k[1-7]} }}
    160 ; CHECK: ret
    161 define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
    162   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
    163   %vaddr = bitcast i8* %addr to <8 x i32>*
    164   %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
    165   %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
    166   ret <8 x i32>%res
    167 }
    168 
    169 ; CHECK-LABEL: test_256_19
    170 ; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }}
    171 ; CHECK: ret
    172 define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) {
    173   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
    174   %vaddr = bitcast i8* %addr to <8 x i32>*
    175   %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
    176   %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
    177   ret <8 x i32>%res
    178 }
    179 
    180 ; CHECK-LABEL: test_256_20
    181 ; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }}
    182 ; CHECK: ret
    183 define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) {
    184   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
    185   %vaddr = bitcast i8* %addr to <8 x i32>*
    186   %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
    187   %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
    188   ret <8 x i32>%res
    189 }
    190 
    191 ; CHECK-LABEL: test_256_21
    192 ; CHECK: vmovdqa64{{.*{%k[1-7]} }}
    193 ; CHECK: ret
    194 define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
    195   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    196   %vaddr = bitcast i8* %addr to <4 x i64>*
    197   %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
    198   %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
    199   ret <4 x i64>%res
    200 }
    201 
    202 ; CHECK-LABEL: test_256_22
    203 ; CHECK: vmovdqu64{{.*{%k[1-7]} }}
    204 ; CHECK: ret
    205 define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
    206   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    207   %vaddr = bitcast i8* %addr to <4 x i64>*
    208   %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
    209   %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
    210   ret <4 x i64>%res
    211 }
    212 
    213 ; CHECK-LABEL: test_256_23
    214 ; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }}
    215 ; CHECK: ret
    216 define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) {
    217   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    218   %vaddr = bitcast i8* %addr to <4 x i64>*
    219   %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
    220   %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
    221   ret <4 x i64>%res
    222 }
    223 
    224 ; CHECK-LABEL: test_256_24
    225 ; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }}
    226 ; CHECK: ret
    227 define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) {
    228   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    229   %vaddr = bitcast i8* %addr to <4 x i64>*
    230   %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
    231   %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
    232   ret <4 x i64>%res
    233 }
    234 
    235 ; CHECK-LABEL: test_256_25
    236 ; CHECK: vmovaps{{.*{%k[1-7]} }}
    237 ; CHECK: ret
    238 define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
    239   %mask = fcmp one <8 x float> %mask1, zeroinitializer
    240   %vaddr = bitcast i8* %addr to <8 x float>*
    241   %r = load <8 x float>, <8 x float>* %vaddr, align 32
    242   %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
    243   ret <8 x float>%res
    244 }
    245 
    246 ; CHECK-LABEL: test_256_26
    247 ; CHECK: vmovups{{.*{%k[1-7]} }}
    248 ; CHECK: ret
    249 define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
    250   %mask = fcmp one <8 x float> %mask1, zeroinitializer
    251   %vaddr = bitcast i8* %addr to <8 x float>*
    252   %r = load <8 x float>, <8 x float>* %vaddr, align 1
    253   %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
    254   ret <8 x float>%res
    255 }
    256 
    257 ; CHECK-LABEL: test_256_27
    258 ; CHECK: vmovaps{{.*{%k[1-7]} {z} }}
    259 ; CHECK: ret
    260 define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) {
    261   %mask = fcmp one <8 x float> %mask1, zeroinitializer
    262   %vaddr = bitcast i8* %addr to <8 x float>*
    263   %r = load <8 x float>, <8 x float>* %vaddr, align 32
    264   %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
    265   ret <8 x float>%res
    266 }
    267 
    268 ; CHECK-LABEL: test_256_28
    269 ; CHECK: vmovups{{.*{%k[1-7]} {z} }}
    270 ; CHECK: ret
    271 define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) {
    272   %mask = fcmp one <8 x float> %mask1, zeroinitializer
    273   %vaddr = bitcast i8* %addr to <8 x float>*
    274   %r = load <8 x float>, <8 x float>* %vaddr, align 1
    275   %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
    276   ret <8 x float>%res
    277 }
    278 
    279 ; CHECK-LABEL: test_256_29
    280 ; CHECK: vmovapd{{.*{%k[1-7]} }}
    281 ; CHECK: ret
    282 define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
    283   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    284   %vaddr = bitcast i8* %addr to <4 x double>*
    285   %r = load <4 x double>, <4 x double>* %vaddr, align 32
    286   %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
    287   ret <4 x double>%res
    288 }
    289 
    290 ; CHECK-LABEL: test_256_30
    291 ; CHECK: vmovupd{{.*{%k[1-7]} }}
    292 ; CHECK: ret
    293 define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
    294   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    295   %vaddr = bitcast i8* %addr to <4 x double>*
    296   %r = load <4 x double>, <4 x double>* %vaddr, align 1
    297   %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
    298   ret <4 x double>%res
    299 }
    300 
    301 ; CHECK-LABEL: test_256_31
    302 ; CHECK: vmovapd{{.*{%k[1-7]} {z} }}
    303 ; CHECK: ret
    304 define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) {
    305   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    306   %vaddr = bitcast i8* %addr to <4 x double>*
    307   %r = load <4 x double>, <4 x double>* %vaddr, align 32
    308   %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
    309   ret <4 x double>%res
    310 }
    311 
    312 ; CHECK-LABEL: test_256_32
    313 ; CHECK: vmovupd{{.*{%k[1-7]} {z} }}
    314 ; CHECK: ret
    315 define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
    316   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
    317   %vaddr = bitcast i8* %addr to <4 x double>*
    318   %r = load <4 x double>, <4 x double>* %vaddr, align 1
    319   %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
    320   ret <4 x double>%res
    321 }
    322 
    323 ; CHECK-LABEL: test_128_1
    324 ; CHECK: vmovdqu32
    325 ; CHECK: ret
    326 define <4 x i32> @test_128_1(i8 * %addr) {
    327   %vaddr = bitcast i8* %addr to <4 x i32>*
    328   %res = load <4 x i32>, <4 x i32>* %vaddr, align 1
    329   ret <4 x i32>%res
    330 }
    331 
    332 ; CHECK-LABEL: test_128_2
    333 ; CHECK: vmovdqa32
    334 ; CHECK: ret
    335 define <4 x i32> @test_128_2(i8 * %addr) {
    336   %vaddr = bitcast i8* %addr to <4 x i32>*
    337   %res = load <4 x i32>, <4 x i32>* %vaddr, align 16
    338   ret <4 x i32>%res
    339 }
    340 
    341 ; CHECK-LABEL: test_128_3
    342 ; CHECK: vmovdqa64
    343 ; CHECK: ret
    344 define void @test_128_3(i8 * %addr, <2 x i64> %data) {
    345   %vaddr = bitcast i8* %addr to <2 x i64>*
    346   store <2 x i64>%data, <2 x i64>* %vaddr, align 16
    347   ret void
    348 }
    349 
    350 ; CHECK-LABEL: test_128_4
    351 ; CHECK: vmovdqu32
    352 ; CHECK: ret
    353 define void @test_128_4(i8 * %addr, <4 x i32> %data) {
    354   %vaddr = bitcast i8* %addr to <4 x i32>*
    355   store <4 x i32>%data, <4 x i32>* %vaddr, align 1
    356   ret void
    357 }
    358 
    359 ; CHECK-LABEL: test_128_5
    360 ; CHECK: vmovdqa32
    361 ; CHECK: ret
    362 define void @test_128_5(i8 * %addr, <4 x i32> %data) {
    363   %vaddr = bitcast i8* %addr to <4 x i32>*
    364   store <4 x i32>%data, <4 x i32>* %vaddr, align 16
    365   ret void
    366 }
    367 
    368 ; CHECK-LABEL: test_128_6
    369 ; CHECK: vmovdqa64
    370 ; CHECK: ret
    371 define  <2 x i64> @test_128_6(i8 * %addr) {
    372   %vaddr = bitcast i8* %addr to <2 x i64>*
    373   %res = load <2 x i64>, <2 x i64>* %vaddr, align 16
    374   ret <2 x i64>%res
    375 }
    376 
    377 ; CHECK-LABEL: test_128_7
    378 ; CHECK: vmovdqu64
    379 ; CHECK: ret
    380 define void @test_128_7(i8 * %addr, <2 x i64> %data) {
    381   %vaddr = bitcast i8* %addr to <2 x i64>*
    382   store <2 x i64>%data, <2 x i64>* %vaddr, align 1
    383   ret void
    384 }
    385 
    386 ; CHECK-LABEL: test_128_8
    387 ; CHECK: vmovdqu64
    388 ; CHECK: ret
    389 define <2 x i64> @test_128_8(i8 * %addr) {
    390   %vaddr = bitcast i8* %addr to <2 x i64>*
    391   %res = load <2 x i64>, <2 x i64>* %vaddr, align 1
    392   ret <2 x i64>%res
    393 }
    394 
    395 ; CHECK-LABEL: test_128_9
    396 ; CHECK: vmovapd {{.*}} ## encoding: [0x62
    397 ; CHECK: ret
    398 define void @test_128_9(i8 * %addr, <2 x double> %data) {
    399   %vaddr = bitcast i8* %addr to <2 x double>*
    400   store <2 x double>%data, <2 x double>* %vaddr, align 16
    401   ret void
    402 }
    403 
    404 ; CHECK-LABEL: test_128_10
    405 ; CHECK: vmovapd {{.*}} ## encoding: [0x62
    406 ; CHECK: ret
    407 define <2 x double> @test_128_10(i8 * %addr) {
    408   %vaddr = bitcast i8* %addr to <2 x double>*
    409   %res = load <2 x double>, <2 x double>* %vaddr, align 16
    410   ret <2 x double>%res
    411 }
    412 
    413 ; CHECK-LABEL: test_128_11
    414 ; CHECK: vmovaps {{.*}} ## encoding: [0x62
    415 ; CHECK: ret
    416 define void @test_128_11(i8 * %addr, <4 x float> %data) {
    417   %vaddr = bitcast i8* %addr to <4 x float>*
    418   store <4 x float>%data, <4 x float>* %vaddr, align 16
    419   ret void
    420 }
    421 
    422 ; CHECK-LABEL: test_128_12
    423 ; CHECK: vmovaps {{.*}} ## encoding: [0x62
    424 ; CHECK: ret
    425 define <4 x float> @test_128_12(i8 * %addr) {
    426   %vaddr = bitcast i8* %addr to <4 x float>*
    427   %res = load <4 x float>, <4 x float>* %vaddr, align 16
    428   ret <4 x float>%res
    429 }
    430 
    431 ; CHECK-LABEL: test_128_13
    432 ; CHECK: vmovupd {{.*}} ## encoding: [0x62
    433 ; CHECK: ret
    434 define void @test_128_13(i8 * %addr, <2 x double> %data) {
    435   %vaddr = bitcast i8* %addr to <2 x double>*
    436   store <2 x double>%data, <2 x double>* %vaddr, align 1
    437   ret void
    438 }
    439 
    440 ; CHECK-LABEL: test_128_14
    441 ; CHECK: vmovupd {{.*}} ## encoding: [0x62
    442 ; CHECK: ret
    443 define <2 x double> @test_128_14(i8 * %addr) {
    444   %vaddr = bitcast i8* %addr to <2 x double>*
    445   %res = load <2 x double>, <2 x double>* %vaddr, align 1
    446   ret <2 x double>%res
    447 }
    448 
    449 ; CHECK-LABEL: test_128_15
    450 ; CHECK: vmovups {{.*}} ## encoding: [0x62
    451 ; CHECK: ret
    452 define void @test_128_15(i8 * %addr, <4 x float> %data) {
    453   %vaddr = bitcast i8* %addr to <4 x float>*
    454   store <4 x float>%data, <4 x float>* %vaddr, align 1
    455   ret void
    456 }
    457 
    458 ; CHECK-LABEL: test_128_16
    459 ; CHECK: vmovups {{.*}} ## encoding: [0x62
    460 ; CHECK: ret
    461 define <4 x float> @test_128_16(i8 * %addr) {
    462   %vaddr = bitcast i8* %addr to <4 x float>*
    463   %res = load <4 x float>, <4 x float>* %vaddr, align 1
    464   ret <4 x float>%res
    465 }
    466 
    467 ; CHECK-LABEL: test_128_17
    468 ; CHECK: vmovdqa32{{.*{%k[1-7]} }}
    469 ; CHECK: ret
    470 define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
    471   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    472   %vaddr = bitcast i8* %addr to <4 x i32>*
    473   %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
    474   %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
    475   ret <4 x i32>%res
    476 }
    477 
    478 ; CHECK-LABEL: test_128_18
    479 ; CHECK: vmovdqu32{{.*{%k[1-7]} }}
    480 ; CHECK: ret
    481 define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
    482   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    483   %vaddr = bitcast i8* %addr to <4 x i32>*
    484   %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
    485   %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
    486   ret <4 x i32>%res
    487 }
    488 
    489 ; CHECK-LABEL: test_128_19
    490 ; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }}
    491 ; CHECK: ret
    492 define <4 x i32> @test_128_19(i8 * %addr, <4 x i32> %mask1) {
    493   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    494   %vaddr = bitcast i8* %addr to <4 x i32>*
    495   %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
    496   %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
    497   ret <4 x i32>%res
    498 }
    499 
    500 ; CHECK-LABEL: test_128_20
    501 ; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }}
    502 ; CHECK: ret
    503 define <4 x i32> @test_128_20(i8 * %addr, <4 x i32> %mask1) {
    504   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    505   %vaddr = bitcast i8* %addr to <4 x i32>*
    506   %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
    507   %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
    508   ret <4 x i32>%res
    509 }
    510 
    511 ; CHECK-LABEL: test_128_21
    512 ; CHECK: vmovdqa64{{.*{%k[1-7]} }}
    513 ; CHECK: ret
    514 define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
    515   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
    516   %vaddr = bitcast i8* %addr to <2 x i64>*
    517   %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
    518   %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
    519   ret <2 x i64>%res
    520 }
    521 
    522 ; CHECK-LABEL: test_128_22
    523 ; CHECK: vmovdqu64{{.*{%k[1-7]} }}
    524 ; CHECK: ret
    525 define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
    526   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
    527   %vaddr = bitcast i8* %addr to <2 x i64>*
    528   %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
    529   %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
    530   ret <2 x i64>%res
    531 }
    532 
    533 ; CHECK-LABEL: test_128_23
    534 ; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }}
    535 ; CHECK: ret
    536 define <2 x i64> @test_128_23(i8 * %addr, <2 x i64> %mask1) {
    537   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
    538   %vaddr = bitcast i8* %addr to <2 x i64>*
    539   %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
    540   %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
    541   ret <2 x i64>%res
    542 }
    543 
    544 ; CHECK-LABEL: test_128_24
    545 ; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }}
    546 ; CHECK: ret
    547 define <2 x i64> @test_128_24(i8 * %addr, <2 x i64> %mask1) {
    548   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
    549   %vaddr = bitcast i8* %addr to <2 x i64>*
    550   %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
    551   %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
    552   ret <2 x i64>%res
    553 }
    554 
    555 ; CHECK-LABEL: test_128_25
    556 ; CHECK: vmovaps{{.*{%k[1-7]} }}
    557 ; CHECK: ret
    558 define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
    559   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    560   %vaddr = bitcast i8* %addr to <4 x float>*
    561   %r = load <4 x float>, <4 x float>* %vaddr, align 16
    562   %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
    563   ret <4 x float>%res
    564 }
    565 
    566 ; CHECK-LABEL: test_128_26
    567 ; CHECK: vmovups{{.*{%k[1-7]} }}
    568 ; CHECK: ret
    569 define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
    570   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    571   %vaddr = bitcast i8* %addr to <4 x float>*
    572   %r = load <4 x float>, <4 x float>* %vaddr, align 1
    573   %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
    574   ret <4 x float>%res
    575 }
    576 
    577 ; CHECK-LABEL: test_128_27
    578 ; CHECK: vmovaps{{.*{%k[1-7]} {z} }}
    579 ; CHECK: ret
    580 define <4 x float> @test_128_27(i8 * %addr, <4 x i32> %mask1) {
    581   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    582   %vaddr = bitcast i8* %addr to <4 x float>*
    583   %r = load <4 x float>, <4 x float>* %vaddr, align 16
    584   %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
    585   ret <4 x float>%res
    586 }
    587 
    588 ; CHECK-LABEL: test_128_28
    589 ; CHECK: vmovups{{.*{%k[1-7]} {z} }}
    590 ; CHECK: ret
    591 define <4 x float> @test_128_28(i8 * %addr, <4 x i32> %mask1) {
    592   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
    593   %vaddr = bitcast i8* %addr to <4 x float>*
    594   %r = load <4 x float>, <4 x float>* %vaddr, align 1
    595   %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
    596   ret <4 x float>%res
    597 }
    598 
    599 ; CHECK-LABEL: test_128_29
    600 ; CHECK: vmovapd{{.*{%k[1-7]} }}
    601 ; CHECK: ret
    602 define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
    603   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
    604   %vaddr = bitcast i8* %addr to <2 x double>*
    605   %r = load <2 x double>, <2 x double>* %vaddr, align 16
    606   %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
    607   ret <2 x double>%res
    608 }
    609 
    610 ; CHECK-LABEL: test_128_30
    611 ; CHECK: vmovupd{{.*{%k[1-7]} }}
    612 ; CHECK: ret
    613 define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
    614   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
    615   %vaddr = bitcast i8* %addr to <2 x double>*
    616   %r = load <2 x double>, <2 x double>* %vaddr, align 1
    617   %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
    618   ret <2 x double>%res
    619 }
    620 
    621 ; CHECK-LABEL: test_128_31
    622 ; CHECK: vmovapd{{.*{%k[1-7]} {z} }}
    623 ; CHECK: ret
    624 define <2 x double> @test_128_31(i8 * %addr, <2 x i64> %mask1) {
    625   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
    626   %vaddr = bitcast i8* %addr to <2 x double>*
    627   %r = load <2 x double>, <2 x double>* %vaddr, align 16
    628   %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
    629   ret <2 x double>%res
    630 }
    631 
    632 ; CHECK-LABEL: test_128_32
    633 ; CHECK: vmovupd{{.*{%k[1-7]} {z} }}
    634 ; CHECK: ret
    635 define <2 x double> @test_128_32(i8 * %addr, <2 x i64> %mask1) {
    636   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
    637   %vaddr = bitcast i8* %addr to <2 x double>*
    638   %r = load <2 x double>, <2 x double>* %vaddr, align 1
    639   %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
    640   ret <2 x double>%res
    641 }
    642 
    643