; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=skylake-avx512 < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX
; RUN: llc -mcpu=knl < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
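
; Codegen tests for the llvm.masked.expandload and llvm.masked.compressstore
; intrinsics on AVX-512, comparing SKX (AVX512VL/BW/DQ available) against
; KNL (AVX512F only, so sub-512-bit operations must be widened to zmm).

; Expand load with a compile-time mask (element 11 clear, imm 0xF7FF) and an
; undef passthru: the undef passthru permits zero-masking ({z}) instead of a
; merge into a live register.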
define <16 x float> @test1(float* %base) {
; SKX-LABEL: test1:
; SKX:       # %bb.0:
; SKX-NEXT:    movw $-2049, %ax # imm = 0xF7FF
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vexpandps (%rdi), %zmm0 {%k1} {z}
; SKX-NEXT:    retq
;
; KNL-LABEL: test1:
; KNL:       # %bb.0:
; KNL-NEXT:    movw $-2049, %ax # imm = 0xF7FF
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vexpandps (%rdi), %zmm0 {%k1} {z}
; KNL-NEXT:    retq
  %res = call <16 x float> @llvm.masked.expandload.v16f32(float* %base, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float> %res
}

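; Same constant-mask expand load, but with a real passthru in %zmm0, so the
; expand merges under the mask (elements 11 and 15 clear, imm 0x77FF).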
define <16 x float> @test2(float* %base, <16 x float> %src0) {
; SKX-LABEL: test2:
; SKX:       # %bb.0:
; SKX-NEXT:    movw $30719, %ax # imm = 0x77FF
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vexpandps (%rdi), %zmm0 {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test2:
; KNL:       # %bb.0:
; KNL-NEXT:    movw $30719, %ax # imm = 0x77FF
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vexpandps (%rdi), %zmm0 {%k1}
; KNL-NEXT:    retq
  %res = call <16 x float> @llvm.masked.expandload.v16f32(float* %base, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x float> %src0)
  ret <16 x float> %res
}

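; Variable <8 x i1> mask: SKX materializes the k-register with vpsllw/vpmovw2m
; (AVX512BW), while KNL sign-extends the lanes to qwords and uses vptestmq.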
define <8 x double> @test3(double* %base, <8 x double> %src0, <8 x i1> %mask) {
; SKX-LABEL: test3:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
; SKX-NEXT:    vpmovw2m %xmm1, %k1
; SKX-NEXT:    vexpandpd (%rdi), %zmm0 {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test3:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT:    vexpandpd (%rdi), %zmm0 {%k1}
; KNL-NEXT:    retq
  %res = call <8 x double> @llvm.masked.expandload.v8f64(double* %base, <8 x i1> %mask, <8 x double> %src0)
  ret <8 x double> %res
}

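; 128-bit expand load: SKX uses the VLX xmm form directly; KNL has no VLX,
; so the operation is widened to zmm (hence the "kill" register annotations).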
define <4 x float> @test4(float* %base, <4 x float> %src0) {
; SKX-LABEL: test4:
; SKX:       # %bb.0:
; SKX-NEXT:    movb $7, %al
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vexpandps (%rdi), %xmm0 {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test4:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    movw $7, %ax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vexpandps (%rdi), %zmm0 {%k1}
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT:    retq
  %res = call <4 x float> @llvm.masked.expandload.v4f32(float* %base, <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> %src0)
  ret <4 x float> %res
}

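; v2i64 expand load with only element 1 enabled (mask imm 2); again KNL must
; widen the xmm operation to zmm.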
define <2 x i64> @test5(i64* %base, <2 x i64> %src0) {
; SKX-LABEL: test5:
; SKX:       # %bb.0:
; SKX-NEXT:    movb $2, %al
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vpexpandq (%rdi), %xmm0 {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test5:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    movb $2, %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vpexpandq (%rdi), %zmm0 {%k1}
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT:    retq
  %res = call <2 x i64> @llvm.masked.expandload.v2i64(i64* %base, <2 x i1> <i1 false, i1 true>, <2 x i64> %src0)
  ret <2 x i64> %res
}

declare <16 x float> @llvm.masked.expandload.v16f32(float*, <16 x i1>, <16 x float>)
declare <8 x double> @llvm.masked.expandload.v8f64(double*, <8 x i1>, <8 x double>)
declare <4 x float>  @llvm.masked.expandload.v4f32(float*, <4 x i1>, <4 x float>)
declare <2 x i64>    @llvm.masked.expandload.v2i64(i64*, <2 x i1>, <2 x i64>)

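; Compress store with a compile-time mask (element 11 clear, imm 0xF7FF).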
define void @test6(float* %base, <16 x float> %V) {
; SKX-LABEL: test6:
; SKX:       # %bb.0:
; SKX-NEXT:    movw $-2049, %ax # imm = 0xF7FF
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vcompressps %zmm0, (%rdi) {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; KNL-LABEL: test6:
; KNL:       # %bb.0:
; KNL-NEXT:    movw $-2049, %ax # imm = 0xF7FF
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vcompressps %zmm0, (%rdi) {%k1}
; KNL-NEXT:    retq
  call void @llvm.masked.compressstore.v16f32(<16 x float> %V, float* %base, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

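; 256-bit compress store with a variable mask: SKX uses the ymm form; KNL
; widens both the vector and the mask computation to 512 bits.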
define void @test7(float* %base, <8 x float> %V, <8 x i1> %mask) {
; SKX-LABEL: test7:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
; SKX-NEXT:    vpmovw2m %xmm1, %k1
; SKX-NEXT:    vcompressps %ymm0, (%rdi) {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; KNL-LABEL: test7:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT:    vcompressps %zmm0, (%rdi) {%k1}
; KNL-NEXT:    retq
  call void @llvm.masked.compressstore.v8f32(<8 x float> %V, float* %base, <8 x i1> %mask)
  ret void
}

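; v8f64 compress store with a variable <8 x i1> mask; both CPUs use zmm.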
define void @test8(double* %base, <8 x double> %V, <8 x i1> %mask) {
; SKX-LABEL: test8:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
; SKX-NEXT:    vpmovw2m %xmm1, %k1
; SKX-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; KNL-LABEL: test8:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
; KNL-NEXT:    retq
  call void @llvm.masked.compressstore.v8f64(<8 x double> %V, double* %base, <8 x i1> %mask)
  ret void
}

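; Integer variant: a v8i64 compress store lowers to vpcompressq.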
define void @test9(i64* %base, <8 x i64> %V, <8 x i1> %mask) {
; SKX-LABEL: test9:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm1, %xmm1
; SKX-NEXT:    vpmovw2m %xmm1, %k1
; SKX-NEXT:    vpcompressq %zmm0, (%rdi) {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; KNL-LABEL: test9:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxwq %xmm1, %zmm1
; KNL-NEXT:    vpsllq $63, %zmm1, %zmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT:    vpcompressq %zmm0, (%rdi) {%k1}
; KNL-NEXT:    retq
  call void @llvm.masked.compressstore.v8i64(<8 x i64> %V, i64* %base, <8 x i1> %mask)
  ret void
}

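; v4i64 compress store: KNL widens to zmm and must clear the upper bits of
; the widened mask with a kshiftlw/kshiftrw pair.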
define void @test10(i64* %base, <4 x i64> %V, <4 x i1> %mask) {
; SKX-LABEL: test10:
; SKX:       # %bb.0:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
; SKX-NEXT:    vpmovd2m %xmm1, %k1
; SKX-NEXT:    vpcompressq %ymm0, (%rdi) {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; KNL-LABEL: test10:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT:    kshiftlw $12, %k0, %k0
; KNL-NEXT:    kshiftrw $12, %k0, %k1
; KNL-NEXT:    vpcompressq %zmm0, (%rdi) {%k1}
; KNL-NEXT:    retq
  call void @llvm.masked.compressstore.v4i64(<4 x i64> %V, i64* %base, <4 x i1> %mask)
  ret void
}

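; v2i64 compress store; KNL keeps only the low two mask bits via kshift.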
define void @test11(i64* %base, <2 x i64> %V, <2 x i1> %mask) {
; SKX-LABEL: test11:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm1, %xmm1
; SKX-NEXT:    vpmovq2m %xmm1, %k1
; SKX-NEXT:    vpcompressq %xmm0, (%rdi) {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test11:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    vpsllq $63, %xmm1, %xmm1
; KNL-NEXT:    vptestmq %zmm1, %zmm1, %k0
; KNL-NEXT:    kshiftlw $14, %k0, %k0
; KNL-NEXT:    kshiftrw $14, %k0, %k1
; KNL-NEXT:    vpcompressq %zmm0, (%rdi) {%k1}
; KNL-NEXT:    retq
  call void @llvm.masked.compressstore.v2i64(<2 x i64> %V, i64* %base, <2 x i1> %mask)
  ret void
}

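; v4f32 compress store, the floating-point analogue of test10.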
define void @test12(float* %base, <4 x float> %V, <4 x i1> %mask) {
; SKX-LABEL: test12:
; SKX:       # %bb.0:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
; SKX-NEXT:    vpmovd2m %xmm1, %k1
; SKX-NEXT:    vcompressps %xmm0, (%rdi) {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test12:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT:    kshiftlw $12, %k0, %k0
; KNL-NEXT:    kshiftrw $12, %k0, %k1
; KNL-NEXT:    vcompressps %zmm0, (%rdi) {%k1}
; KNL-NEXT:    retq
  call void @llvm.masked.compressstore.v4f32(<4 x float> %V, float* %base, <4 x i1> %mask)
  ret void
}

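; The <2 x i32> trigger is legalized in 64-bit lanes: vpblendd zeroes the
; high dword of each qword so that vptestnmq implements the 32-bit
; compare-with-zero that produces the expand-load mask.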
define <2 x float> @test13(float* %base, <2 x float> %src0, <2 x i32> %trigger) {
; SKX-LABEL: test13:
; SKX:       # %bb.0:
; SKX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; SKX-NEXT:    vptestnmq %xmm1, %xmm1, %k1
; SKX-NEXT:    vexpandps (%rdi), %xmm0 {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test13:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0
; KNL-NEXT:    kshiftlw $14, %k0, %k0
; KNL-NEXT:    kshiftrw $14, %k0, %k1
; KNL-NEXT:    vexpandps (%rdi), %zmm0 {%k1}
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT:    retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  %res = call <2 x float> @llvm.masked.expandload.v2f32(float* %base, <2 x i1> %mask, <2 x float> %src0)
  ret <2 x float> %res
}

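; Same trigger legalization as test13, feeding a v2f32 compress store.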
define void @test14(float* %base, <2 x float> %V, <2 x i32> %trigger) {
; SKX-LABEL: test14:
; SKX:       # %bb.0:
; SKX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; SKX-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; SKX-NEXT:    vptestnmq %xmm1, %xmm1, %k1
; SKX-NEXT:    vcompressps %xmm0, (%rdi) {%k1}
; SKX-NEXT:    retq
;
; KNL-LABEL: test14:
; KNL:       # %bb.0:
; KNL-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0
; KNL-NEXT:    kshiftlw $14, %k0, %k0
; KNL-NEXT:    kshiftrw $14, %k0, %k1
; KNL-NEXT:    vcompressps %zmm0, (%rdi) {%k1}
; KNL-NEXT:    retq
  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
  call void @llvm.masked.compressstore.v2f32(<2 x float> %V, float* %base, <2 x i1> %mask)
  ret void
}

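; A v32f32 expand load is split into two 16-wide halves; the second half
; loads from base + 4 * popcnt(low-half mask), since each enabled lane
; consumes one element from memory. Both CPUs produce identical code here,
; hence the shared ALL prefix.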
define <32 x float> @test15(float* %base, <32 x float> %src0, <32 x i32> %trigger) {
; ALL-LABEL: test15:
; ALL:       # %bb.0:
; ALL-NEXT:    vptestnmd %zmm3, %zmm3, %k1
; ALL-NEXT:    vptestnmd %zmm2, %zmm2, %k2
; ALL-NEXT:    kmovw %k2, %eax
; ALL-NEXT:    popcntl %eax, %eax
; ALL-NEXT:    vexpandps (%rdi,%rax,4), %zmm1 {%k1}
; ALL-NEXT:    vexpandps (%rdi), %zmm0 {%k2}
; ALL-NEXT:    retq
  %mask = icmp eq <32 x i32> %trigger, zeroinitializer
  %res = call <32 x float> @llvm.masked.expandload.v32f32(float* %base, <32 x i1> %mask, <32 x float> %src0)
  ret <32 x float> %res
}

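; v16f64 expand load split into two 8-wide halves, offsetting the second by
; 8 * popcnt(low 8 mask bits). SKX reads the mask byte with kmovb (AVX512DQ);
; KNL must use kmovw and then zero-extend the low byte with movzbl.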
define <16 x double> @test16(double* %base, <16 x double> %src0, <16 x i32> %trigger) {
; SKX-LABEL: test16:
; SKX:       # %bb.0:
; SKX-NEXT:    vextracti64x4 $1, %zmm2, %ymm3
; SKX-NEXT:    vptestnmd %ymm3, %ymm3, %k1
; SKX-NEXT:    vptestnmd %ymm2, %ymm2, %k2
; SKX-NEXT:    kmovb %k2, %eax
; SKX-NEXT:    popcntl %eax, %eax
; SKX-NEXT:    vexpandpd (%rdi,%rax,8), %zmm1 {%k1}
; SKX-NEXT:    vexpandpd (%rdi), %zmm0 {%k2}
; SKX-NEXT:    retq
;
; KNL-LABEL: test16:
; KNL:       # %bb.0:
; KNL-NEXT:    vextracti64x4 $1, %zmm2, %ymm3
; KNL-NEXT:    vptestnmd %zmm3, %zmm3, %k1
; KNL-NEXT:    vptestnmd %zmm2, %zmm2, %k2
; KNL-NEXT:    vexpandpd (%rdi), %zmm0 {%k2}
; KNL-NEXT:    kmovw %k2, %eax
; KNL-NEXT:    movzbl %al, %eax
; KNL-NEXT:    popcntl %eax, %eax
; KNL-NEXT:    vexpandpd (%rdi,%rax,8), %zmm1 {%k1}
; KNL-NEXT:    retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  %res = call <16 x double> @llvm.masked.expandload.v16f64(double* %base, <16 x i1> %mask, <16 x double> %src0)
  ret <16 x double> %res
}

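; v32f32 compress store split into two halves, mirroring test15: the second
; store lands at base + 4 * popcnt(low-half mask).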
define void @test17(float* %base, <32 x float> %V, <32 x i32> %trigger) {
; SKX-LABEL: test17:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestnmd %zmm3, %zmm3, %k1
; SKX-NEXT:    vptestnmd %zmm2, %zmm2, %k2
; SKX-NEXT:    kmovw %k2, %eax
; SKX-NEXT:    popcntl %eax, %eax
; SKX-NEXT:    vcompressps %zmm1, (%rdi,%rax,4) {%k1}
; SKX-NEXT:    vcompressps %zmm0, (%rdi) {%k2}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; KNL-LABEL: test17:
; KNL:       # %bb.0:
; KNL-NEXT:    vptestnmd %zmm3, %zmm3, %k1
; KNL-NEXT:    vptestnmd %zmm2, %zmm2, %k2
; KNL-NEXT:    kmovw %k2, %eax
; KNL-NEXT:    popcntl %eax, %eax
; KNL-NEXT:    vcompressps %zmm1, (%rdi,%rax,4) {%k1}
; KNL-NEXT:    vcompressps %zmm0, (%rdi) {%k2}
; KNL-NEXT:    retq
  %mask = icmp eq <32 x i32> %trigger, zeroinitializer
  call void @llvm.masked.compressstore.v32f32(<32 x float> %V, float* %base, <32 x i1> %mask)
  ret void
}

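; v16f64 compress store with a <16 x i1> argument mask: the mask is split
; with kshiftrw $8, and the high half's store address is offset by
; 8 * popcnt(low byte of the mask).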
define void @test18(double* %base, <16 x double> %V, <16 x i1> %mask) {
; SKX-LABEL: test18:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
; SKX-NEXT:    vpmovb2m %xmm2, %k1
; SKX-NEXT:    kshiftrw $8, %k1, %k2
; SKX-NEXT:    kmovb %k1, %eax
; SKX-NEXT:    popcntl %eax, %eax
; SKX-NEXT:    vcompresspd %zmm1, (%rdi,%rax,8) {%k2}
; SKX-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; KNL-LABEL: test18:
; KNL:       # %bb.0:
; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL-NEXT:    kshiftrw $8, %k1, %k2
; KNL-NEXT:    vcompresspd %zmm0, (%rdi) {%k1}
; KNL-NEXT:    kmovw %k1, %eax
; KNL-NEXT:    movzbl %al, %eax
; KNL-NEXT:    popcntl %eax, %eax
; KNL-NEXT:    vcompresspd %zmm1, (%rdi,%rax,8) {%k2}
; KNL-NEXT:    retq
  call void @llvm.masked.compressstore.v16f64(<16 x double> %V, double* %base, <16 x i1> %mask)
  ret void
}

declare void @llvm.masked.compressstore.v16f32(<16 x float>, float*, <16 x i1>)
declare void @llvm.masked.compressstore.v8f32(<8 x float>, float*, <8 x i1>)
declare void @llvm.masked.compressstore.v8f64(<8 x double>, double*, <8 x i1>)
declare void @llvm.masked.compressstore.v16i32(<16 x i32>, i32*, <16 x i1>)
declare void @llvm.masked.compressstore.v8i32(<8 x i32>, i32*, <8 x i1>)
declare void @llvm.masked.compressstore.v8i64(<8 x i64>, i64*, <8 x i1>)
declare void @llvm.masked.compressstore.v4i32(<4 x i32>, i32*, <4 x i1>)
declare void @llvm.masked.compressstore.v4f32(<4 x float>, float*, <4 x i1>)
declare void @llvm.masked.compressstore.v4i64(<4 x i64>, i64*, <4 x i1>)
declare void @llvm.masked.compressstore.v2i64(<2 x i64>, i64*, <2 x i1>)
declare void @llvm.masked.compressstore.v2f32(<2 x float>, float*, <2 x i1>)
declare void @llvm.masked.compressstore.v32f32(<32 x float>, float*, <32 x i1>)
declare void @llvm.masked.compressstore.v16f64(<16 x double>, double*, <16 x i1>)
declare void @llvm.masked.compressstore.v32f64(<32 x double>, double*, <32 x i1>)

declare <2 x float> @llvm.masked.expandload.v2f32(float*, <2 x i1>, <2 x float>)
declare <32 x float> @llvm.masked.expandload.v32f32(float*, <32 x i1>, <32 x float>)
declare <16 x double> @llvm.masked.expandload.v16f64(double*, <16 x i1>, <16 x double>)
    419