Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX256
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
      6 
      ; Codegen test: load one <8 x i32> vector from %p, compare every lane
      ; against zero, and sign-extend the resulting <8 x i1> mask to <8 x i16>.
      ;
      ; The expected assembly below is autogenerated (see the NOTE at the top of
      ; the file); regenerate it with utils/update_llc_test_checks.py instead of
      ; editing it by hand.
      ;
      ; In the expectations below, the AVX256 and AVX512VL prefixes have the same
      ; instruction sequence: compare into a mask register, materialize all-ones
      ; under zero-masking, then truncate with vpmovdw. The AVX512F prefix
      ; compares directly into a vector register and truncates with a
      ; zmm-to-ymm vpmovdw.
      7 define <8 x i16> @testv8i1_sext_v8i16(<8 x i32>* %p) {
      8 ; AVX256-LABEL: testv8i1_sext_v8i16:
      9 ; AVX256:       # %bb.0:
     10 ; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
     11 ; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
     12 ; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
     13 ; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
     14 ; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
     15 ; AVX256-NEXT:    vzeroupper
     16 ; AVX256-NEXT:    retq
     17 ;
     18 ; AVX512VL-LABEL: testv8i1_sext_v8i16:
     19 ; AVX512VL:       # %bb.0:
     20 ; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
     21 ; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
     22 ; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
     23 ; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
     24 ; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
     25 ; AVX512VL-NEXT:    vzeroupper
     26 ; AVX512VL-NEXT:    retq
     27 ;
     28 ; AVX512F-LABEL: testv8i1_sext_v8i16:
     29 ; AVX512F:       # %bb.0:
     30 ; AVX512F-NEXT:    vpxor %xmm0, %xmm0, %xmm0
     31 ; AVX512F-NEXT:    vpcmpeqd (%rdi), %ymm0, %ymm0
     32 ; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
     33 ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
     34 ; AVX512F-NEXT:    vzeroupper
     35 ; AVX512F-NEXT:    retq
          ; Lane-wise "== 0" test; each true lane becomes -1 after the sext.
     36   %in = load <8 x i32>, <8 x i32>* %p
     37   %cmp = icmp eq <8 x i32> %in, zeroinitializer
     38   %ext = sext <8 x i1> %cmp to <8 x i16>
     39   ret <8 x i16> %ext
     40 }
     41 
     ; Codegen test: load two <8 x i32> vectors (%p and %q), compare each
     ; against zero, concatenate the two <8 x i1> masks into a <16 x i1> mask,
     ; and sign-extend that mask to <16 x i8>.
     ;
     ; The expected assembly below is autogenerated (see the NOTE at the top of
     ; the file); regenerate it with utils/update_llc_test_checks.py instead of
     ; editing it by hand.
     ;
     ; In the expectations below, the AVX256 prefix processes each 8-lane half
     ; separately (masked move, vpmovdw, vpacksswb) and joins the halves with
     ; vpunpcklqdq. The AVX512VL and AVX512F prefixes merge the two mask
     ; registers with kunpckbw and then use a single 512-bit vpternlogd
     ; followed by vpmovdb.
     42 define <16 x i8> @testv16i1_sext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
     43 ; AVX256-LABEL: testv16i1_sext_v16i8:
     44 ; AVX256:       # %bb.0:
     45 ; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
     46 ; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
     47 ; AVX256-NEXT:    vpcmpeqd (%rsi), %ymm0, %k2
     48 ; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
     49 ; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k2} {z}
     50 ; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
     51 ; AVX256-NEXT:    vpacksswb %xmm0, %xmm1, %xmm1
     52 ; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
     53 ; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
     54 ; AVX256-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
     55 ; AVX256-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
     56 ; AVX256-NEXT:    vzeroupper
     57 ; AVX256-NEXT:    retq
     58 ;
     59 ; AVX512VL-LABEL: testv16i1_sext_v16i8:
     60 ; AVX512VL:       # %bb.0:
     61 ; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
     62 ; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
     63 ; AVX512VL-NEXT:    vpcmpeqd (%rsi), %ymm0, %k1
     64 ; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
     65 ; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
     66 ; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
     67 ; AVX512VL-NEXT:    vzeroupper
     68 ; AVX512VL-NEXT:    retq
     69 ;
     70 ; AVX512F-LABEL: testv16i1_sext_v16i8:
     71 ; AVX512F:       # %bb.0:
     72 ; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
     73 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
     74 ; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
     75 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
     76 ; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
     77 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
     78 ; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
     79 ; AVX512F-NEXT:    vzeroupper
     80 ; AVX512F-NEXT:    retq
          ; Two independent lane-wise "== 0" tests, one per input pointer.
     81   %in = load <8 x i32>, <8 x i32>* %p
     82   %cmp = icmp eq <8 x i32> %in, zeroinitializer
     83   %in2 = load <8 x i32>, <8 x i32>* %q
     84   %cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
          ; Identity shuffle over both operands: result lanes 0-7 come from %cmp
          ; and lanes 8-15 from %cmp2, i.e. a plain concatenation of the masks.
     85   %concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
     86   %ext = sext <16 x i1> %concat to <16 x i8>
     87   ret <16 x i8> %ext
     88 }
     89 
     ; Codegen test: load two <8 x i32> vectors (%p and %q), compare each
     ; against zero, concatenate the two <8 x i1> masks into a <16 x i1> mask,
     ; and sign-extend that mask to <16 x i16>.
     ;
     ; The expected assembly below is autogenerated (see the NOTE at the top of
     ; the file); regenerate it with utils/update_llc_test_checks.py instead of
     ; editing it by hand.
     ;
     ; In the expectations below, the AVX256 prefix builds each 128-bit half on
     ; its own (masked move plus vpmovdw) and combines them with vinserti128.
     ; The AVX512VL and AVX512F prefixes merge the two mask registers with
     ; kunpckbw and then use a single 512-bit vpternlogd followed by a
     ; zmm-to-ymm vpmovdw.
     90 define <16 x i16> @testv16i1_sext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
     91 ; AVX256-LABEL: testv16i1_sext_v16i16:
     92 ; AVX256:       # %bb.0:
     93 ; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
     94 ; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
     95 ; AVX256-NEXT:    vpcmpeqd (%rsi), %ymm0, %k2
     96 ; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
     97 ; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1} {z}
     98 ; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
     99 ; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k2} {z}
    100 ; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
    101 ; AVX256-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
    102 ; AVX256-NEXT:    retq
    103 ;
    104 ; AVX512VL-LABEL: testv16i1_sext_v16i16:
    105 ; AVX512VL:       # %bb.0:
    106 ; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    107 ; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
    108 ; AVX512VL-NEXT:    vpcmpeqd (%rsi), %ymm0, %k1
    109 ; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
    110 ; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
    111 ; AVX512VL-NEXT:    vpmovdw %zmm0, %ymm0
    112 ; AVX512VL-NEXT:    retq
    113 ;
    114 ; AVX512F-LABEL: testv16i1_sext_v16i16:
    115 ; AVX512F:       # %bb.0:
    116 ; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
    117 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
    118 ; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
    119 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
    120 ; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
    121 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
    122 ; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
    123 ; AVX512F-NEXT:    retq
          ; Two independent lane-wise "== 0" tests, one per input pointer.
    124   %in = load <8 x i32>, <8 x i32>* %p
    125   %cmp = icmp eq <8 x i32> %in, zeroinitializer
    126   %in2 = load <8 x i32>, <8 x i32>* %q
    127   %cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
          ; Identity shuffle over both operands: result lanes 0-7 come from %cmp
          ; and lanes 8-15 from %cmp2, i.e. a plain concatenation of the masks.
    128   %concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    129   %ext = sext <16 x i1> %concat to <16 x i16>
    130   ret <16 x i16> %ext
    131 }
    132 
    ; Codegen test: load one <8 x i32> vector from %p, compare every lane
    ; against zero, and zero-extend the resulting <8 x i1> mask to <8 x i16>.
    ;
    ; The expected assembly below is autogenerated (see the NOTE at the top of
    ; the file); regenerate it with utils/update_llc_test_checks.py instead of
    ; editing it by hand.
    ;
    ; Each expected sequence below ends with an extra "vpsrlw $15" before the
    ; return, which shifts every 16-bit lane right by 15 so an all-ones lane
    ; becomes 1. The AVX256 and AVX512VL prefixes have the same instruction
    ; sequence; the AVX512F prefix compares directly into a vector register
    ; and truncates with a zmm-to-ymm vpmovdw.
    133 define <8 x i16> @testv8i1_zext_v8i16(<8 x i32>* %p) {
    134 ; AVX256-LABEL: testv8i1_zext_v8i16:
    135 ; AVX256:       # %bb.0:
    136 ; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    137 ; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
    138 ; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
    139 ; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
    140 ; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
    141 ; AVX256-NEXT:    vpsrlw $15, %xmm0, %xmm0
    142 ; AVX256-NEXT:    vzeroupper
    143 ; AVX256-NEXT:    retq
    144 ;
    145 ; AVX512VL-LABEL: testv8i1_zext_v8i16:
    146 ; AVX512VL:       # %bb.0:
    147 ; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    148 ; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
    149 ; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
    150 ; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
    151 ; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
    152 ; AVX512VL-NEXT:    vpsrlw $15, %xmm0, %xmm0
    153 ; AVX512VL-NEXT:    vzeroupper
    154 ; AVX512VL-NEXT:    retq
    155 ;
    156 ; AVX512F-LABEL: testv8i1_zext_v8i16:
    157 ; AVX512F:       # %bb.0:
    158 ; AVX512F-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    159 ; AVX512F-NEXT:    vpcmpeqd (%rdi), %ymm0, %ymm0
    160 ; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
    161 ; AVX512F-NEXT:    vpsrlw $15, %xmm0, %xmm0
    162 ; AVX512F-NEXT:    vzeroupper
    163 ; AVX512F-NEXT:    retq
          ; Lane-wise "== 0" test; each true lane becomes 1 after the zext.
    164   %in = load <8 x i32>, <8 x i32>* %p
    165   %cmp = icmp eq <8 x i32> %in, zeroinitializer
    166   %ext = zext <8 x i1> %cmp to <8 x i16>
    167   ret <8 x i16> %ext
    168 }
    169 
    ; Codegen test: load two <8 x i32> vectors (%p and %q), compare each
    ; against zero, concatenate the two <8 x i1> masks into a <16 x i1> mask,
    ; and zero-extend that mask to <16 x i8>.
    ;
    ; The expected assembly below is autogenerated (see the NOTE at the top of
    ; the file); regenerate it with utils/update_llc_test_checks.py instead of
    ; editing it by hand.
    ;
    ; In the expectations below, the AVX256 prefix processes each 8-lane half
    ; separately (masked move, vpmovdw, "vpsrlw $15") and joins the halves
    ; with vpackuswb. The AVX512VL and AVX512F prefixes merge the two mask
    ; registers with kunpckbw, do a zero-masked vpbroadcastd from a
    ; RIP-relative memory operand, and truncate with vpmovdb.
    170 define <16 x i8> @testv16i1_zext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
    171 ; AVX256-LABEL: testv16i1_zext_v16i8:
    172 ; AVX256:       # %bb.0:
    173 ; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    174 ; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
    175 ; AVX256-NEXT:    vpcmpeqd (%rsi), %ymm0, %k2
    176 ; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
    177 ; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k2} {z}
    178 ; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
    179 ; AVX256-NEXT:    vpsrlw $15, %xmm1, %xmm1
    180 ; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
    181 ; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
    182 ; AVX256-NEXT:    vpsrlw $15, %xmm0, %xmm0
    183 ; AVX256-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
    184 ; AVX256-NEXT:    vzeroupper
    185 ; AVX256-NEXT:    retq
    186 ;
    187 ; AVX512VL-LABEL: testv16i1_zext_v16i8:
    188 ; AVX512VL:       # %bb.0:
    189 ; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    190 ; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
    191 ; AVX512VL-NEXT:    vpcmpeqd (%rsi), %ymm0, %k1
    192 ; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
    193 ; AVX512VL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
    194 ; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
    195 ; AVX512VL-NEXT:    vzeroupper
    196 ; AVX512VL-NEXT:    retq
    197 ;
    198 ; AVX512F-LABEL: testv16i1_zext_v16i8:
    199 ; AVX512F:       # %bb.0:
    200 ; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
    201 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
    202 ; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
    203 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
    204 ; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
    205 ; AVX512F-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
    206 ; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
    207 ; AVX512F-NEXT:    vzeroupper
    208 ; AVX512F-NEXT:    retq
          ; Two independent lane-wise "== 0" tests, one per input pointer.
    209   %in = load <8 x i32>, <8 x i32>* %p
    210   %cmp = icmp eq <8 x i32> %in, zeroinitializer
    211   %in2 = load <8 x i32>, <8 x i32>* %q
    212   %cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
          ; Identity shuffle over both operands: result lanes 0-7 come from %cmp
          ; and lanes 8-15 from %cmp2, i.e. a plain concatenation of the masks.
    213   %concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    214   %ext = zext <16 x i1> %concat to <16 x i8>
    215   ret <16 x i8> %ext
    216 }
    217 
    ; Codegen test: load two <8 x i32> vectors (%p and %q), compare each
    ; against zero, concatenate the two <8 x i1> masks into a <16 x i1> mask,
    ; and zero-extend that mask to <16 x i16>.
    ;
    ; The expected assembly below is autogenerated (see the NOTE at the top of
    ; the file); regenerate it with utils/update_llc_test_checks.py instead of
    ; editing it by hand.
    ;
    ; Each expected sequence below ends with a 256-bit "vpsrlw $15" before the
    ; return, which shifts every 16-bit lane right by 15 so an all-ones lane
    ; becomes 1. Before that shift, the AVX256 prefix builds each 128-bit half
    ; on its own and combines them with vinserti128, while the AVX512VL and
    ; AVX512F prefixes merge the two mask registers with kunpckbw and use a
    ; single 512-bit vpternlogd plus a zmm-to-ymm vpmovdw.
    218 define <16 x i16> @testv16i1_zext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
    219 ; AVX256-LABEL: testv16i1_zext_v16i16:
    220 ; AVX256:       # %bb.0:
    221 ; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    222 ; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
    223 ; AVX256-NEXT:    vpcmpeqd (%rsi), %ymm0, %k2
    224 ; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
    225 ; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1} {z}
    226 ; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
    227 ; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k2} {z}
    228 ; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
    229 ; AVX256-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
    230 ; AVX256-NEXT:    vpsrlw $15, %ymm0, %ymm0
    231 ; AVX256-NEXT:    retq
    232 ;
    233 ; AVX512VL-LABEL: testv16i1_zext_v16i16:
    234 ; AVX512VL:       # %bb.0:
    235 ; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    236 ; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
    237 ; AVX512VL-NEXT:    vpcmpeqd (%rsi), %ymm0, %k1
    238 ; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
    239 ; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
    240 ; AVX512VL-NEXT:    vpmovdw %zmm0, %ymm0
    241 ; AVX512VL-NEXT:    vpsrlw $15, %ymm0, %ymm0
    242 ; AVX512VL-NEXT:    retq
    243 ;
    244 ; AVX512F-LABEL: testv16i1_zext_v16i16:
    245 ; AVX512F:       # %bb.0:
    246 ; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
    247 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
    248 ; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
    249 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
    250 ; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
    251 ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
    252 ; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
    253 ; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm0
    254 ; AVX512F-NEXT:    retq
          ; Two independent lane-wise "== 0" tests, one per input pointer.
    255   %in = load <8 x i32>, <8 x i32>* %p
    256   %cmp = icmp eq <8 x i32> %in, zeroinitializer
    257   %in2 = load <8 x i32>, <8 x i32>* %q
    258   %cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
          ; Identity shuffle over both operands: result lanes 0-7 come from %cmp
          ; and lanes 8-15 from %cmp2, i.e. a plain concatenation of the masks.
    259   %concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    260   %ext = zext <16 x i1> %concat to <16 x i16>
    261   ret <16 x i16> %ext
    262 }
    263