Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,AVX512CD
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown  -mattr=+avx512vl,+avx512cd,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,AVX512VLCDBW
      4 ; RUN: llc < %s -mtriple=i686-unknown-unknown  -mattr=+avx512vl,+avx512cd,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=ALL,X86-AVX512VLCDBW
        ; Configurations exercised above, in order: x86-64 with avx512cd only,
        ; x86-64 with avx512vl + avx512cd + avx512bw, and i686 with the same
        ; three features. Every -mattr entry carries an explicit '+' so the
        ; feature lists read consistently across the three invocations.
      5 
      ; test_mm_epi64: splat of a zero-extended 8-bit compare mask into <2 x i64>.
      ; The <8 x i16> equality result is bitcast to i8, widened to i64 and
      ; replicated to both lanes. Per the assertions below, the x86-64
      ; AVX512VLCDBW run expects vpcmpeqw into %k0 followed by vpbroadcastmb2q,
      ; while the AVX512CD run and the i686 run move the mask through %eax and
      ; rebuild the vector with vpinsrb and vpshufb respectively.
      6 define <2 x i64> @test_mm_epi64(<8 x i16> %a, <8 x i16> %b) {
      7 ; AVX512CD-LABEL: test_mm_epi64:
      8 ; AVX512CD:       # %bb.0: # %entry
      9 ; AVX512CD-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
     10 ; AVX512CD-NEXT:    vpmovsxwq %xmm0, %zmm0
     11 ; AVX512CD-NEXT:    vptestmq %zmm0, %zmm0, %k0
     12 ; AVX512CD-NEXT:    kmovw %k0, %eax
     13 ; AVX512CD-NEXT:    vpxor %xmm0, %xmm0, %xmm0
     14 ; AVX512CD-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0
     15 ; AVX512CD-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0
     16 ; AVX512CD-NEXT:    vzeroupper
     17 ; AVX512CD-NEXT:    retq
     18 ;
     19 ; AVX512VLCDBW-LABEL: test_mm_epi64:
     20 ; AVX512VLCDBW:       # %bb.0: # %entry
     21 ; AVX512VLCDBW-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
     22 ; AVX512VLCDBW-NEXT:    vpbroadcastmb2q %k0, %xmm0
     23 ; AVX512VLCDBW-NEXT:    retq
     24 ;
     25 ; X86-AVX512VLCDBW-LABEL: test_mm_epi64:
     26 ; X86-AVX512VLCDBW:       # %bb.0: # %entry
     27 ; X86-AVX512VLCDBW-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
     28 ; X86-AVX512VLCDBW-NEXT:    kmovd %k0, %eax
     29 ; X86-AVX512VLCDBW-NEXT:    movzbl %al, %eax
     30 ; X86-AVX512VLCDBW-NEXT:    vmovd %eax, %xmm0
     31 ; X86-AVX512VLCDBW-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
     32 ; X86-AVX512VLCDBW-NEXT:    retl
     33 entry:
          ; One i1 per i16 lane: true where %a and %b are equal.
     34   %0 = icmp eq <8 x i16> %a, %b
          ; Reinterpret the eight i1 results as a single i8 bitmask, then widen it.
     35   %1 = bitcast <8 x i1> %0 to i8
     36   %conv.i = zext i8 %1 to i64
          ; Put the widened mask in lane 0 and splat it: the zeroinitializer
          ; shuffle mask selects element 0 for every result lane.
     37   %vecinit.i.i = insertelement <2 x i64> undef, i64 %conv.i, i32 0
     38   %vecinit1.i.i = shufflevector <2 x i64> %vecinit.i.i, <2 x i64> undef, <2 x i32> zeroinitializer
     39   ret <2 x i64> %vecinit1.i.i
     40 }
     41 
     ; test_mm_epi32: splat of a zero-extended 16-bit compare mask into <4 x i32>.
     ; The <16 x i8> equality result is bitcast to i16, widened to i32 and
     ; replicated to all four lanes. Per the assertions below, both runs with
     ; avx512vl + avx512cd + avx512bw (x86-64 and i686) expect vpcmpeqb into %k0
     ; followed by vpbroadcastmw2d, while the AVX512CD run extracts the mask
     ; into %eax and rebuilds the vector with four vpinsrw instructions.
     42 define <4 x i32> @test_mm_epi32(<16 x i8> %a, <16 x i8> %b) {
     43 ; AVX512CD-LABEL: test_mm_epi32:
     44 ; AVX512CD:       # %bb.0: # %entry
     45 ; AVX512CD-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
     46 ; AVX512CD-NEXT:    vpmovsxbd %xmm0, %zmm0
     47 ; AVX512CD-NEXT:    vptestmd %zmm0, %zmm0, %k0
     48 ; AVX512CD-NEXT:    kmovw %k0, %eax
     49 ; AVX512CD-NEXT:    vpxor %xmm0, %xmm0, %xmm0
     50 ; AVX512CD-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
     51 ; AVX512CD-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
     52 ; AVX512CD-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
     53 ; AVX512CD-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
     54 ; AVX512CD-NEXT:    vzeroupper
     55 ; AVX512CD-NEXT:    retq
     56 ;
     57 ; AVX512VLCDBW-LABEL: test_mm_epi32:
     58 ; AVX512VLCDBW:       # %bb.0: # %entry
     59 ; AVX512VLCDBW-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
     60 ; AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %xmm0
     61 ; AVX512VLCDBW-NEXT:    retq
     62 ;
     63 ; X86-AVX512VLCDBW-LABEL: test_mm_epi32:
     64 ; X86-AVX512VLCDBW:       # %bb.0: # %entry
     65 ; X86-AVX512VLCDBW-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
     66 ; X86-AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %xmm0
     67 ; X86-AVX512VLCDBW-NEXT:    retl
     68 entry:
          ; One i1 per i8 lane: true where %a and %b are equal.
     69   %0 = icmp eq <16 x i8> %a, %b
          ; Reinterpret the sixteen i1 results as a single i16 bitmask, then widen it.
     70   %1 = bitcast <16 x i1> %0 to i16
     71   %conv.i = zext i16 %1 to i32
          ; Put the widened mask in lane 0 and splat it: the zeroinitializer
          ; shuffle mask selects element 0 for every result lane.
     72   %vecinit.i.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
     73   %vecinit3.i.i = shufflevector <4 x i32> %vecinit.i.i, <4 x i32> undef, <4 x i32> zeroinitializer
     74   ret <4 x i32> %vecinit3.i.i
     75 }
     76 
     ; test_mm512_epi32: splat of a zero-extended 16-bit compare mask into
     ; <16 x i32>. The <16 x i32> equality result is bitcast to i16, widened to
     ; i32 and replicated to all sixteen lanes. Per the assertions below, all
     ; three configurations expect the same two-instruction sequence: vpcmpeqd
     ; on zmm registers into %k0, then vpbroadcastmw2d into %zmm0.
     77 define <16 x i32> @test_mm512_epi32(<16 x i32> %a, <16 x i32> %b) {
     78 ; AVX512CD-LABEL: test_mm512_epi32:
     79 ; AVX512CD:       # %bb.0: # %entry
     80 ; AVX512CD-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
     81 ; AVX512CD-NEXT:    vpbroadcastmw2d %k0, %zmm0
     82 ; AVX512CD-NEXT:    retq
     83 ;
     84 ; AVX512VLCDBW-LABEL: test_mm512_epi32:
     85 ; AVX512VLCDBW:       # %bb.0: # %entry
     86 ; AVX512VLCDBW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
     87 ; AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %zmm0
     88 ; AVX512VLCDBW-NEXT:    retq
     89 ;
     90 ; X86-AVX512VLCDBW-LABEL: test_mm512_epi32:
     91 ; X86-AVX512VLCDBW:       # %bb.0: # %entry
     92 ; X86-AVX512VLCDBW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
     93 ; X86-AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %zmm0
     94 ; X86-AVX512VLCDBW-NEXT:    retl
     95 entry:
          ; One i1 per i32 lane: true where %a and %b are equal.
     96   %0 = icmp eq <16 x i32> %a, %b
          ; Reinterpret the sixteen i1 results as a single i16 bitmask, then widen it.
     97   %1 = bitcast <16 x i1> %0 to i16
     98   %conv.i = zext i16 %1 to i32
          ; Put the widened mask in lane 0 and splat it: the zeroinitializer
          ; shuffle mask selects element 0 for every result lane.
     99   %vecinit.i.i = insertelement <16 x i32> undef, i32 %conv.i, i32 0
    100   %vecinit15.i.i = shufflevector <16 x i32> %vecinit.i.i, <16 x i32> undef, <16 x i32> zeroinitializer
    101   ret <16 x i32> %vecinit15.i.i
    102 }
    103 
    ; test_mm512_epi64: splat of a zero-extended 8-bit compare mask into
    ; <8 x i64>. The <8 x i32> equality result is bitcast to i8, widened to i64
    ; and replicated to all eight lanes. Per the assertions below, both x86-64
    ; configurations end in vpbroadcastmb2q into %zmm0 (the AVX512CD run
    ; performs the compare on zmm registers, the AVX512VLCDBW run on ymm
    ; registers), while the i686 run moves the mask through %eax and uses
    ; vpbroadcastq instead.
    104 define <8 x i64> @test_mm512_epi64(<8 x i32> %a, <8 x i32> %b) {
    105 ; AVX512CD-LABEL: test_mm512_epi64:
    106 ; AVX512CD:       # %bb.0: # %entry
    107 ; AVX512CD-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
    108 ; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
    109 ; AVX512CD-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
    110 ; AVX512CD-NEXT:    vpbroadcastmb2q %k0, %zmm0
    111 ; AVX512CD-NEXT:    retq
    112 ;
    113 ; AVX512VLCDBW-LABEL: test_mm512_epi64:
    114 ; AVX512VLCDBW:       # %bb.0: # %entry
    115 ; AVX512VLCDBW-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
    116 ; AVX512VLCDBW-NEXT:    vpbroadcastmb2q %k0, %zmm0
    117 ; AVX512VLCDBW-NEXT:    retq
    118 ;
    119 ; X86-AVX512VLCDBW-LABEL: test_mm512_epi64:
    120 ; X86-AVX512VLCDBW:       # %bb.0: # %entry
    121 ; X86-AVX512VLCDBW-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
    122 ; X86-AVX512VLCDBW-NEXT:    kmovd %k0, %eax
    123 ; X86-AVX512VLCDBW-NEXT:    movzbl %al, %eax
    124 ; X86-AVX512VLCDBW-NEXT:    vmovd %eax, %xmm0
    125 ; X86-AVX512VLCDBW-NEXT:    vpbroadcastq %xmm0, %zmm0
    126 ; X86-AVX512VLCDBW-NEXT:    retl
    127 entry:
          ; One i1 per i32 lane: true where %a and %b are equal.
    128   %0 = icmp eq <8 x i32> %a, %b
          ; Reinterpret the eight i1 results as a single i8 bitmask, then widen it.
    129   %1 = bitcast <8 x i1> %0 to i8
    130   %conv.i = zext i8 %1 to i64
          ; Put the widened mask in lane 0 and splat it: the zeroinitializer
          ; shuffle mask selects element 0 for every result lane.
    131   %vecinit.i.i = insertelement <8 x i64> undef, i64 %conv.i, i32 0
    132   %vecinit7.i.i = shufflevector <8 x i64> %vecinit.i.i, <8 x i64> undef, <8 x i32> zeroinitializer
    133   ret <8 x i64> %vecinit7.i.i
    134 }
    135 
    ; test_mm256_epi64: splat of a zero-extended 8-bit compare mask into
    ; <4 x i64>. The <8 x i32> equality result is bitcast to i8, widened to i64
    ; and replicated to all four lanes. Per the assertions below, only the
    ; x86-64 AVX512VLCDBW run expects vpbroadcastmb2q (into %ymm0); the
    ; AVX512CD run and the i686 run both move the mask through %eax, zero-extend
    ; the low byte with movzbl, and finish with vpbroadcastq.
    136 define <4 x i64> @test_mm256_epi64(<8 x i32> %a, <8 x i32> %b) {
    137 ; AVX512CD-LABEL: test_mm256_epi64:
    138 ; AVX512CD:       # %bb.0: # %entry
    139 ; AVX512CD-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
    140 ; AVX512CD-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
    141 ; AVX512CD-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
    142 ; AVX512CD-NEXT:    kmovw %k0, %eax
    143 ; AVX512CD-NEXT:    movzbl %al, %eax
    144 ; AVX512CD-NEXT:    vmovq %rax, %xmm0
    145 ; AVX512CD-NEXT:    vpbroadcastq %xmm0, %ymm0
    146 ; AVX512CD-NEXT:    retq
    147 ;
    148 ; AVX512VLCDBW-LABEL: test_mm256_epi64:
    149 ; AVX512VLCDBW:       # %bb.0: # %entry
    150 ; AVX512VLCDBW-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
    151 ; AVX512VLCDBW-NEXT:    vpbroadcastmb2q %k0, %ymm0
    152 ; AVX512VLCDBW-NEXT:    retq
    153 ;
    154 ; X86-AVX512VLCDBW-LABEL: test_mm256_epi64:
    155 ; X86-AVX512VLCDBW:       # %bb.0: # %entry
    156 ; X86-AVX512VLCDBW-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
    157 ; X86-AVX512VLCDBW-NEXT:    kmovd %k0, %eax
    158 ; X86-AVX512VLCDBW-NEXT:    movzbl %al, %eax
    159 ; X86-AVX512VLCDBW-NEXT:    vmovd %eax, %xmm0
    160 ; X86-AVX512VLCDBW-NEXT:    vpbroadcastq %xmm0, %ymm0
    161 ; X86-AVX512VLCDBW-NEXT:    retl
    162 entry:
          ; One i1 per i32 lane: true where %a and %b are equal.
    163   %0 = icmp eq <8 x i32> %a, %b
          ; Reinterpret the eight i1 results as a single i8 bitmask, then widen it.
    164   %1 = bitcast <8 x i1> %0 to i8
    165   %conv.i = zext i8 %1 to i64
          ; Put the widened mask in lane 0 and splat it: the zeroinitializer
          ; shuffle mask selects element 0 for every result lane.
    166   %vecinit.i.i = insertelement <4 x i64> undef, i64 %conv.i, i32 0
    167   %vecinit3.i.i = shufflevector <4 x i64> %vecinit.i.i, <4 x i64> undef, <4 x i32> zeroinitializer
    168   ret <4 x i64> %vecinit3.i.i
    169 }
    170 
    ; test_mm256_epi32: splat of a zero-extended 16-bit compare mask into
    ; <8 x i32>. The <16 x i16> equality result is bitcast to i16, widened to
    ; i32 and replicated to all eight lanes. Per the assertions below, both
    ; runs with avx512vl + avx512cd + avx512bw (x86-64 and i686) expect
    ; vpcmpeqw into %k0 followed by vpbroadcastmw2d into %ymm0, while the
    ; AVX512CD run extracts the mask into %eax, rebuilds a 128-bit vector with
    ; four vpinsrw instructions and duplicates it with vinserti128.
    171 define <8 x i32> @test_mm256_epi32(<16 x i16> %a, <16 x i16> %b) {
    172 ; AVX512CD-LABEL: test_mm256_epi32:
    173 ; AVX512CD:       # %bb.0: # %entry
    174 ; AVX512CD-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
    175 ; AVX512CD-NEXT:    vpmovsxwd %ymm0, %zmm0
    176 ; AVX512CD-NEXT:    vptestmd %zmm0, %zmm0, %k0
    177 ; AVX512CD-NEXT:    kmovw %k0, %eax
    178 ; AVX512CD-NEXT:    vpxor %xmm0, %xmm0, %xmm0
    179 ; AVX512CD-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
    180 ; AVX512CD-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
    181 ; AVX512CD-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
    182 ; AVX512CD-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
    183 ; AVX512CD-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    184 ; AVX512CD-NEXT:    retq
    185 ;
    186 ; AVX512VLCDBW-LABEL: test_mm256_epi32:
    187 ; AVX512VLCDBW:       # %bb.0: # %entry
    188 ; AVX512VLCDBW-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
    189 ; AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %ymm0
    190 ; AVX512VLCDBW-NEXT:    retq
    191 ;
    192 ; X86-AVX512VLCDBW-LABEL: test_mm256_epi32:
    193 ; X86-AVX512VLCDBW:       # %bb.0: # %entry
    194 ; X86-AVX512VLCDBW-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
    195 ; X86-AVX512VLCDBW-NEXT:    vpbroadcastmw2d %k0, %ymm0
    196 ; X86-AVX512VLCDBW-NEXT:    retl
    197 entry:
          ; One i1 per i16 lane: true where %a and %b are equal.
    198   %0 = icmp eq <16 x i16> %a, %b
          ; Reinterpret the sixteen i1 results as a single i16 bitmask, then widen it.
    199   %1 = bitcast <16 x i1> %0 to i16
    200   %conv.i = zext i16 %1 to i32
          ; Put the widened mask in lane 0 and splat it: the zeroinitializer
          ; shuffle mask selects element 0 for every result lane.
    201   %vecinit.i.i = insertelement <8 x i32> undef, i32 %conv.i, i32 0
    202   %vecinit7.i.i = shufflevector <8 x i32> %vecinit.i.i, <8 x i32> undef, <8 x i32> zeroinitializer
    203   ret <8 x i32> %vecinit7.i.i
    204 }
    205 
    206