; NOTE(review): code-viewer navigation residue commented out so the file parses as IR:
; "Home | History | Annotate | Download | only in X86"
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
      3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
      4 
; Lowering of 128-bit count-leading-zeros (vplzcntd) via the generic ctlz
; intrinsic under merge-masking, no masking, and zero-masking. The i8 mask
; arrives on the stack on the 32-bit target and in %edi on the 64-bit target.
define <4 x i32> @test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vplzcntd %xmm0, %xmm2
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
; X86-NEXT:    vplzcntd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vplzcntd %xmm0, %xmm2
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
; X64-NEXT:    vplzcntd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x0, i1 false) ; lzcnt, defined for zero input
  %2 = bitcast i8 %x2 to <8 x i1>
  ; Only the low 4 bits of the i8 mask apply to a 4-element vector.
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x1 ; merge-masked: passthru is %x1
  %4 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x0, i1 false) ; unmasked copy
  %5 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x0, i1 false)
  %6 = bitcast i8 %x2 to <8 x i1>
  %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer ; zero-masked variant
  ; Sum all three flavors so none can be dead-code eliminated.
  %res2 = add <4 x i32> %3, %4
  %res4 = add <4 x i32> %res2, %7
  ret <4 x i32> %res4
}
     39 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) #0
     40 
; 256-bit vplzcntd: merge-masked plus unmasked lowering; with 8 elements the
; whole i8 mask is used directly (no shufflevector extract needed).
define <8 x i32> @test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vplzcntd %ymm0, %ymm2
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; X86-NEXT:    vpaddd %ymm2, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vplzcntd %ymm0, %ymm2
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; X64-NEXT:    vpaddd %ymm2, %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1 ; merge-masked: passthru is %x1
  %4 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %x0, i1 false) ; unmasked copy
  ; Sum both so neither is dead-code eliminated.
  %res2 = add <8 x i32> %3, %4
  ret <8 x i32> %res2
}
     65 declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) #0
     66 
; 128-bit vplzcntq (64-bit lanes): merge-masked plus unmasked lowering.
define <2 x i64> @test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vplzcntq %xmm0, %xmm2
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; X86-NEXT:    vpaddq %xmm2, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vplzcntq %xmm0, %xmm2
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; X64-NEXT:    vpaddq %xmm2, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  ; Only the low 2 bits of the i8 mask apply to a 2-element vector.
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x1 ; merge-masked: passthru is %x1
  %4 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %x0, i1 false) ; unmasked copy
  %res2 = add <2 x i64> %3, %4
  ret <2 x i64> %res2
}
     92 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0
     93 
; 256-bit vplzcntq (64-bit lanes): merge-masked plus unmasked lowering.
define <4 x i64> @test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vplzcntq %ymm0, %ymm2
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; X86-NEXT:    vpaddq %ymm2, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vplzcntq %ymm0, %ymm2
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; X64-NEXT:    vpaddq %ymm2, %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  ; Only the low 4 bits of the i8 mask apply to a 4-element vector.
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x1 ; merge-masked: passthru is %x1
  %4 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %x0, i1 false) ; unmasked copy
  %res2 = add <4 x i64> %3, %4
  ret <4 x i64> %res2
}
    119 declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) #0
    120 
    121 declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8)
    122 
; 128-bit vpconflictd via the legacy masked intrinsic: merge-masked (%x2),
; unmasked (mask -1), and zero-masked (zeroinitializer passthru) forms.
define <4 x i32>@test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; X86-NEXT:    vpconflictd %xmm0, %xmm2 {%k1} {z}
; X86-NEXT:    vpconflictd %xmm0, %xmm0
; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %xmm0, %xmm2 {%k1} {z}
; X64-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; X64-NEXT:    vpconflictd %xmm0, %xmm0
; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) ; merge-masked
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) ; all-ones mask = unmasked
  %res3 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2) ; zero passthru = zero-masked
  ; Sum all three flavors so none can be dead-code eliminated.
  %res2 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}
    151 
    152 declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8)
    153 
; 256-bit vpconflictd via the legacy masked intrinsic: merge-masked (%x2)
; and unmasked (mask -1) forms.
define <8 x i32>@test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; X86-NEXT:    vpconflictd %ymm0, %ymm0
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; X64-NEXT:    vpconflictd %ymm0, %ymm0
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) ; merge-masked
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1) ; all-ones mask = unmasked
  ; Sum both so neither is dead-code eliminated.
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}
    176 
    177 declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8)
    178 
; 128-bit vpconflictq (64-bit lanes) via the legacy masked intrinsic:
; merge-masked (%x2) and unmasked (mask -1) forms.
define <2 x i64>@test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; X86-NEXT:    vpconflictq %xmm0, %xmm0
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; X64-NEXT:    vpconflictq %xmm0, %xmm0
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) ; merge-masked
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1) ; all-ones mask = unmasked
  ; Sum both so neither is dead-code eliminated.
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}
    201 
    202 declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8)
    203 
; 256-bit vpconflictq (64-bit lanes) via the legacy masked intrinsic:
; merge-masked (%x2) and unmasked (mask -1) forms.
define <4 x i64>@test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; X86-NEXT:    vpconflictq %ymm0, %ymm0
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; X64-NEXT:    vpconflictq %ymm0, %ymm0
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) ; merge-masked
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1) ; all-ones mask = unmasked
  ; Sum both so neither is dead-code eliminated.
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
    226 
    227