; X86 AVX-512CD + VL intrinsic codegen test (vplzcnt, vpconflict, vpbroadcastm).
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl| FileCheck %s
      3 
      4 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readonly
      5 
      6 declare <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32>, <4 x i32>, i8)
      7 
      8 define <4 x i32>@test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
      9 ; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
     10 ; CHECK:       ## BB#0:
     11 ; CHECK-NEXT:    kmovw %edi, %k1
     12 ; CHECK-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
     13 ; CHECK-NEXT:    vplzcntd %xmm0, %xmm2 {%k1} {z}
     14 ; CHECK-NEXT:    vplzcntd %xmm0, %xmm0
     15 ; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
     16 ; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
     17 ; CHECK-NEXT:    retq
; Exercises three lowerings of the 128-bit masked VPLZCNTD intrinsic:
;   %res  - mask %x2 with pass-through %x1      -> merge-masked vplzcntd {%k1}
;   %res1 - all-ones mask (i8 -1)               -> unmasked vplzcntd
;   %res3 - mask %x2 with zeroinitializer src1  -> zero-masked vplzcntd {%k1} {z}
; The adds combine all three results into one return value so none of the
; calls can be dead-code eliminated.
     18   %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
     19   %res1 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
     20   %res3 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
     21   %res2 = add <4 x i32> %res, %res1
     22   %res4 = add <4 x i32> %res2, %res3
     23   ret <4 x i32> %res4
     24 }
     25 
     26 declare <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32>, <8 x i32>, i8)
     27 
     28 define <8 x i32>@test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
     29 ; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
     30 ; CHECK:       ## BB#0:
     31 ; CHECK-NEXT:    kmovw %edi, %k1
     32 ; CHECK-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
     33 ; CHECK-NEXT:    vplzcntd %ymm0, %ymm0
     34 ; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
     35 ; CHECK-NEXT:    retq
; Checks the 256-bit masked VPLZCNTD intrinsic in merge-masked form
; (mask %x2, pass-through %x1) and unmasked form (mask i8 -1); the add
; keeps both call results live.
     36   %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
     37   %res1 = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
     38   %res2 = add <8 x i32> %res, %res1
     39   ret <8 x i32> %res2
     40 }
     41 
     42 declare <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64>, <2 x i64>, i8)
     43 
     44 define <2 x i64>@test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
     45 ; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
     46 ; CHECK:       ## BB#0:
     47 ; CHECK-NEXT:    kmovw %edi, %k1
     48 ; CHECK-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
     49 ; CHECK-NEXT:    vplzcntq %xmm0, %xmm0
     50 ; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
     51 ; CHECK-NEXT:    retq
; Checks the 128-bit masked VPLZCNTQ intrinsic in merge-masked form
; (mask %x2, pass-through %x1) and unmasked form (mask i8 -1); the add
; keeps both call results live.
     52   %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
     53   %res1 = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
     54   %res2 = add <2 x i64> %res, %res1
     55   ret <2 x i64> %res2
     56 }
     57 
     58 declare <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64>, <4 x i64>, i8)
     59 
     60 define <4 x i64>@test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
     61 ; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
     62 ; CHECK:       ## BB#0:
     63 ; CHECK-NEXT:    kmovw %edi, %k1
     64 ; CHECK-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
     65 ; CHECK-NEXT:    vplzcntq %ymm0, %ymm0
     66 ; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
     67 ; CHECK-NEXT:    retq
; Checks the 256-bit masked VPLZCNTQ intrinsic in merge-masked form
; (mask %x2, pass-through %x1) and unmasked form (mask i8 -1); the add
; keeps both call results live.
     68   %res = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
     69   %res1 = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
     70   %res2 = add <4 x i64> %res, %res1
     71   ret <4 x i64> %res2
     72 }
     73 
     74 declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8)
     75 
     76 define <4 x i32>@test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
     77 ; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
     78 ; CHECK:       ## BB#0:
     79 ; CHECK-NEXT:    kmovw %edi, %k1
     80 ; CHECK-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
     81 ; CHECK-NEXT:    vpconflictd %xmm0, %xmm2 {%k1} {z}
     82 ; CHECK-NEXT:    vpconflictd %xmm0, %xmm0
     83 ; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
     84 ; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
     85 ; CHECK-NEXT:    retq
; Exercises three lowerings of the 128-bit masked VPCONFLICTD intrinsic:
;   %res  - mask %x2 with pass-through %x1      -> merge-masked vpconflictd {%k1}
;   %res1 - all-ones mask (i8 -1)               -> unmasked vpconflictd
;   %res3 - mask %x2 with zeroinitializer src1  -> zero-masked vpconflictd {%k1} {z}
; The adds combine all three results into one return value so none of the
; calls can be dead-code eliminated.
     86   %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
     87   %res1 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
     88   %res3 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
     89   %res2 = add <4 x i32> %res, %res1
     90   %res4 = add <4 x i32> %res2, %res3
     91   ret <4 x i32> %res4
     92 }
     93 
     94 declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8)
     95 
     96 define <8 x i32>@test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
     97 ; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
     98 ; CHECK:       ## BB#0:
     99 ; CHECK-NEXT:    kmovw %edi, %k1
    100 ; CHECK-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
    101 ; CHECK-NEXT:    vpconflictd %ymm0, %ymm0
    102 ; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
    103 ; CHECK-NEXT:    retq
; Checks the 256-bit masked VPCONFLICTD intrinsic in merge-masked form
; (mask %x2, pass-through %x1) and unmasked form (mask i8 -1); the add
; keeps both call results live.
    104   %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
    105   %res1 = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
    106   %res2 = add <8 x i32> %res, %res1
    107   ret <8 x i32> %res2
    108 }
    109 
    110 declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8)
    111 
    112 define <2 x i64>@test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
    113 ; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
    114 ; CHECK:       ## BB#0:
    115 ; CHECK-NEXT:    kmovw %edi, %k1
    116 ; CHECK-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
    117 ; CHECK-NEXT:    vpconflictq %xmm0, %xmm0
    118 ; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
    119 ; CHECK-NEXT:    retq
; Checks the 128-bit masked VPCONFLICTQ intrinsic in merge-masked form
; (mask %x2, pass-through %x1) and unmasked form (mask i8 -1); the add
; keeps both call results live.
    120   %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
    121   %res1 = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
    122   %res2 = add <2 x i64> %res, %res1
    123   ret <2 x i64> %res2
    124 }
    125 
    126 declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8)
    127 
    128 define <4 x i64>@test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
    129 ; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
    130 ; CHECK:       ## BB#0:
    131 ; CHECK-NEXT:    kmovw %edi, %k1
    132 ; CHECK-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
    133 ; CHECK-NEXT:    vpconflictq %ymm0, %ymm0
    134 ; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
    135 ; CHECK-NEXT:    retq
; Checks the 256-bit masked VPCONFLICTQ intrinsic in merge-masked form
; (mask %x2, pass-through %x1) and unmasked form (mask i8 -1); the add
; keeps both call results live.
    136   %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
    137   %res1 = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
    138   %res2 = add <4 x i64> %res, %res1
    139   ret <4 x i64> %res2
    140 }
    141 
    142 define <8 x i32> @test_x86_vbroadcastmw_256(i16 %a0) {
    143 ; CHECK-LABEL: test_x86_vbroadcastmw_256:
    144 ; CHECK:       ## BB#0:
    145 ; CHECK-NEXT:    kmovw %edi, %k0
    146 ; CHECK-NEXT:    vpbroadcastmw2d %k0, %ymm0
    147 ; CHECK-NEXT:    retq
; Moves the i16 mask argument into a mask register (kmovw) and broadcasts
; it to all eight 32-bit lanes of ymm0 via VPBROADCASTMW2D.
    148   %res = call <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16 %a0) ;
    149   ret <8 x i32> %res
    150 }
    151 declare <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16)
    152 
    153 define <4 x i32> @test_x86_vbroadcastmw_128(i16 %a0) {
    154 ; CHECK-LABEL: test_x86_vbroadcastmw_128:
    155 ; CHECK:       ## BB#0:
    156 ; CHECK-NEXT:    kmovw %edi, %k0
    157 ; CHECK-NEXT:    vpbroadcastmw2d %k0, %xmm0
    158 ; CHECK-NEXT:    retq
; Moves the i16 mask argument into a mask register (kmovw) and broadcasts
; it to all four 32-bit lanes of xmm0 via VPBROADCASTMW2D.
    159   %res = call <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16 %a0) ;
    160   ret <4 x i32> %res
    161 }
    162 declare <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16)
    163 
    164 define <4 x i64> @test_x86_broadcastmb_256(i8 %a0) {
    165 ; CHECK-LABEL: test_x86_broadcastmb_256:
    166 ; CHECK:       ## BB#0:
    167 ; CHECK-NEXT:    kmovw %edi, %k0
    168 ; CHECK-NEXT:    vpbroadcastmb2q %k0, %ymm0
    169 ; CHECK-NEXT:    retq
; Moves the i8 mask argument into a mask register (kmovw) and broadcasts
; it to all four 64-bit lanes of ymm0 via VPBROADCASTMB2Q.
    170   %res = call <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8 %a0) ;
    171   ret <4 x i64> %res
    172 }
    173 declare <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8)
    174 
    175 define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) {
    176 ; CHECK-LABEL: test_x86_broadcastmb_128:
    177 ; CHECK:       ## BB#0:
    178 ; CHECK-NEXT:    kmovw %edi, %k0
    179 ; CHECK-NEXT:    vpbroadcastmb2q %k0, %xmm0
    180 ; CHECK-NEXT:    retq
; Moves the i8 mask argument into a mask register (kmovw) and broadcasts
; it to both 64-bit lanes of xmm0 via VPBROADCASTMB2Q.
    181   %res = call <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8 %a0) ;
    182   ret <2 x i64> %res
    183 }
    184 declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)
    185