; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl | FileCheck %s
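; This test exercises the 128-bit and 256-bit (AVX512VL) forms of the AVX512CD
; intrinsics: masked vplzcntd/vplzcntq, masked vpconflictd/vpconflictq, and the
; vpbroadcastmw2d/vpbroadcastmb2q mask-broadcast intrinsics.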

declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readonly

declare <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32>, <4 x i32>, i8)

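; Masked lzcnt tests. VPLZCNTD/VPLZCNTQ count the leading zero bits of each
; 32-bit or 64-bit element. The i8 mask argument arrives in %dil, is
; zero-extended, and is moved into %k1 before the masked instruction issues.
; Each test combines a merge-masked call, an unmasked call (all-ones mask),
; and, for the d.128 case, a zero-masked call (zeroinitializer pass-through),
; then adds the results so every variant stays live.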
define <4 x i32>@test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vplzcntd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vplzcntd %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res3 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
  %res2 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vplzcntd %ymm0, %ymm0
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vplzcntq %xmm0, %xmm0
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vplzcntq %ymm0, %ymm0
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8)

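; Masked conflict tests follow the same pattern as the lzcnt tests above.
; VPCONFLICTD/VPCONFLICTQ set, for each element, one bit for every
; lower-indexed element that holds the same value.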
define <4 x i32>@test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vpconflictd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpconflictd %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res3 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
  %res2 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vpconflictd %ymm0, %ymm0
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vpconflictq %xmm0, %xmm0
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vpconflictq %ymm0, %ymm0
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

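; The broadcastm tests only spot-check instruction selection: the integer mask
; argument is expected to land in %k0, and VPBROADCASTMW2D/VPBROADCASTMB2Q
; broadcast the contents of that mask register into every element of the
; destination vector.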
define <8 x i32> @test_x86_vbroadcastmw_256(i16 %a0) {
  ; CHECK: test_x86_vbroadcastmw_256
  ; CHECK: vpbroadcastmw2d %k0, %ymm0
  %res = call <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16 %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16)

define <4 x i32> @test_x86_vbroadcastmw_128(i16 %a0) {
  ; CHECK: test_x86_vbroadcastmw_128
  ; CHECK: vpbroadcastmw2d %k0, %xmm0
  %res = call <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16 %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16)

define <4 x i64> @test_x86_broadcastmb_256(i8 %a0) {
  ; CHECK: test_x86_broadcastmb_256
  ; CHECK: vpbroadcastmb2q %k0, %ymm0
  %res = call <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8 %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8)

define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) {
  ; CHECK: test_x86_broadcastmb_128
  ; CHECK: vpbroadcastmb2q %k0, %xmm0
  %res = call <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8 %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)