; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl| FileCheck %s

declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readonly

declare <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vplzcntd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vplzcntd %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res3 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
  %res2 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vplzcntd %ymm0, %ymm0
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vplzcntq %xmm0, %xmm0
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vplzcntq %ymm0, %ymm0
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vpconflictd %xmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vpconflictd %xmm0, %xmm0
; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res3 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
  %res2 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vpconflictd %ymm0, %ymm0
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vpconflictq %xmm0, %xmm0
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; CHECK-NEXT:    vpconflictq %ymm0, %ymm0
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

define <8 x i32> @test_x86_vbroadcastmw_256(i16 %a0) {
; CHECK: test_x86_vbroadcastmw_256
; CHECK: vpbroadcastmw2d %k0, %ymm0
  %res = call <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16 %a0) ;
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16)

define <4 x i32> @test_x86_vbroadcastmw_128(i16 %a0) {
; CHECK: test_x86_vbroadcastmw_128
; CHECK: vpbroadcastmw2d %k0, %xmm0
  %res = call <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16 %a0) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16)

define <4 x i64> @test_x86_broadcastmb_256(i8 %a0) {
; CHECK: test_x86_broadcastmb_256
; CHECK: vpbroadcastmb2q %k0, %ymm0
  %res = call <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8 %a0) ;
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8)

define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) {
; CHECK: test_x86_broadcastmb_128
; CHECK: vpbroadcastmb2q %k0, %xmm0
  %res = call <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8 %a0) ;
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)