; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
;
; Tests lowering of AVX512CD+VL leading-zero-count (vplzcnt) and conflict
; detection (vpconflict) operations at 128/256-bit vector widths, with
; merge-masking ({%k1}), zero-masking ({%k1} {z}), and unmasked forms,
; on both 32-bit (X86) and 64-bit (X64) targets.
; NOTE(review): CHECK lines are machine-generated; regenerate with
; utils/update_llc_test_checks.py rather than editing them by hand.

define <4 x i32> @test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vplzcntd %xmm0, %xmm2
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
; X86-NEXT:    vplzcntd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vplzcntd %xmm0, %xmm2
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
; X64-NEXT:    vplzcntd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract1 = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract1, <4 x i32> %1, <4 x i32> %x1
  %4 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x0, i1 false)
  %5 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %x0, i1 false)
  %6 = bitcast i8 %x2 to <8 x i1>
  %extract = shufflevector <8 x i1> %6, <8 x i1> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %7 = select <4 x i1> %extract, <4 x i32> %5, <4 x i32> zeroinitializer
  %res2 = add <4 x i32> %3, %4
  %res4 = add <4 x i32> %res2, %7
  ret <4 x i32> %res4
}
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) #0

define <8 x i32> @test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vplzcntd %ymm0, %ymm2
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; X86-NEXT:    vpaddd %ymm2, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vplzcntd %ymm0, %ymm2
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; X64-NEXT:    vpaddd %ymm2, %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
  %4 = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %x0, i1 false)
  %res2 = add <8 x i32> %3, %4
  ret <8 x i32> %res2
}
declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) #0

define <2 x i64> @test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vplzcntq %xmm0, %xmm2
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; X86-NEXT:    vpaddq %xmm2, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vplzcntq %xmm0, %xmm2
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; X64-NEXT:    vpaddq %xmm2, %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <2 x i32> <i32 0, i32 1>
  %3 = select <2 x i1> %extract, <2 x i64> %1, <2 x i64> %x1
  %4 = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %x0, i1 false)
  %res2 = add <2 x i64> %3, %4
  ret <2 x i64> %res2
}
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0

define <4 x i64> @test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vplzcntq %ymm0, %ymm2
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; X86-NEXT:    vpaddq %ymm2, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vplzcntq %ymm0, %ymm2
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; X64-NEXT:    vpaddq %ymm2, %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %x0, i1 false)
  %2 = bitcast i8 %x2 to <8 x i1>
  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %x1
  %4 = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %x0, i1 false)
  %res2 = add <4 x i64> %3, %4
  ret <4 x i64> %res2
}
declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) #0

declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; X86-NEXT:    vpconflictd %xmm0, %xmm2 {%k1} {z}
; X86-NEXT:    vpconflictd %xmm0, %xmm0
; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %xmm0, %xmm2 {%k1} {z}
; X64-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; X64-NEXT:    vpconflictd %xmm0, %xmm0
; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res3 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
  %res2 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; X86-NEXT:    vpconflictd %ymm0, %ymm0
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; X64-NEXT:    vpconflictd %ymm0, %ymm0
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; X86-NEXT:    vpconflictq %xmm0, %xmm0
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; X64-NEXT:    vpconflictq %xmm0, %xmm0
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; X86-NEXT:    vpconflictq %ymm0, %ymm0
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; X64-NEXT:    vpconflictq %ymm0, %ymm0
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}