; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32

; Both llc invocations above enable +avx512bw on -mcpu=knl: one for the
; x86_64-apple-darwin triple and one for the i386-unknown-linux-gnu triple.
; Each function below carries one group of expected-assembly comments per
; invocation; per the first line, those comments were produced by
; utils/update_llc_test_checks.py.

; Byte equality compare with the constant mask -1; the i64 intrinsic result is
; returned unchanged.
define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
; AVX512BW-LABEL: test_pcmpeq_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpeq_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp0:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
  ret i64 %res
}

; Byte equality compare with the caller-supplied i64 %mask as the intrinsic's
; last operand.
define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; AVX512BW-LABEL: test_mask_pcmpeq_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpeq_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp1:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
  ret i64 %res
}

declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)

; Word (i16 element) equality compare with the constant mask -1; returns the
; i32 intrinsic result.
define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_pcmpeq_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpeq_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
  ret i32 %res
}

; Word equality compare with the caller-supplied i32 %mask.
define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_pcmpeq_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpeq_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)

; Byte greater-than compare with the constant mask -1; returns the i64
; intrinsic result.
define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
; AVX512BW-LABEL: test_pcmpgt_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpgt_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp2:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
  ret i64 %res
}

; Byte greater-than compare with the caller-supplied i64 %mask.
define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
; AVX512BW-LABEL: test_mask_pcmpgt_b:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpgt_b:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $12, %esp
; AVX512F-32-NEXT: .Ltmp3:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $12, %esp
; AVX512F-32-NEXT: retl
  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
  ret i64 %res
}

declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)

; Word greater-than compare with the constant mask -1; returns the i32
; intrinsic result.
define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_pcmpgt_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_pcmpgt_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
  ret i32 %res
}

; Word greater-than compare with the caller-supplied i32 %mask.
define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_pcmpgt_w:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_pcmpgt_w:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)

; Calls llvm.x86.avx512.mask.cmp.b.512 once per immediate 0 through 7 with the
; constant mask -1 and returns the running i64 sum of the eight results.
define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512BW-LABEL: test_cmp_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: vpcmpltb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpleb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rdx
; AVX512BW-NEXT: addq %rcx, %rdx
; AVX512BW-NEXT: vpcmpordb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rdx, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_cmp_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $68, %esp
; AVX512F-32-NEXT: .Ltmp4:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: addl (%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: retl
  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
  %ret1 = add i64 %res0, %res1
  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
  %ret2 = add i64 %ret1, %res2
  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
  %ret3 = add i64 %ret2, %res3
  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
  %ret4 = add i64 %ret3, %res4
  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
  %ret5 = add i64 %ret4, %res5
  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
  %ret6 = add i64 %ret5, %res6
  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
  %ret7 = add i64 %ret6, %res7
  ret i64 %ret7
}

; Same eight-immediate sum as test_cmp_b_512, but every call passes the
; caller-supplied i64 %mask.
define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512BW-LABEL: test_mask_cmp_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rdx
; AVX512BW-NEXT: addq %rcx, %rdx
; AVX512BW-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rdx, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_cmp_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $68, %esp
; AVX512F-32-NEXT: .Ltmp5:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpltb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpunordb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnleb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpordb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: retl
  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
  %ret1 = add i64 %res0, %res1
  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
  %ret2 = add i64 %ret1, %res2
  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
  %ret3 = add i64 %ret2, %res3
  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
  %ret4 = add i64 %ret3, %res4
  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
  %ret5 = add i64 %ret4, %res5
  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
  %ret6 = add i64 %ret5, %res6
  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
  %ret7 = add i64 %ret6, %res7
  ret i64 %ret7
}

declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone

; Calls llvm.x86.avx512.mask.ucmp.b.512 once per immediate 0 through 7 with the
; constant mask -1 and returns the running i64 sum of the eight results.
define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
; AVX512BW-LABEL: test_ucmp_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpequb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: vpcmpltub %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpleub %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rdx
; AVX512BW-NEXT: addq %rcx, %rdx
; AVX512BW-NEXT: vpcmpordub %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rdx, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_ucmp_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $68, %esp
; AVX512F-32-NEXT: .Ltmp6:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: addl (%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: retl
  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
  %ret1 = add i64 %res0, %res1
  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
  %ret2 = add i64 %ret1, %res2
  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
  %ret3 = add i64 %ret2, %res3
  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
  %ret4 = add i64 %ret3, %res4
  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
  %ret5 = add i64 %ret4, %res5
  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
  %ret6 = add i64 %ret5, %res6
  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
  %ret7 = add i64 %ret6, %res7
  ret i64 %ret7
}

; Same eight-immediate sum as test_ucmp_b_512, but every call passes the
; caller-supplied i64 %mask.
; NOTE(review): the name carries an extra "x86_avx512" segment compared with
; test_mask_cmp_b_512 and test_mask_ucmp_w_512; renaming would also require
; updating the LABEL assertions below, so it is left as-is here.
define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
; AVX512BW-LABEL: test_mask_x86_avx512_ucmp_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpcmpequb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rdx
; AVX512BW-NEXT: addq %rcx, %rdx
; AVX512BW-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rdx, %rax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $68, %esp
; AVX512F-32-NEXT: .Ltmp7:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpcmpequb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, (%esp)
; AVX512F-32-NEXT: movl (%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpunordub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnequb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: vpcmpordub %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl $68, %esp
; AVX512F-32-NEXT: retl
  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
  %ret1 = add i64 %res0, %res1
  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
  %ret2 = add i64 %ret1, %res2
  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
  %ret3 = add i64 %ret2, %res3
  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
  %ret4 = add i64 %ret3, %res4
  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
  %ret5 = add i64 %ret4, %res5
  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
  %ret6 = add i64 %ret5, %res6
  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
  %ret7 = add i64 %ret6, %res7
  ret i64 %ret7
}

declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone

; Calls llvm.x86.avx512.mask.cmp.w.512 once per immediate 0 through 7 with the
; constant mask -1 and returns the running i32 sum of the eight results.
define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
; AVX512BW-LABEL: test_cmp_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: vpcmpltw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmplew %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpunordw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpneqw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpnlew %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %edx
; AVX512BW-NEXT: addl %ecx, %edx
; AVX512BW-NEXT: vpcmpordw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %edx, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_cmp_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: vpcmpltw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpunordw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpnlew %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %edx
; AVX512F-32-NEXT: addl %ecx, %edx
; AVX512F-32-NEXT: vpcmpordw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %edx, %eax
; AVX512F-32-NEXT: retl
  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
  %ret1 = add i32 %res0, %res1
  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
  %ret2 = add i32 %ret1, %res2
  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
  %ret3 = add i32 %ret2, %res3
  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
  %ret4 = add i32 %ret3, %res4
  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
  %ret5 = add i32 %ret4, %res5
  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
  %ret6 = add i32 %ret5, %res6
  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
  %ret7 = add i32 %ret6, %res7
  ret i32 %ret7
}

; Same eight-immediate sum as test_cmp_w_512, but every call passes the
; caller-supplied i32 %mask.
define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
; AVX512BW-LABEL: test_mask_cmp_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: vpcmpltw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %edx
; AVX512BW-NEXT: addl %ecx, %edx
; AVX512BW-NEXT: vpcmpordw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %edx, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_cmp_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: vpcmpltw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpunordw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpnlew %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %edx
; AVX512F-32-NEXT: addl %ecx, %edx
; AVX512F-32-NEXT: vpcmpordw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %edx, %eax
; AVX512F-32-NEXT: retl
  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
  %ret1 = add i32 %res0, %res1
  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
  %ret2 = add i32 %ret1, %res2
  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
  %ret3 = add i32 %ret2, %res3
  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
  %ret4 = add i32 %ret3, %res4
  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
  %ret5 = add i32 %ret4, %res5
  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
  %ret6 = add i32 %ret5, %res6
  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
  %ret7 = add i32 %ret6, %res7
  ret i32 %ret7
}

declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone

; Calls llvm.x86.avx512.mask.ucmp.w.512 once per immediate 0 through 7 with the
; constant mask -1 and returns the running i32 sum of the eight results.
define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
; AVX512BW-LABEL: test_ucmp_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpcmpequw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpleuw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpnequw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %edx
; AVX512BW-NEXT: addl %ecx, %edx
; AVX512BW-NEXT: vpcmporduw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %edx, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_ucmp_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpcmpequw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: vpcmpltuw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpnequw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %edx
; AVX512F-32-NEXT: addl %ecx, %edx
; AVX512F-32-NEXT: vpcmporduw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %edx, %eax
; AVX512F-32-NEXT: retl
  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
  %ret1 = add i32 %res0, %res1
  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
  %ret2 = add i32 %ret1, %res2
  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
  %ret3 = add i32 %ret2, %res3
  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
  %ret4 = add i32 %ret3, %res4
  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
  %ret5 = add i32 %ret4, %res5
  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
  %ret6 = add i32 %ret5, %res6
  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
  %ret7 = add i32 %ret6, %res7
  ret i32 %ret7
}

; Same eight-immediate sum as test_ucmp_w_512, but every call passes the
; caller-supplied i32 %mask.
define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
; AVX512BW-LABEL: test_mask_ucmp_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpcmpequw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
; AVX512BW-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %edx
; AVX512BW-NEXT: addl %ecx, %edx
; AVX512BW-NEXT: vpcmporduw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %edx, %eax
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_ucmp_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpcmpequw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpnequw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %ecx
; AVX512F-32-NEXT: addl %eax, %ecx
; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %edx
; AVX512F-32-NEXT: addl %ecx, %edx
; AVX512F-32-NEXT: vpcmporduw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %edx, %eax
; AVX512F-32-NEXT: retl
  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
  %ret1 = add i32 %res0, %res1
  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
  %ret2 = add i32 %ret1, %res2
  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
  %ret3 = add i32 %ret2, %res3
  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
  %ret4 = add i32 %ret3, %res4
  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
  %ret5 = add i32 %ret4, %res5
  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
  %ret6 = add i32 %ret5, %res6
  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
  %ret7 = add i32 %ret6, %res7
  ret i32 %ret7
}

declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone

declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly

define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) {
; AVX512BW-LABEL: test_x86_mask_blend_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpblendmw %zmm1, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_x86_mask_blend_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; 
AVX512F-32-NEXT: vpblendmw %zmm1, %zmm0, %zmm0 {%k1} 838 ; AVX512F-32-NEXT: retl 839 %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1] 840 ret <32 x i16> %res 841 } 842 declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly 843 844 define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) { 845 ; AVX512BW-LABEL: test_x86_mask_blend_b_512: 846 ; AVX512BW: ## BB#0: 847 ; AVX512BW-NEXT: kmovq %rdi, %k1 848 ; AVX512BW-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1} 849 ; AVX512BW-NEXT: retq 850 ; 851 ; AVX512F-32-LABEL: test_x86_mask_blend_b_512: 852 ; AVX512F-32: # BB#0: 853 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 854 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 855 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 856 ; AVX512F-32-NEXT: vpblendmb %zmm1, %zmm0, %zmm0 {%k1} 857 ; AVX512F-32-NEXT: retl 858 %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1] 859 ret <64 x i8> %res 860 } 861 862 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { 863 ; AVX512BW-LABEL: test_mask_packs_epi32_rr_512: 864 ; AVX512BW: ## BB#0: 865 ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 866 ; AVX512BW-NEXT: retq 867 ; 868 ; AVX512F-32-LABEL: test_mask_packs_epi32_rr_512: 869 ; AVX512F-32: # BB#0: 870 ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 871 ; AVX512F-32-NEXT: retl 872 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 873 ret <32 x i16> %res 874 } 875 876 define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { 877 ; AVX512BW-LABEL: test_mask_packs_epi32_rrk_512: 878 ; AVX512BW: ## BB#0: 879 ; AVX512BW-NEXT: kmovd %edi, %k1 880 ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} 881 ; AVX512BW-NEXT: vmovaps %zmm2, 
%zmm0 882 ; AVX512BW-NEXT: retq 883 ; 884 ; AVX512F-32-LABEL: test_mask_packs_epi32_rrk_512: 885 ; AVX512F-32: # BB#0: 886 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 887 ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} 888 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 889 ; AVX512F-32-NEXT: retl 890 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 891 ret <32 x i16> %res 892 } 893 894 define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { 895 ; AVX512BW-LABEL: test_mask_packs_epi32_rrkz_512: 896 ; AVX512BW: ## BB#0: 897 ; AVX512BW-NEXT: kmovd %edi, %k1 898 ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} 899 ; AVX512BW-NEXT: retq 900 ; 901 ; AVX512F-32-LABEL: test_mask_packs_epi32_rrkz_512: 902 ; AVX512F-32: # BB#0: 903 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 904 ; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} 905 ; AVX512F-32-NEXT: retl 906 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 907 ret <32 x i16> %res 908 } 909 910 define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { 911 ; AVX512BW-LABEL: test_mask_packs_epi32_rm_512: 912 ; AVX512BW: ## BB#0: 913 ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 914 ; AVX512BW-NEXT: retq 915 ; 916 ; AVX512F-32-LABEL: test_mask_packs_epi32_rm_512: 917 ; AVX512F-32: # BB#0: 918 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 919 ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 920 ; AVX512F-32-NEXT: retl 921 %b = load <16 x i32>, <16 x i32>* %ptr_b 922 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 923 ret <32 x i16> %res 924 } 925 926 define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 927 ; AVX512BW-LABEL: 
test_mask_packs_epi32_rmk_512: 928 ; AVX512BW: ## BB#0: 929 ; AVX512BW-NEXT: kmovd %esi, %k1 930 ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} 931 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 932 ; AVX512BW-NEXT: retq 933 ; 934 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmk_512: 935 ; AVX512F-32: # BB#0: 936 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 937 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 938 ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1} 939 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 940 ; AVX512F-32-NEXT: retl 941 %b = load <16 x i32>, <16 x i32>* %ptr_b 942 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 943 ret <32 x i16> %res 944 } 945 946 define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { 947 ; AVX512BW-LABEL: test_mask_packs_epi32_rmkz_512: 948 ; AVX512BW: ## BB#0: 949 ; AVX512BW-NEXT: kmovd %esi, %k1 950 ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} 951 ; AVX512BW-NEXT: retq 952 ; 953 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmkz_512: 954 ; AVX512F-32: # BB#0: 955 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 956 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 957 ; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} 958 ; AVX512F-32-NEXT: retl 959 %b = load <16 x i32>, <16 x i32>* %ptr_b 960 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 961 ret <32 x i16> %res 962 } 963 964 define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { 965 ; AVX512BW-LABEL: test_mask_packs_epi32_rmb_512: 966 ; AVX512BW: ## BB#0: 967 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 968 ; AVX512BW-NEXT: retq 969 ; 970 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmb_512: 971 ; AVX512F-32: # BB#0: 972 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 973 ; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 974 ; 
AVX512F-32-NEXT: retl 975 %q = load i32, i32* %ptr_b 976 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 977 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 978 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 979 ret <32 x i16> %res 980 } 981 982 define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { 983 ; AVX512BW-LABEL: test_mask_packs_epi32_rmbk_512: 984 ; AVX512BW: ## BB#0: 985 ; AVX512BW-NEXT: kmovd %esi, %k1 986 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} 987 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 988 ; AVX512BW-NEXT: retq 989 ; 990 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmbk_512: 991 ; AVX512F-32: # BB#0: 992 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 993 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 994 ; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} 995 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 996 ; AVX512F-32-NEXT: retl 997 %q = load i32, i32* %ptr_b 998 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 999 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1000 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1001 ret <32 x i16> %res 1002 } 1003 1004 define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { 1005 ; AVX512BW-LABEL: test_mask_packs_epi32_rmbkz_512: 1006 ; AVX512BW: ## BB#0: 1007 ; AVX512BW-NEXT: kmovd %esi, %k1 1008 ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} 1009 ; AVX512BW-NEXT: retq 1010 ; 1011 ; AVX512F-32-LABEL: test_mask_packs_epi32_rmbkz_512: 1012 ; AVX512F-32: # BB#0: 1013 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1014 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1015 ; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} 1016 
; AVX512F-32-NEXT: retl 1017 %q = load i32, i32* %ptr_b 1018 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1019 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1020 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1021 ret <32 x i16> %res 1022 } 1023 1024 declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) 1025 1026 define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1027 ; AVX512BW-LABEL: test_mask_packs_epi16_rr_512: 1028 ; AVX512BW: ## BB#0: 1029 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 1030 ; AVX512BW-NEXT: retq 1031 ; 1032 ; AVX512F-32-LABEL: test_mask_packs_epi16_rr_512: 1033 ; AVX512F-32: # BB#0: 1034 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 1035 ; AVX512F-32-NEXT: retl 1036 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1037 ret <64 x i8> %res 1038 } 1039 1040 define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { 1041 ; AVX512BW-LABEL: test_mask_packs_epi16_rrk_512: 1042 ; AVX512BW: ## BB#0: 1043 ; AVX512BW-NEXT: kmovq %rdi, %k1 1044 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} 1045 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1046 ; AVX512BW-NEXT: retq 1047 ; 1048 ; AVX512F-32-LABEL: test_mask_packs_epi16_rrk_512: 1049 ; AVX512F-32: # BB#0: 1050 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1051 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1052 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1053 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} 1054 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1055 ; AVX512F-32-NEXT: retl 1056 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1057 ret <64 x i8> %res 1058 } 1059 1060 define <64 x i8> 
@test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { 1061 ; AVX512BW-LABEL: test_mask_packs_epi16_rrkz_512: 1062 ; AVX512BW: ## BB#0: 1063 ; AVX512BW-NEXT: kmovq %rdi, %k1 1064 ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} 1065 ; AVX512BW-NEXT: retq 1066 ; 1067 ; AVX512F-32-LABEL: test_mask_packs_epi16_rrkz_512: 1068 ; AVX512F-32: # BB#0: 1069 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1070 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1071 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1072 ; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} 1073 ; AVX512F-32-NEXT: retl 1074 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1075 ret <64 x i8> %res 1076 } 1077 1078 define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1079 ; AVX512BW-LABEL: test_mask_packs_epi16_rm_512: 1080 ; AVX512BW: ## BB#0: 1081 ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 1082 ; AVX512BW-NEXT: retq 1083 ; 1084 ; AVX512F-32-LABEL: test_mask_packs_epi16_rm_512: 1085 ; AVX512F-32: # BB#0: 1086 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1087 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 1088 ; AVX512F-32-NEXT: retl 1089 %b = load <32 x i16>, <32 x i16>* %ptr_b 1090 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1091 ret <64 x i8> %res 1092 } 1093 1094 define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { 1095 ; AVX512BW-LABEL: test_mask_packs_epi16_rmk_512: 1096 ; AVX512BW: ## BB#0: 1097 ; AVX512BW-NEXT: kmovq %rsi, %k1 1098 ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} 1099 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1100 ; AVX512BW-NEXT: retq 1101 ; 1102 ; AVX512F-32-LABEL: test_mask_packs_epi16_rmk_512: 1103 ; AVX512F-32: # BB#0: 1104 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1105 ; 
AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1106 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1107 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1108 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1} 1109 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1110 ; AVX512F-32-NEXT: retl 1111 %b = load <32 x i16>, <32 x i16>* %ptr_b 1112 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1113 ret <64 x i8> %res 1114 } 1115 1116 define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { 1117 ; AVX512BW-LABEL: test_mask_packs_epi16_rmkz_512: 1118 ; AVX512BW: ## BB#0: 1119 ; AVX512BW-NEXT: kmovq %rsi, %k1 1120 ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} 1121 ; AVX512BW-NEXT: retq 1122 ; 1123 ; AVX512F-32-LABEL: test_mask_packs_epi16_rmkz_512: 1124 ; AVX512F-32: # BB#0: 1125 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1126 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1127 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1128 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1129 ; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} 1130 ; AVX512F-32-NEXT: retl 1131 %b = load <32 x i16>, <32 x i16>* %ptr_b 1132 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1133 ret <64 x i8> %res 1134 } 1135 1136 declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) 1137 1138 1139 define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { 1140 ; AVX512BW-LABEL: test_mask_packus_epi32_rr_512: 1141 ; AVX512BW: ## BB#0: 1142 ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 1143 ; AVX512BW-NEXT: retq 1144 ; 1145 ; AVX512F-32-LABEL: test_mask_packus_epi32_rr_512: 1146 ; AVX512F-32: # BB#0: 1147 ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 1148 ; AVX512F-32-NEXT: retl 1149 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, 
<16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1150 ret <32 x i16> %res 1151 } 1152 1153 define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { 1154 ; AVX512BW-LABEL: test_mask_packus_epi32_rrk_512: 1155 ; AVX512BW: ## BB#0: 1156 ; AVX512BW-NEXT: kmovd %edi, %k1 1157 ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} 1158 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1159 ; AVX512BW-NEXT: retq 1160 ; 1161 ; AVX512F-32-LABEL: test_mask_packus_epi32_rrk_512: 1162 ; AVX512F-32: # BB#0: 1163 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1164 ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} 1165 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1166 ; AVX512F-32-NEXT: retl 1167 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1168 ret <32 x i16> %res 1169 } 1170 1171 define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { 1172 ; AVX512BW-LABEL: test_mask_packus_epi32_rrkz_512: 1173 ; AVX512BW: ## BB#0: 1174 ; AVX512BW-NEXT: kmovd %edi, %k1 1175 ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} 1176 ; AVX512BW-NEXT: retq 1177 ; 1178 ; AVX512F-32-LABEL: test_mask_packus_epi32_rrkz_512: 1179 ; AVX512F-32: # BB#0: 1180 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1181 ; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} 1182 ; AVX512F-32-NEXT: retl 1183 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1184 ret <32 x i16> %res 1185 } 1186 1187 define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { 1188 ; AVX512BW-LABEL: test_mask_packus_epi32_rm_512: 1189 ; AVX512BW: ## BB#0: 1190 ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 1191 ; AVX512BW-NEXT: retq 1192 ; 1193 ; AVX512F-32-LABEL: test_mask_packus_epi32_rm_512: 1194 ; AVX512F-32: # BB#0: 1195 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), 
%eax 1196 ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 1197 ; AVX512F-32-NEXT: retl 1198 %b = load <16 x i32>, <16 x i32>* %ptr_b 1199 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1200 ret <32 x i16> %res 1201 } 1202 1203 define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1204 ; AVX512BW-LABEL: test_mask_packus_epi32_rmk_512: 1205 ; AVX512BW: ## BB#0: 1206 ; AVX512BW-NEXT: kmovd %esi, %k1 1207 ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm1 {%k1} 1208 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1209 ; AVX512BW-NEXT: retq 1210 ; 1211 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmk_512: 1212 ; AVX512F-32: # BB#0: 1213 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1214 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1215 ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1} 1216 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1217 ; AVX512F-32-NEXT: retl 1218 %b = load <16 x i32>, <16 x i32>* %ptr_b 1219 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1220 ret <32 x i16> %res 1221 } 1222 1223 define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { 1224 ; AVX512BW-LABEL: test_mask_packus_epi32_rmkz_512: 1225 ; AVX512BW: ## BB#0: 1226 ; AVX512BW-NEXT: kmovd %esi, %k1 1227 ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} 1228 ; AVX512BW-NEXT: retq 1229 ; 1230 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmkz_512: 1231 ; AVX512F-32: # BB#0: 1232 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1233 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1234 ; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} 1235 ; AVX512F-32-NEXT: retl 1236 %b = load <16 x i32>, <16 x i32>* %ptr_b 1237 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1238 
ret <32 x i16> %res 1239 } 1240 1241 define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { 1242 ; AVX512BW-LABEL: test_mask_packus_epi32_rmb_512: 1243 ; AVX512BW: ## BB#0: 1244 ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 1245 ; AVX512BW-NEXT: retq 1246 ; 1247 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmb_512: 1248 ; AVX512F-32: # BB#0: 1249 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1250 ; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 1251 ; AVX512F-32-NEXT: retl 1252 %q = load i32, i32* %ptr_b 1253 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1254 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1255 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) 1256 ret <32 x i16> %res 1257 } 1258 1259 define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1260 ; AVX512BW-LABEL: test_mask_packus_epi32_rmbk_512: 1261 ; AVX512BW: ## BB#0: 1262 ; AVX512BW-NEXT: kmovd %esi, %k1 1263 ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} 1264 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1265 ; AVX512BW-NEXT: retq 1266 ; 1267 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmbk_512: 1268 ; AVX512F-32: # BB#0: 1269 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1270 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1271 ; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} 1272 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1273 ; AVX512F-32-NEXT: retl 1274 %q = load i32, i32* %ptr_b 1275 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1276 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1277 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) 1278 ret <32 x i16> %res 1279 } 1280 1281 define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x 
i32> %a, i32* %ptr_b, i32 %mask) { 1282 ; AVX512BW-LABEL: test_mask_packus_epi32_rmbkz_512: 1283 ; AVX512BW: ## BB#0: 1284 ; AVX512BW-NEXT: kmovd %esi, %k1 1285 ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} 1286 ; AVX512BW-NEXT: retq 1287 ; 1288 ; AVX512F-32-LABEL: test_mask_packus_epi32_rmbkz_512: 1289 ; AVX512F-32: # BB#0: 1290 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1291 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1292 ; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} 1293 ; AVX512F-32-NEXT: retl 1294 %q = load i32, i32* %ptr_b 1295 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 1296 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer 1297 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) 1298 ret <32 x i16> %res 1299 } 1300 1301 declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) 1302 1303 define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1304 ; AVX512BW-LABEL: test_mask_packus_epi16_rr_512: 1305 ; AVX512BW: ## BB#0: 1306 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 1307 ; AVX512BW-NEXT: retq 1308 ; 1309 ; AVX512F-32-LABEL: test_mask_packus_epi16_rr_512: 1310 ; AVX512F-32: # BB#0: 1311 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 1312 ; AVX512F-32-NEXT: retl 1313 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1314 ret <64 x i8> %res 1315 } 1316 1317 define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { 1318 ; AVX512BW-LABEL: test_mask_packus_epi16_rrk_512: 1319 ; AVX512BW: ## BB#0: 1320 ; AVX512BW-NEXT: kmovq %rdi, %k1 1321 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} 1322 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1323 ; AVX512BW-NEXT: retq 1324 ; 1325 ; AVX512F-32-LABEL: 
test_mask_packus_epi16_rrk_512: 1326 ; AVX512F-32: # BB#0: 1327 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1328 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1329 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1330 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} 1331 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1332 ; AVX512F-32-NEXT: retl 1333 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1334 ret <64 x i8> %res 1335 } 1336 1337 define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { 1338 ; AVX512BW-LABEL: test_mask_packus_epi16_rrkz_512: 1339 ; AVX512BW: ## BB#0: 1340 ; AVX512BW-NEXT: kmovq %rdi, %k1 1341 ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} 1342 ; AVX512BW-NEXT: retq 1343 ; 1344 ; AVX512F-32-LABEL: test_mask_packus_epi16_rrkz_512: 1345 ; AVX512F-32: # BB#0: 1346 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1347 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1348 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1349 ; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} 1350 ; AVX512F-32-NEXT: retl 1351 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1352 ret <64 x i8> %res 1353 } 1354 1355 define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1356 ; AVX512BW-LABEL: test_mask_packus_epi16_rm_512: 1357 ; AVX512BW: ## BB#0: 1358 ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 1359 ; AVX512BW-NEXT: retq 1360 ; 1361 ; AVX512F-32-LABEL: test_mask_packus_epi16_rm_512: 1362 ; AVX512F-32: # BB#0: 1363 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1364 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 1365 ; AVX512F-32-NEXT: retl 1366 %b = load <32 x i16>, <32 x i16>* %ptr_b 1367 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) 1368 ret <64 x i8> %res 1369 } 1370 
1371 define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { 1372 ; AVX512BW-LABEL: test_mask_packus_epi16_rmk_512: 1373 ; AVX512BW: ## BB#0: 1374 ; AVX512BW-NEXT: kmovq %rsi, %k1 1375 ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm1 {%k1} 1376 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1377 ; AVX512BW-NEXT: retq 1378 ; 1379 ; AVX512F-32-LABEL: test_mask_packus_epi16_rmk_512: 1380 ; AVX512F-32: # BB#0: 1381 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1382 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1383 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1384 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1385 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1} 1386 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1387 ; AVX512F-32-NEXT: retl 1388 %b = load <32 x i16>, <32 x i16>* %ptr_b 1389 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) 1390 ret <64 x i8> %res 1391 } 1392 1393 define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { 1394 ; AVX512BW-LABEL: test_mask_packus_epi16_rmkz_512: 1395 ; AVX512BW: ## BB#0: 1396 ; AVX512BW-NEXT: kmovq %rsi, %k1 1397 ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} 1398 ; AVX512BW-NEXT: retq 1399 ; 1400 ; AVX512F-32-LABEL: test_mask_packus_epi16_rmkz_512: 1401 ; AVX512F-32: # BB#0: 1402 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1403 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 1404 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1405 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 1406 ; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} 1407 ; AVX512F-32-NEXT: retl 1408 %b = load <32 x i16>, <32 x i16>* %ptr_b 1409 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) 1410 ret <64 x i8> %res 1411 } 1412 1413 declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x 
i16>, <64 x i8>, i64) 1414 1415 define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1416 ; AVX512BW-LABEL: test_mask_adds_epi16_rr_512: 1417 ; AVX512BW: ## BB#0: 1418 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 1419 ; AVX512BW-NEXT: retq 1420 ; 1421 ; AVX512F-32-LABEL: test_mask_adds_epi16_rr_512: 1422 ; AVX512F-32: # BB#0: 1423 ; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 1424 ; AVX512F-32-NEXT: retl 1425 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1426 ret <32 x i16> %res 1427 } 1428 1429 define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1430 ; AVX512BW-LABEL: test_mask_adds_epi16_rrk_512: 1431 ; AVX512BW: ## BB#0: 1432 ; AVX512BW-NEXT: kmovd %edi, %k1 1433 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} 1434 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1435 ; AVX512BW-NEXT: retq 1436 ; 1437 ; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512: 1438 ; AVX512F-32: # BB#0: 1439 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1440 ; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} 1441 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1442 ; AVX512F-32-NEXT: retl 1443 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1444 ret <32 x i16> %res 1445 } 1446 1447 define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { 1448 ; AVX512BW-LABEL: test_mask_adds_epi16_rrkz_512: 1449 ; AVX512BW: ## BB#0: 1450 ; AVX512BW-NEXT: kmovd %edi, %k1 1451 ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} 1452 ; AVX512BW-NEXT: retq 1453 ; 1454 ; AVX512F-32-LABEL: test_mask_adds_epi16_rrkz_512: 1455 ; AVX512F-32: # BB#0: 1456 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1457 ; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} 1458 ; AVX512F-32-NEXT: retl 1459 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x 
i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1460 ret <32 x i16> %res 1461 } 1462 1463 define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { 1464 ; AVX512BW-LABEL: test_mask_adds_epi16_rm_512: 1465 ; AVX512BW: ## BB#0: 1466 ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 1467 ; AVX512BW-NEXT: retq 1468 ; 1469 ; AVX512F-32-LABEL: test_mask_adds_epi16_rm_512: 1470 ; AVX512F-32: # BB#0: 1471 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1472 ; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 1473 ; AVX512F-32-NEXT: retl 1474 %b = load <32 x i16>, <32 x i16>* %ptr_b 1475 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1476 ret <32 x i16> %res 1477 } 1478 1479 define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { 1480 ; AVX512BW-LABEL: test_mask_adds_epi16_rmk_512: 1481 ; AVX512BW: ## BB#0: 1482 ; AVX512BW-NEXT: kmovd %esi, %k1 1483 ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} 1484 ; AVX512BW-NEXT: vmovaps %zmm1, %zmm0 1485 ; AVX512BW-NEXT: retq 1486 ; 1487 ; AVX512F-32-LABEL: test_mask_adds_epi16_rmk_512: 1488 ; AVX512F-32: # BB#0: 1489 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1490 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1491 ; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} 1492 ; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0 1493 ; AVX512F-32-NEXT: retl 1494 %b = load <32 x i16>, <32 x i16>* %ptr_b 1495 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1496 ret <32 x i16> %res 1497 } 1498 1499 define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { 1500 ; AVX512BW-LABEL: test_mask_adds_epi16_rmkz_512: 1501 ; AVX512BW: ## BB#0: 1502 ; AVX512BW-NEXT: kmovd %esi, %k1 1503 ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} 1504 ; AVX512BW-NEXT: retq 1505 ; 1506 
; AVX512F-32-LABEL: test_mask_adds_epi16_rmkz_512: 1507 ; AVX512F-32: # BB#0: 1508 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1509 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1510 ; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} 1511 ; AVX512F-32-NEXT: retl 1512 %b = load <32 x i16>, <32 x i16>* %ptr_b 1513 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) 1514 ret <32 x i16> %res 1515 } 1516 1517 declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 1518 1519 define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { 1520 ; AVX512BW-LABEL: test_mask_subs_epi16_rr_512: 1521 ; AVX512BW: ## BB#0: 1522 ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 1523 ; AVX512BW-NEXT: retq 1524 ; 1525 ; AVX512F-32-LABEL: test_mask_subs_epi16_rr_512: 1526 ; AVX512F-32: # BB#0: 1527 ; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 1528 ; AVX512F-32-NEXT: retl 1529 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) 1530 ret <32 x i16> %res 1531 } 1532 1533 define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { 1534 ; AVX512BW-LABEL: test_mask_subs_epi16_rrk_512: 1535 ; AVX512BW: ## BB#0: 1536 ; AVX512BW-NEXT: kmovd %edi, %k1 1537 ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} 1538 ; AVX512BW-NEXT: vmovaps %zmm2, %zmm0 1539 ; AVX512BW-NEXT: retq 1540 ; 1541 ; AVX512F-32-LABEL: test_mask_subs_epi16_rrk_512: 1542 ; AVX512F-32: # BB#0: 1543 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 1544 ; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} 1545 ; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0 1546 ; AVX512F-32-NEXT: retl 1547 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) 1548 ret <32 x i16> %res 1549 } 1550 1551 define <32 x i16> 
@test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epi16_rrkz_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rrkz_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
  %out = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %out
}

; psubs.w with the second operand loaded from %ptr_b and an all-ones mask
; (i32 -1): the load is expected to fold into an unmasked vpsubsw.
define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; AVX512BW-LABEL: test_mask_subs_epi16_rm_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rm_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %out = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %out
}

; psubs.w with a loaded second operand, merged into %passThru under %mask:
; the folded-load vpsubsw writes zmm1 with {%k1} and is then copied to zmm0.
define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epi16_rmk_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rmk_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
; AVX512F-32-NEXT: retl
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %out = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %out
}

; psubs.w with a loaded second operand, a zeroinitializer passthrough and
; %mask: the folded-load vpsubsw is expected with {%k1} {z}.
define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epi16_rmkz_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epi16_rmkz_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %out = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %out
}

declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; paddus.w on two register operands with an all-ones mask (i32 -1): a plain
; unmasked vpaddusw is expected.
define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_adds_epu16_rr_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rr_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %out = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %out
}

; paddus.w on two register operands, merged into %passThru under %mask:
; vpaddusw writes zmm2 with {%k1} and the result is then copied to zmm0.
define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epu16_rrk_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rrk_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %out = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %out
}

; paddus.w on two register operands with a zeroinitializer passthrough and
; %mask: vpaddusw is expected with {%k1} {z}.
define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epu16_rrkz_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rrkz_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
  %out = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %out
}

; paddus.w with the second operand loaded from %ptr_b and an all-ones mask
; (i32 -1): the load is expected to fold into an unmasked vpaddusw.
define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; AVX512BW-LABEL: test_mask_adds_epu16_rm_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rm_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %out = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %out
}

; paddus.w with a loaded second operand, merged into %passThru under %mask:
; the folded-load vpaddusw writes zmm1 with {%k1} and is then copied to zmm0.
define <32 x i16>
@test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epu16_rmk_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rmk_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
; AVX512F-32-NEXT: retl
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %out = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %out
}

; paddus.w with a loaded second operand, a zeroinitializer passthrough and
; %mask: the folded-load vpaddusw is expected with {%k1} {z}.
define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; AVX512BW-LABEL: test_mask_adds_epu16_rmkz_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_adds_epu16_rmkz_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %out = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %out
}

declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; psubus.w on two register operands with an all-ones mask (i32 -1): a plain
; unmasked vpsubusw is expected.
define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
; AVX512BW-LABEL: test_mask_subs_epu16_rr_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rr_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %out = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %out
}

; psubus.w on two register operands, merged into %passThru under %mask:
; vpsubusw writes zmm2 with {%k1} and the result is then copied to zmm0.
define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rrk_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vmovaps %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rrk_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vmovaps %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %out = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %out
}

; psubus.w on two register operands with a zeroinitializer passthrough and
; %mask: vpsubusw is expected with {%k1} {z}.
define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rrkz_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rrkz_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
  %out = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %out
}

; psubus.w with the second operand loaded from %ptr_b and an all-ones mask
; (i32 -1): the load is expected to fold into an unmasked vpsubusw.
define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
; AVX512BW-LABEL: test_mask_subs_epu16_rm_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rm_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0
; AVX512F-32-NEXT: retl
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %out = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %out
}

; psubus.w with a loaded second operand, merged into %passThru under %mask:
; the folded-load vpsubusw writes zmm1 with {%k1} and is then copied to zmm0.
define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rmk_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vmovaps %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rmk_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vmovaps %zmm1, %zmm0
; AVX512F-32-NEXT: retl
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %out = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> %passThru, i32 %mask)
  ret <32 x i16> %out
}

; psubus.w with a loaded second operand, a zeroinitializer passthrough and
; %mask: the folded-load vpsubusw is expected with {%k1} {z}.
define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
; AVX512BW-LABEL: test_mask_subs_epu16_rmkz_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %esi, %k1
; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_subs_epu16_rmkz_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z}
; AVX512F-32-NEXT: retl
  %rhs = load <32 x i16>, <32 x i16>* %ptr_b
  %out = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %rhs, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %out
}

declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

; Calls pmaxs.b once with mask %x3 and once with an all-ones mask and adds the
; two results, so both the {%k1} and the unmasked vpmaxsb appear. On the 32-bit
; run the i64 mask is assembled from two kmovd loads with kunpckdq.
define <64 x i8> @test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpmaxsb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpmaxsb %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %sum = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; Calls pmaxs.w once with mask %x3 and once with an all-ones mask and adds the
; two results, so both the {%k1} and the unmasked vpmaxsw appear.
define <32 x i16> @test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpmaxsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpmaxsw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

; Calls pmaxu.b once with mask %x3 and once with an all-ones mask and adds the
; two results, so both the {%k1} and the unmasked vpmaxub appear. On the 32-bit
; run the i64 mask is assembled from two kmovd loads with kunpckdq.
define <64 x i8> @test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpmaxub %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpmaxub %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %sum = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; Calls pmaxu.w once with mask %x3 and once with an all-ones mask and adds the
; two results, so both the {%k1} and the unmasked vpmaxuw appear.
define <32 x i16> @test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpmaxuw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpmaxuw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

; Calls pmins.b once with mask %x3 and once with an all-ones mask and adds the
; two results, so both the {%k1} and the unmasked vpminsb appear. On the 32-bit
; run the i64 mask is assembled from two kmovd loads with kunpckdq.
define <64 x i8> @test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmins_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpminsb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpminsb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpminsb %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %sum = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; Calls pmins.w once with mask %x3 and once with an all-ones mask and adds the
; two results, so both the {%k1} and the unmasked vpminsw appear.
define <32 x i16> @test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmins_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpminsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmins_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpminsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpminsw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

; Calls pminu.b once with mask %x3 and once with an all-ones mask and adds the
; two results, so both the {%k1} and the unmasked vpminub appear. On the 32-bit
; run the i64 mask is assembled from two kmovd loads with kunpckdq.
define <64 x i8> @test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pminu_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpminub %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpminub %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpminub %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %sum = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; Calls pminu.w once with mask %x3 and once with an all-ones mask and adds the
; two results, so both the {%k1} and the unmasked vpminuw appear.
define <32 x i16> @test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pminu_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpminuw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pminu_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpminuw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpminuw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; Calls mask.vpermt2var.hi once with mask %x3 and once with an all-ones mask
; and adds the results: vpermt2w is expected with {%k1} into a copy of zmm1
; (zmm3) and unmasked into zmm1 itself.
define <32 x i16> @test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1}
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; Calls maskz.vpermt2var.hi once with mask %x3 and once with an all-ones mask
; and adds the results: vpermt2w is expected with {%k1} {z} into a copy of zmm1
; (zmm3) and unmasked into zmm1 itself.
define <32 x i16> @test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 {%k1} {z}
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; Calls mask.vpermi2var.hi once with mask %x3 and once with an all-ones mask
; and adds the results: vpermi2w is expected with {%k1} into a copy of zmm1
; (zmm3) and unmasked into zmm1 itself.
define <32 x i16> @test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vmovaps %zmm1, %zmm3
; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vmovaps %zmm1, %zmm3
; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

; Calls pavg.b once with mask %x3 and once with an all-ones mask and adds the
; two results, so both the {%k1} and the unmasked vpavgb appear. On the 32-bit
; run the i64 mask is assembled from two kmovd loads with kunpckdq.
define <64 x i8> @test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pavg_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpavgb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpavgb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpavgb %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %sum = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; Calls pavg.w once with mask %x3 and once with an all-ones mask and adds the
; two results, so both the {%k1} and the unmasked vpavgw appear.
define <32 x i16> @test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pavg_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpavgw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pavg_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpavgw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpavgw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)

; Calls pshuf.b once with mask %x3 and once with an all-ones mask and adds the
; two results, so both the {%k1} and the unmasked vpshufb appear. On the 32-bit
; run the i64 mask is assembled from two kmovd loads with kunpckdq.
define <64 x i8> @test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %sum = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)

; Calls pabs.w once with mask %x2 and once with an all-ones mask and adds the
; two results, so both the {%k1} and the unmasked vpabsw appear.
define <32 x
i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpabsw %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vpabsw %zmm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpabsw %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vpabsw %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)

; Calls pabs.b once with mask %x2 and once with an all-ones mask and adds the
; two results, so both the {%k1} and the unmasked vpabsb appear. On the 32-bit
; run the i64 mask is assembled from two kmovd loads with kunpckdq.
define <64 x i8> @test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pabs_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovq %rdi, %k1
; AVX512BW-NEXT: vpabsb %zmm0, %zmm1 {%k1}
; AVX512BW-NEXT: vpabsb %zmm0, %zmm0
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pabs_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
; AVX512F-32-NEXT: vpabsb %zmm0, %zmm1 {%k1}
; AVX512F-32-NEXT: vpabsb %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
  %unmasked = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
  %sum = add <64 x i8> %masked, %unmasked
  ret <64 x i8> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; Calls pmulhu.w once with mask %x3 and once with an all-ones mask and adds the
; two results, so both the {%k1} and the unmasked vpmulhuw appear.
define <32 x i16> @test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; Calls pmulh.w once with mask %x3 and once with an all-ones mask and adds the
; two results, so both the {%k1} and the unmasked vpmulhw appear.
define <32 x i16> @test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpmulhw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

; Calls pmul.hr.sw once with mask %x3 and once with an all-ones mask and adds
; the two results, so both the {%k1} and the unmasked vpmulhrsw appear.
define <32 x i16> @test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512BW-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1}
; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; AVX512F-32-NEXT: retl
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %unmasked = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %sum = add <32 x i16> %masked, %unmasked
  ret <32 x i16> %sum
}

declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
; AVX512BW: ## BB#0: 2312 ; AVX512BW-NEXT: kmovd %edi, %k1 2313 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm1 {%k1} 2314 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z} 2315 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 2316 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0 2317 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 2318 ; AVX512BW-NEXT: retq 2319 ; 2320 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_512: 2321 ; AVX512F-32: # BB#0: 2322 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2323 ; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm1 {%k1} 2324 ; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z} 2325 ; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm0 2326 ; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 2327 ; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0 2328 ; AVX512F-32-NEXT: retl 2329 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) 2330 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) 2331 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) 2332 %res3 = add <32 x i8> %res0, %res1 2333 %res4 = add <32 x i8> %res3, %res2 2334 ret <32 x i8> %res4 2335 } 2336 2337 declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32) 2338 2339 define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { 2340 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512: 2341 ; AVX512BW: ## BB#0: 2342 ; AVX512BW-NEXT: kmovd %esi, %k1 2343 ; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi) 2344 ; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi) {%k1} 2345 ; AVX512BW-NEXT: retq 2346 ; 2347 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512: 2348 ; AVX512F-32: # BB#0: 2349 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2350 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 2351 ; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax) 2352 ; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax) {%k1} 2353 ; AVX512F-32-NEXT: retl 2354 call void 
@llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) 2355 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) 2356 ret void 2357 } 2358 2359 declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32) 2360 2361 define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { 2362 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_512: 2363 ; AVX512BW: ## BB#0: 2364 ; AVX512BW-NEXT: kmovd %edi, %k1 2365 ; AVX512BW-NEXT: vpmovswb %zmm0, %ymm1 {%k1} 2366 ; AVX512BW-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z} 2367 ; AVX512BW-NEXT: vpmovswb %zmm0, %ymm0 2368 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0 2369 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 2370 ; AVX512BW-NEXT: retq 2371 ; 2372 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_512: 2373 ; AVX512F-32: # BB#0: 2374 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2375 ; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm1 {%k1} 2376 ; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z} 2377 ; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm0 2378 ; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 2379 ; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0 2380 ; AVX512F-32-NEXT: retl 2381 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) 2382 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) 2383 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) 2384 %res3 = add <32 x i8> %res0, %res1 2385 %res4 = add <32 x i8> %res3, %res2 2386 ret <32 x i8> %res4 2387 } 2388 2389 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32) 2390 2391 define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { 2392 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512: 2393 ; AVX512BW: ## BB#0: 2394 ; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi) 2395 ; AVX512BW-NEXT: 
kmovd %esi, %k1 2396 ; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi) {%k1} 2397 ; AVX512BW-NEXT: retq 2398 ; 2399 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512: 2400 ; AVX512F-32: # BB#0: 2401 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 2402 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx 2403 ; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx) 2404 ; AVX512F-32-NEXT: kmovd %eax, %k1 2405 ; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx) {%k1} 2406 ; AVX512F-32-NEXT: retl 2407 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) 2408 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) 2409 ret void 2410 } 2411 2412 declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32) 2413 2414 define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { 2415 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_512: 2416 ; AVX512BW: ## BB#0: 2417 ; AVX512BW-NEXT: kmovd %edi, %k1 2418 ; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm1 {%k1} 2419 ; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z} 2420 ; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0 2421 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0 2422 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 2423 ; AVX512BW-NEXT: retq 2424 ; 2425 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_512: 2426 ; AVX512F-32: # BB#0: 2427 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2428 ; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm1 {%k1} 2429 ; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z} 2430 ; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm0 2431 ; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 2432 ; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0 2433 ; AVX512F-32-NEXT: retl 2434 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) 2435 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) 2436 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 
x i8> zeroinitializer, i32 %x2) 2437 %res3 = add <32 x i8> %res0, %res1 2438 %res4 = add <32 x i8> %res3, %res2 2439 ret <32 x i8> %res4 2440 } 2441 2442 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32) 2443 2444 define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { 2445 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512: 2446 ; AVX512BW: ## BB#0: 2447 ; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi) 2448 ; AVX512BW-NEXT: kmovd %esi, %k1 2449 ; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi) {%k1} 2450 ; AVX512BW-NEXT: retq 2451 ; 2452 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512: 2453 ; AVX512F-32: # BB#0: 2454 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax 2455 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx 2456 ; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx) 2457 ; AVX512F-32-NEXT: kmovd %eax, %k1 2458 ; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx) {%k1} 2459 ; AVX512F-32-NEXT: retl 2460 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) 2461 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) 2462 ret void 2463 } 2464 2465 declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32) 2466 2467 define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) { 2468 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: 2469 ; AVX512BW: ## BB#0: 2470 ; AVX512BW-NEXT: kmovd %edi, %k1 2471 ; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} 2472 ; AVX512BW-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 2473 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2474 ; AVX512BW-NEXT: retq 2475 ; 2476 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: 2477 ; AVX512F-32: # BB#0: 2478 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2479 ; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} 2480 ; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 2481 ; 
AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2482 ; AVX512F-32-NEXT: retl 2483 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) 2484 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1) 2485 %res2 = add <32 x i16> %res, %res1 2486 ret <32 x i16> %res2 2487 } 2488 2489 declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16) 2490 2491 define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) { 2492 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: 2493 ; AVX512BW: ## BB#0: 2494 ; AVX512BW-NEXT: kmovw %edi, %k1 2495 ; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} 2496 ; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0 2497 ; AVX512BW-NEXT: vpaddd %zmm0, %zmm2, %zmm0 2498 ; AVX512BW-NEXT: retq 2499 ; 2500 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: 2501 ; AVX512F-32: # BB#0: 2502 ; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 2503 ; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} 2504 ; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0 2505 ; AVX512F-32-NEXT: vpaddd %zmm0, %zmm2, %zmm0 2506 ; AVX512F-32-NEXT: retl 2507 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) 2508 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1) 2509 %res2 = add <16 x i32> %res, %res1 2510 ret <16 x i32> %res2 2511 } 2512 2513 declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 2514 2515 define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 2516 ; AVX512BW-LABEL: test_int_x86_avx512_mask_punpckhb_w_512: 2517 ; AVX512BW: ## BB#0: 2518 ; AVX512BW-NEXT: kmovq %rdi, %k1 2519 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = 
zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31],zmm2[40],k1[40],zmm2[41],k1[41],zmm2[42],k1[42],zmm2[43],k1[43],zmm2[44],k1[44],zmm2[45],k1[45],zmm2[46],k1[46],zmm2[47],k1[47],zmm2[56],k1[56],zmm2[57],k1[57],zmm2[58],k1[58],zmm2[59],k1[59],zmm2[60],k1[60],zmm2[61],k1[61],zmm2[62],k1[62],zmm2[63],k1[63] 2520 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] 2521 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 2522 ; AVX512BW-NEXT: retq 2523 ; 2524 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpckhb_w_512: 2525 ; AVX512F-32: # BB#0: 2526 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 2527 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2528 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 2529 ; AVX512F-32-NEXT: vpunpckhbw {{.*#+}} zmm2 = 
zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31],zmm2[40],k1[40],zmm2[41],k1[41],zmm2[42],k1[42],zmm2[43],k1[43],zmm2[44],k1[44],zmm2[45],k1[45],zmm2[46],k1[46],zmm2[47],k1[47],zmm2[56],k1[56],zmm2[57],k1[57],zmm2[58],k1[58],zmm2[59],k1[59],zmm2[60],k1[60],zmm2[61],k1[61],zmm2[62],k1[62],zmm2[63],k1[63] 2530 ; AVX512F-32-NEXT: vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] 2531 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 2532 ; AVX512F-32-NEXT: retl 2533 %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 2534 %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 2535 %res2 = add <64 x i8> %res, %res1 2536 ret <64 x i8> %res2 2537 } 2538 2539 declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) 2540 2541 define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { 2542 ; AVX512BW-LABEL: test_int_x86_avx512_mask_punpcklb_w_512: 2543 ; AVX512BW: ## BB#0: 2544 ; AVX512BW-NEXT: kmovq %rdi, %k1 2545 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm2 = 
zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[32],k1[32],zmm2[33],k1[33],zmm2[34],k1[34],zmm2[35],k1[35],zmm2[36],k1[36],zmm2[37],k1[37],zmm2[38],k1[38],zmm2[39],k1[39],zmm2[48],k1[48],zmm2[49],k1[49],zmm2[50],k1[50],zmm2[51],k1[51],zmm2[52],k1[52],zmm2[53],k1[53],zmm2[54],k1[54],zmm2[55],k1[55] 2546 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] 2547 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm2, %zmm0 2548 ; AVX512BW-NEXT: retq 2549 ; 2550 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpcklb_w_512: 2551 ; AVX512F-32: # BB#0: 2552 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 2553 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2554 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 2555 ; AVX512F-32-NEXT: vpunpcklbw {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[32],k1[32],zmm2[33],k1[33],zmm2[34],k1[34],zmm2[35],k1[35],zmm2[36],k1[36],zmm2[37],k1[37],zmm2[38],k1[38],zmm2[39],k1[39],zmm2[48],k1[48],zmm2[49],k1[49],zmm2[50],k1[50],zmm2[51],k1[51],zmm2[52],k1[52],zmm2[53],k1[53],zmm2[54],k1[54],zmm2[55],k1[55] 2556 ; 
AVX512F-32-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] 2557 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 2558 ; AVX512F-32-NEXT: retl 2559 %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) 2560 %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) 2561 %res2 = add <64 x i8> %res, %res1 2562 ret <64 x i8> %res2 2563 } 2564 2565 declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2566 2567 define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2568 ; AVX512BW-LABEL: test_int_x86_avx512_mask_punpckhw_d_512: 2569 ; AVX512BW: ## BB#0: 2570 ; AVX512BW-NEXT: kmovd %edi, %k1 2571 ; AVX512BW-NEXT: vpunpckhwd {{.*#+}} zmm2 = zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31] 2572 ; AVX512BW-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31] 2573 ; 
AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2574 ; AVX512BW-NEXT: retq 2575 ; 2576 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpckhw_d_512: 2577 ; AVX512F-32: # BB#0: 2578 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2579 ; AVX512F-32-NEXT: vpunpckhwd {{.*#+}} zmm2 = zmm2[4],k1[4],zmm2[5],k1[5],zmm2[6],k1[6],zmm2[7],k1[7],zmm2[12],k1[12],zmm2[13],k1[13],zmm2[14],k1[14],zmm2[15],k1[15],zmm2[20],k1[20],zmm2[21],k1[21],zmm2[22],k1[22],zmm2[23],k1[23],zmm2[28],k1[28],zmm2[29],k1[29],zmm2[30],k1[30],zmm2[31],k1[31] 2580 ; AVX512F-32-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31] 2581 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2582 ; AVX512F-32-NEXT: retl 2583 %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2584 %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2585 %res2 = add <32 x i16> %res, %res1 2586 ret <32 x i16> %res2 2587 } 2588 2589 declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) 2590 2591 define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { 2592 ; AVX512BW-LABEL: test_int_x86_avx512_mask_punpcklw_d_512: 2593 ; AVX512BW: ## BB#0: 2594 ; AVX512BW-NEXT: kmovd %edi, %k1 2595 ; AVX512BW-NEXT: vpunpcklwd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27] 2596 ; AVX512BW-NEXT: vpunpcklwd {{.*#+}} zmm0 = 
zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27] 2597 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2598 ; AVX512BW-NEXT: retq 2599 ; 2600 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_punpcklw_d_512: 2601 ; AVX512F-32: # BB#0: 2602 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2603 ; AVX512F-32-NEXT: vpunpcklwd {{.*#+}} zmm2 = zmm2[0],k1[0],zmm2[1],k1[1],zmm2[2],k1[2],zmm2[3],k1[3],zmm2[8],k1[8],zmm2[9],k1[9],zmm2[10],k1[10],zmm2[11],k1[11],zmm2[16],k1[16],zmm2[17],k1[17],zmm2[18],k1[18],zmm2[19],k1[19],zmm2[24],k1[24],zmm2[25],k1[25],zmm2[26],k1[26],zmm2[27],k1[27] 2604 ; AVX512F-32-NEXT: vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27] 2605 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 2606 ; AVX512F-32-NEXT: retl 2607 %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) 2608 %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) 2609 %res2 = add <32 x i16> %res, %res1 2610 ret <32 x i16> %res2 2611 } 2612 2613 declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64) 2614 2615 define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) { 2616 ; AVX512BW-LABEL: test_int_x86_avx512_mask_palignr_512: 2617 ; AVX512BW: ## BB#0: 2618 ; AVX512BW-NEXT: kmovq %rdi, %k1 2619 ; AVX512BW-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1} 2620 ; AVX512BW-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3 {%k1} {z} 
2621 ; AVX512BW-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0 2622 ; AVX512BW-NEXT: vpaddb %zmm3, %zmm2, %zmm1 2623 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 2624 ; AVX512BW-NEXT: retq 2625 ; 2626 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_palignr_512: 2627 ; AVX512F-32: # BB#0: 2628 ; AVX512F-32-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3 2629 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 2630 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2631 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 2632 ; AVX512F-32-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1} 2633 ; AVX512F-32-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z} 2634 ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm2, %zmm0 2635 ; AVX512F-32-NEXT: vpaddb %zmm3, %zmm0, %zmm0 2636 ; AVX512F-32-NEXT: retl 2637 %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4) 2638 %res1 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4) 2639 %res2 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1) 2640 %res3 = add <64 x i8> %res, %res1 2641 %res4 = add <64 x i8> %res3, %res2 2642 ret <64 x i8> %res4 2643 } 2644 2645 declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32) 2646 2647 define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) { 2648 ; AVX512BW-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: 2649 ; AVX512BW: ## BB#0: 2650 ; AVX512BW-NEXT: kmovd %edi, %k1 2651 ; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} 2652 ; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z} 2653 ; AVX512BW-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 2654 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1 2655 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2656 ; AVX512BW-NEXT: retq 2657 ; 2658 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: 2659 ; AVX512F-32: # BB#0: 
2660 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2661 ; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} 2662 ; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z} 2663 ; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 2664 ; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm1 2665 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 2666 ; AVX512F-32-NEXT: retl 2667 %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4) 2668 %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4) 2669 %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1) 2670 %res3 = add <32 x i16> %res, %res1 2671 %res4 = add <32 x i16> %res3, %res2 2672 ret <32 x i16> %res4 2673 } 2674 2675 declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32) 2676 2677 define <8 x i64>@test_int_x86_avx512_mask_psll_dq_512(<8 x i64> %x0) { 2678 ; AVX512BW-LABEL: test_int_x86_avx512_mask_psll_dq_512: 2679 ; AVX512BW: ## BB#0: 2680 ; AVX512BW-NEXT: vpslldq $8, %zmm0, %zmm1 2681 ; AVX512BW-NEXT: vpslldq $4, %zmm0, %zmm0 2682 ; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0 2683 ; AVX512BW-NEXT: retq 2684 ; 2685 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psll_dq_512: 2686 ; AVX512F-32: # BB#0: 2687 ; AVX512F-32-NEXT: vpslldq $8, %zmm0, %zmm1 2688 ; AVX512F-32-NEXT: vpslldq $4, %zmm0, %zmm0 2689 ; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0 2690 ; AVX512F-32-NEXT: retl 2691 %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8) 2692 %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4) 2693 %res2 = add <8 x i64> %res, %res1 2694 ret <8 x i64> %res2 2695 } 2696 2697 declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32) 2698 2699 define <8 x i64>@test_int_x86_avx512_mask_psrl_dq_512(<8 x i64> %x0) { 2700 ; AVX512BW-LABEL: 
test_int_x86_avx512_mask_psrl_dq_512: 2701 ; AVX512BW: ## BB#0: 2702 ; AVX512BW-NEXT: vpsrldq $8, %zmm0, %zmm1 2703 ; AVX512BW-NEXT: vpsrldq $4, %zmm0, %zmm0 2704 ; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0 2705 ; AVX512BW-NEXT: retq 2706 ; 2707 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrl_dq_512: 2708 ; AVX512F-32: # BB#0: 2709 ; AVX512F-32-NEXT: vpsrldq $8, %zmm0, %zmm1 2710 ; AVX512F-32-NEXT: vpsrldq $4, %zmm0, %zmm0 2711 ; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0 2712 ; AVX512F-32-NEXT: retl 2713 %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8) 2714 %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4) 2715 %res2 = add <8 x i64> %res, %res1 2716 ret <8 x i64> %res2 2717 } 2718 declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>) 2719 2720 define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){ 2721 ; AVX512BW-LABEL: test_int_x86_avx512_mask_psadb_w_512: 2722 ; AVX512BW: ## BB#0: 2723 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm1 2724 ; AVX512BW-NEXT: vpsadbw %zmm2, %zmm0, %zmm0 2725 ; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0 2726 ; AVX512BW-NEXT: retq 2727 ; 2728 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_psadb_w_512: 2729 ; AVX512F-32: # BB#0: 2730 ; AVX512F-32-NEXT: vpsadbw %zmm1, %zmm0, %zmm1 2731 ; AVX512F-32-NEXT: vpsadbw %zmm2, %zmm0, %zmm0 2732 ; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0 2733 ; AVX512F-32-NEXT: retl 2734 %res = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1) 2735 %res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2) 2736 %res2 = add <8 x i64> %res, %res1 2737 ret <8 x i64> %res2 2738 } 2739 2740 declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32) 2741 2742 define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) { 2743 ; AVX512BW-LABEL: test_int_x86_avx512_kunpck_wd: 2744 ; AVX512BW: ## BB#0: 2745 ; AVX512BW-NEXT: kmovd %edi, %k0 2746 ; AVX512BW-NEXT: kmovd %esi, %k1 2747 
; AVX512BW-NEXT: kunpckwd %k1, %k0, %k0 2748 ; AVX512BW-NEXT: kmovd %k0, %eax 2749 ; AVX512BW-NEXT: retq 2750 ; 2751 ; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd: 2752 ; AVX512F-32: # BB#0: 2753 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 2754 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2755 ; AVX512F-32-NEXT: kunpckwd %k1, %k0, %k0 2756 ; AVX512F-32-NEXT: kmovd %k0, %eax 2757 ; AVX512F-32-NEXT: retl 2758 %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1) 2759 ret i32 %res 2760 } 2761 2762 declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64) 2763 2764 define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) { 2765 ; AVX512BW-LABEL: test_int_x86_avx512_kunpck_qd: 2766 ; AVX512BW: ## BB#0: 2767 ; AVX512BW-NEXT: kmovq %rdi, %k0 2768 ; AVX512BW-NEXT: kmovq %rsi, %k1 2769 ; AVX512BW-NEXT: kunpckdq %k1, %k0, %k0 2770 ; AVX512BW-NEXT: kmovq %k0, %rax 2771 ; AVX512BW-NEXT: retq 2772 ; 2773 ; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_qd: 2774 ; AVX512F-32: # BB#0: 2775 ; AVX512F-32-NEXT: subl $12, %esp 2776 ; AVX512F-32-NEXT: .Ltmp8: 2777 ; AVX512F-32-NEXT: .cfi_def_cfa_offset 16 2778 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 2779 ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 2780 ; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k0 2781 ; AVX512F-32-NEXT: kmovq %k0, (%esp) 2782 ; AVX512F-32-NEXT: movl (%esp), %eax 2783 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx 2784 ; AVX512F-32-NEXT: addl $12, %esp 2785 ; AVX512F-32-NEXT: retl 2786 %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1) 2787 ret i64 %res 2788 } 2789