1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,X86 3 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=CHECK,X64 4 5 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512bw-builtins.c 6 7 define i64 @test_mm512_kunpackd(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) { 8 ; X86-LABEL: test_mm512_kunpackd: 9 ; X86: # %bb.0: # %entry 10 ; X86-NEXT: pushl %ebp 11 ; X86-NEXT: .cfi_def_cfa_offset 8 12 ; X86-NEXT: .cfi_offset %ebp, -8 13 ; X86-NEXT: movl %esp, %ebp 14 ; X86-NEXT: .cfi_def_cfa_register %ebp 15 ; X86-NEXT: andl $-64, %esp 16 ; X86-NEXT: subl $64, %esp 17 ; X86-NEXT: vmovdqa64 136(%ebp), %zmm3 18 ; X86-NEXT: vpcmpneqb %zmm0, %zmm1, %k0 19 ; X86-NEXT: vpcmpneqb 8(%ebp), %zmm2, %k1 20 ; X86-NEXT: vpcmpneqb 72(%ebp), %zmm3, %k2 21 ; X86-NEXT: kandd %k0, %k2, %k0 22 ; X86-NEXT: kmovd %k0, %eax 23 ; X86-NEXT: kshiftrq $32, %k2, %k0 24 ; X86-NEXT: kandd %k1, %k0, %k0 25 ; X86-NEXT: kmovd %k0, %edx 26 ; X86-NEXT: movl %ebp, %esp 27 ; X86-NEXT: popl %ebp 28 ; X86-NEXT: .cfi_def_cfa %esp, 4 29 ; X86-NEXT: vzeroupper 30 ; X86-NEXT: retl 31 ; 32 ; X64-LABEL: test_mm512_kunpackd: 33 ; X64: # %bb.0: # %entry 34 ; X64-NEXT: vpcmpneqb %zmm0, %zmm1, %k0 35 ; X64-NEXT: vpcmpneqb %zmm3, %zmm2, %k1 36 ; X64-NEXT: kunpckdq %k0, %k1, %k1 37 ; X64-NEXT: vpcmpneqb %zmm5, %zmm4, %k0 {%k1} 38 ; X64-NEXT: kmovq %k0, %rax 39 ; X64-NEXT: vzeroupper 40 ; X64-NEXT: retq 41 entry: 42 %0 = bitcast <8 x i64> %__E to <64 x i8> 43 %1 = bitcast <8 x i64> %__F to <64 x i8> 44 %2 = bitcast <8 x i64> %__B to <64 x i8> 45 %3 = bitcast <8 x i64> %__A to <64 x i8> 46 %4 = icmp ne <64 x i8> %2, %3 47 %5 = bitcast <8 x i64> %__C to <64 x i8> 48 %6 = bitcast <8 x i64> %__D to <64 x i8> 49 %7 = icmp ne <64 x i8> %5, %6 50 
%8 = shufflevector <64 x i1> %4, <64 x i1> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 51 %9 = shufflevector <64 x i1> %7, <64 x i1> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 52 %10 = shufflevector <32 x i1> %8, <32 x i1> %9, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 53 %11 = icmp ne <64 x i8> %0, %1 54 %12 = and <64 x i1> %11, %10 55 %13 = bitcast <64 x i1> %12 to i64 56 ret i64 %13 57 } 58 59 define i32 @test_mm512_kunpackw(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) { 60 ; X86-LABEL: test_mm512_kunpackw: 61 ; X86: # %bb.0: # %entry 62 ; X86-NEXT: pushl %ebp 63 ; X86-NEXT: .cfi_def_cfa_offset 8 64 ; X86-NEXT: .cfi_offset %ebp, -8 65 ; X86-NEXT: movl %esp, %ebp 66 ; X86-NEXT: .cfi_def_cfa_register %ebp 67 ; X86-NEXT: andl $-64, %esp 68 ; X86-NEXT: subl $64, %esp 69 ; X86-NEXT: vmovdqa64 136(%ebp), %zmm3 70 ; X86-NEXT: vpcmpneqw %zmm0, %zmm1, %k0 71 ; X86-NEXT: vpcmpneqw 8(%ebp), %zmm2, %k1 72 ; X86-NEXT: kunpckwd %k0, %k1, %k1 73 ; X86-NEXT: vpcmpneqw 72(%ebp), %zmm3, %k0 {%k1} 74 ; X86-NEXT: kmovd %k0, %eax 75 ; 
X86-NEXT: movl %ebp, %esp 76 ; X86-NEXT: popl %ebp 77 ; X86-NEXT: .cfi_def_cfa %esp, 4 78 ; X86-NEXT: vzeroupper 79 ; X86-NEXT: retl 80 ; 81 ; X64-LABEL: test_mm512_kunpackw: 82 ; X64: # %bb.0: # %entry 83 ; X64-NEXT: vpcmpneqw %zmm0, %zmm1, %k0 84 ; X64-NEXT: vpcmpneqw %zmm3, %zmm2, %k1 85 ; X64-NEXT: kunpckwd %k0, %k1, %k1 86 ; X64-NEXT: vpcmpneqw %zmm5, %zmm4, %k0 {%k1} 87 ; X64-NEXT: kmovd %k0, %eax 88 ; X64-NEXT: vzeroupper 89 ; X64-NEXT: retq 90 entry: 91 %0 = bitcast <8 x i64> %__E to <32 x i16> 92 %1 = bitcast <8 x i64> %__F to <32 x i16> 93 %2 = bitcast <8 x i64> %__B to <32 x i16> 94 %3 = bitcast <8 x i64> %__A to <32 x i16> 95 %4 = icmp ne <32 x i16> %2, %3 96 %5 = bitcast <8 x i64> %__C to <32 x i16> 97 %6 = bitcast <8 x i64> %__D to <32 x i16> 98 %7 = icmp ne <32 x i16> %5, %6 99 %8 = shufflevector <32 x i1> %4, <32 x i1> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 100 %9 = shufflevector <32 x i1> %7, <32 x i1> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 101 %10 = shufflevector <16 x i1> %8, <16 x i1> %9, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 102 %11 = icmp ne <32 x i16> %0, %1 103 %12 = and <32 x i1> %11, %10 104 %13 = bitcast <32 x i1> %12 to i32 105 ret i32 %13 106 } 107 108 109 define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext %__A) { 110 ; X86-LABEL: test_mm512_mask_set1_epi8: 111 ; X86: # %bb.0: # %entry 112 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 113 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 114 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al 115 ; X86-NEXT: kunpckdq %k1, %k0, %k1 116 ; X86-NEXT: vpbroadcastb %eax, %zmm0 {%k1} 
117 ; X86-NEXT: retl 118 ; 119 ; X64-LABEL: test_mm512_mask_set1_epi8: 120 ; X64: # %bb.0: # %entry 121 ; X64-NEXT: kmovq %rdi, %k1 122 ; X64-NEXT: vpbroadcastb %esi, %zmm0 {%k1} 123 ; X64-NEXT: retq 124 entry: 125 %vecinit.i.i = insertelement <64 x i8> undef, i8 %__A, i32 0 126 %vecinit63.i.i = shufflevector <64 x i8> %vecinit.i.i, <64 x i8> undef, <64 x i32> zeroinitializer 127 %0 = bitcast <8 x i64> %__O to <64 x i8> 128 %1 = bitcast i64 %__M to <64 x i1> 129 %2 = select <64 x i1> %1, <64 x i8> %vecinit63.i.i, <64 x i8> %0 130 %3 = bitcast <64 x i8> %2 to <8 x i64> 131 ret <8 x i64> %3 132 } 133 134 define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) { 135 ; X86-LABEL: test_mm512_maskz_set1_epi8: 136 ; X86: # %bb.0: # %entry 137 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 138 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 139 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al 140 ; X86-NEXT: kunpckdq %k1, %k0, %k1 141 ; X86-NEXT: vpbroadcastb %eax, %zmm0 {%k1} {z} 142 ; X86-NEXT: retl 143 ; 144 ; X64-LABEL: test_mm512_maskz_set1_epi8: 145 ; X64: # %bb.0: # %entry 146 ; X64-NEXT: kmovq %rdi, %k1 147 ; X64-NEXT: vpbroadcastb %esi, %zmm0 {%k1} {z} 148 ; X64-NEXT: retq 149 entry: 150 %vecinit.i.i = insertelement <64 x i8> undef, i8 %__A, i32 0 151 %vecinit63.i.i = shufflevector <64 x i8> %vecinit.i.i, <64 x i8> undef, <64 x i32> zeroinitializer 152 %0 = bitcast i64 %__M to <64 x i1> 153 %1 = select <64 x i1> %0, <64 x i8> %vecinit63.i.i, <64 x i8> zeroinitializer 154 %2 = bitcast <64 x i8> %1 to <8 x i64> 155 ret <8 x i64> %2 156 } 157 158 define <8 x i64> @test_mm512_mask_set1_epi16(<8 x i64> %__O, i32 %__M, i16 signext %__A) { 159 ; X86-LABEL: test_mm512_mask_set1_epi16: 160 ; X86: # %bb.0: # %entry 161 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax 162 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 163 ; X86-NEXT: vpbroadcastw %eax, %zmm0 {%k1} 164 ; X86-NEXT: retl 165 ; 166 ; X64-LABEL: test_mm512_mask_set1_epi16: 167 ; X64: # %bb.0: # %entry 168 ; X64-NEXT: kmovd %edi, %k1 
169 ; X64-NEXT: vpbroadcastw %esi, %zmm0 {%k1} 170 ; X64-NEXT: retq 171 entry: 172 %vecinit.i.i = insertelement <32 x i16> undef, i16 %__A, i32 0 173 %vecinit31.i.i = shufflevector <32 x i16> %vecinit.i.i, <32 x i16> undef, <32 x i32> zeroinitializer 174 %0 = bitcast <8 x i64> %__O to <32 x i16> 175 %1 = bitcast i32 %__M to <32 x i1> 176 %2 = select <32 x i1> %1, <32 x i16> %vecinit31.i.i, <32 x i16> %0 177 %3 = bitcast <32 x i16> %2 to <8 x i64> 178 ret <8 x i64> %3 179 } 180 181 define <8 x i64> @test_mm512_maskz_set1_epi16(i32 %__M, i16 signext %__A) { 182 ; X86-LABEL: test_mm512_maskz_set1_epi16: 183 ; X86: # %bb.0: # %entry 184 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax 185 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 186 ; X86-NEXT: vpbroadcastw %eax, %zmm0 {%k1} {z} 187 ; X86-NEXT: retl 188 ; 189 ; X64-LABEL: test_mm512_maskz_set1_epi16: 190 ; X64: # %bb.0: # %entry 191 ; X64-NEXT: kmovd %edi, %k1 192 ; X64-NEXT: vpbroadcastw %esi, %zmm0 {%k1} {z} 193 ; X64-NEXT: retq 194 entry: 195 %vecinit.i.i = insertelement <32 x i16> undef, i16 %__A, i32 0 196 %vecinit31.i.i = shufflevector <32 x i16> %vecinit.i.i, <32 x i16> undef, <32 x i32> zeroinitializer 197 %0 = bitcast i32 %__M to <32 x i1> 198 %1 = select <32 x i1> %0, <32 x i16> %vecinit31.i.i, <32 x i16> zeroinitializer 199 %2 = bitcast <32 x i16> %1 to <8 x i64> 200 ret <8 x i64> %2 201 } 202 203 define <8 x i64> @test_mm512_broadcastb_epi8(<2 x i64> %a0) { 204 ; CHECK-LABEL: test_mm512_broadcastb_epi8: 205 ; CHECK: # %bb.0: 206 ; CHECK-NEXT: vpbroadcastb %xmm0, %zmm0 207 ; CHECK-NEXT: ret{{[l|q]}} 208 %arg0 = bitcast <2 x i64> %a0 to <16 x i8> 209 %res0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <64 x i32> zeroinitializer 210 %res1 = bitcast <64 x i8> %res0 to <8 x i64> 211 ret <8 x i64> %res1 212 } 213 214 define <8 x i64> @test_mm512_mask_broadcastb_epi8(<8 x i64> %a0, i64* %a1, <2 x i64> %a2) { 215 ; X86-LABEL: test_mm512_mask_broadcastb_epi8: 216 ; X86: # %bb.0: 217 ; X86-NEXT: movl {{[0-9]+}}(%esp), 
%eax 218 ; X86-NEXT: kmovq (%eax), %k1 219 ; X86-NEXT: vpbroadcastb %xmm1, %zmm0 {%k1} 220 ; X86-NEXT: retl 221 ; 222 ; X64-LABEL: test_mm512_mask_broadcastb_epi8: 223 ; X64: # %bb.0: 224 ; X64-NEXT: kmovq (%rdi), %k1 225 ; X64-NEXT: vpbroadcastb %xmm1, %zmm0 {%k1} 226 ; X64-NEXT: retq 227 %arg0 = bitcast <8 x i64> %a0 to <64 x i8> 228 %bc1 = bitcast i64* %a1 to <64 x i1>* 229 %arg1 = load <64 x i1>, <64 x i1>* %bc1 230 %arg2 = bitcast <2 x i64> %a2 to <16 x i8> 231 %res0 = shufflevector <16 x i8> %arg2, <16 x i8> undef, <64 x i32> zeroinitializer 232 %res1 = select <64 x i1> %arg1, <64 x i8> %res0, <64 x i8> %arg0 233 %res2 = bitcast <64 x i8> %res1 to <8 x i64> 234 ret <8 x i64> %res2 235 } 236 237 define <8 x i64> @test_mm512_maskz_broadcastb_epi8(i64* %a0, <2 x i64> %a1) { 238 ; X86-LABEL: test_mm512_maskz_broadcastb_epi8: 239 ; X86: # %bb.0: 240 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 241 ; X86-NEXT: kmovq (%eax), %k1 242 ; X86-NEXT: vpbroadcastb %xmm0, %zmm0 {%k1} {z} 243 ; X86-NEXT: retl 244 ; 245 ; X64-LABEL: test_mm512_maskz_broadcastb_epi8: 246 ; X64: # %bb.0: 247 ; X64-NEXT: kmovq (%rdi), %k1 248 ; X64-NEXT: vpbroadcastb %xmm0, %zmm0 {%k1} {z} 249 ; X64-NEXT: retq 250 %bc0 = bitcast i64* %a0 to <64 x i1>* 251 %arg0 = load <64 x i1>, <64 x i1>* %bc0 252 %arg1 = bitcast <2 x i64> %a1 to <16 x i8> 253 %res0 = shufflevector <16 x i8> %arg1, <16 x i8> undef, <64 x i32> zeroinitializer 254 %res1 = select <64 x i1> %arg0, <64 x i8> %res0, <64 x i8> zeroinitializer 255 %res2 = bitcast <64 x i8> %res1 to <8 x i64> 256 ret <8 x i64> %res2 257 } 258 259 define <8 x i64> @test_mm512_broadcastw_epi16(<2 x i64> %a0) { 260 ; CHECK-LABEL: test_mm512_broadcastw_epi16: 261 ; CHECK: # %bb.0: 262 ; CHECK-NEXT: vpbroadcastw %xmm0, %zmm0 263 ; CHECK-NEXT: ret{{[l|q]}} 264 %arg0 = bitcast <2 x i64> %a0 to <8 x i16> 265 %res0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <32 x i32> zeroinitializer 266 %res1 = bitcast <32 x i16> %res0 to <8 x i64> 267 ret <8 x i64> %res1 268 
} 269 270 define <8 x i64> @test_mm512_mask_broadcastw_epi16(<8 x i64> %a0, i32 %a1, <2 x i64> %a2) { 271 ; X86-LABEL: test_mm512_mask_broadcastw_epi16: 272 ; X86: # %bb.0: 273 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 274 ; X86-NEXT: vpbroadcastw %xmm1, %zmm0 {%k1} 275 ; X86-NEXT: retl 276 ; 277 ; X64-LABEL: test_mm512_mask_broadcastw_epi16: 278 ; X64: # %bb.0: 279 ; X64-NEXT: kmovd %edi, %k1 280 ; X64-NEXT: vpbroadcastw %xmm1, %zmm0 {%k1} 281 ; X64-NEXT: retq 282 %arg0 = bitcast <8 x i64> %a0 to <32 x i16> 283 %arg1 = bitcast i32 %a1 to <32 x i1> 284 %arg2 = bitcast <2 x i64> %a2 to <8 x i16> 285 %res0 = shufflevector <8 x i16> %arg2, <8 x i16> undef, <32 x i32> zeroinitializer 286 %res1 = select <32 x i1> %arg1, <32 x i16> %res0, <32 x i16> %arg0 287 %res2 = bitcast <32 x i16> %res1 to <8 x i64> 288 ret <8 x i64> %res2 289 } 290 291 define <8 x i64> @test_mm512_maskz_broadcastw_epi16(i32 %a0, <2 x i64> %a1) { 292 ; X86-LABEL: test_mm512_maskz_broadcastw_epi16: 293 ; X86: # %bb.0: 294 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 295 ; X86-NEXT: vpbroadcastw %xmm0, %zmm0 {%k1} {z} 296 ; X86-NEXT: retl 297 ; 298 ; X64-LABEL: test_mm512_maskz_broadcastw_epi16: 299 ; X64: # %bb.0: 300 ; X64-NEXT: kmovd %edi, %k1 301 ; X64-NEXT: vpbroadcastw %xmm0, %zmm0 {%k1} {z} 302 ; X64-NEXT: retq 303 %arg0 = bitcast i32 %a0 to <32 x i1> 304 %arg1 = bitcast <2 x i64> %a1 to <8 x i16> 305 %res0 = shufflevector <8 x i16> %arg1, <8 x i16> undef, <32 x i32> zeroinitializer 306 %res1 = select <32 x i1> %arg0, <32 x i16> %res0, <32 x i16> zeroinitializer 307 %res2 = bitcast <32 x i16> %res1 to <8 x i64> 308 ret <8 x i64> %res2 309 } 310 311 define <8 x i64> @test_mm512_bslli_epi128(<8 x i64> %a0) { 312 ; CHECK-LABEL: test_mm512_bslli_epi128: 313 ; CHECK: # %bb.0: 314 ; CHECK-NEXT: vpsrldq {{.*#+}} zmm0 = 
zmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 315 ; CHECK-NEXT: ret{{[l|q]}} 316 %arg0 = bitcast <8 x i64> %a0 to <64 x i8> 317 %res0 = shufflevector <64 x i8> %arg0, <64 x i8> zeroinitializer, <64 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 43, i32 44, i32 45, i32 46, i32 47, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122> 318 %res1 = bitcast <64 x i8> %res0 to <8 x i64> 319 ret <8 x i64> %res1 320 } 321 322 define <8 x i64> @test_mm512_bsrli_epi128(<8 x i64> %a0) { 323 ; CHECK-LABEL: test_mm512_bsrli_epi128: 324 ; CHECK: # %bb.0: 325 ; CHECK-NEXT: vpsrldq {{.*#+}} zmm0 = zmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zmm0[21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zmm0[37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zmm0[53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero 326 ; CHECK-NEXT: ret{{[l|q]}} 327 %arg0 = bitcast <8 x i64> %a0 to <64 x i8> 328 %res0 = shufflevector <64 x i8> %arg0, <64 x i8> zeroinitializer, <64 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 64, i32 65, i32 66, i32 67, i32 68, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 80, i32 81, i32 82, i32 83, i32 84, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, 
i32 96, i32 97, i32 98, i32 99, i32 100, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 112, i32 113, i32 114, i32 115, i32 116> 329 %res1 = bitcast <64 x i8> %res0 to <8 x i64> 330 ret <8 x i64> %res1 331 } 332 333 define <8 x i64> @test_mm512_unpackhi_epi8(<8 x i64> %a0, <8 x i64> %a1) { 334 ; CHECK-LABEL: test_mm512_unpackhi_epi8: 335 ; CHECK: # %bb.0: 336 ; CHECK-NEXT: vpunpckhbw {{.*#+}} zmm0 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] 337 ; CHECK-NEXT: ret{{[l|q]}} 338 %arg0 = bitcast <8 x i64> %a0 to <64 x i8> 339 %arg1 = bitcast <8 x i64> %a1 to <64 x i8> 340 %res0 = shufflevector <64 x i8> %arg0, <64 x i8> %arg1, <64 x i32> <i32 8, i32 72, i32 9, i32 73, i32 10, i32 74, i32 11, i32 75, i32 12, i32 76, i32 13, i32 77, i32 14, i32 78, i32 15, i32 79, i32 24, i32 88, i32 25, i32 89, i32 26, i32 90, i32 27, i32 91, i32 28, i32 92, i32 29, i32 93, i32 30, i32 94, i32 31, i32 95, i32 40, i32 104, i32 41, i32 105, i32 42, i32 106, i32 43, i32 107, i32 44, i32 108, i32 45, i32 109, i32 46, i32 110, i32 47, i32 111, i32 56, i32 120, i32 57, i32 121, i32 58, i32 122, i32 59, i32 123, i32 60, i32 124, i32 61, i32 125, i32 62, i32 126, i32 63, i32 127> 341 %res1 = bitcast <64 x i8> %res0 to <8 x i64> 342 ret <8 x i64> %res1 343 } 344 345 ; TODO - improve support for i64 -> mmask64 on 32-bit targets 346 define <8 x i64> @test_mm512_mask_unpackhi_epi8(<8 x i64> %a0, i64* %a1, <8 x i64> %a2, <8 x i64> %a3) 
{ 347 ; X86-LABEL: test_mm512_mask_unpackhi_epi8: 348 ; X86: # %bb.0: 349 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 350 ; X86-NEXT: kmovq (%eax), %k1 351 ; X86-NEXT: vpunpckhbw {{.*#+}} zmm0 {%k1} = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63] 352 ; X86-NEXT: retl 353 ; 354 ; X64-LABEL: test_mm512_mask_unpackhi_epi8: 355 ; X64: # %bb.0: 356 ; X64-NEXT: kmovq (%rdi), %k1 357 ; X64-NEXT: vpunpckhbw {{.*#+}} zmm0 {%k1} = zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31],zmm1[40],zmm2[40],zmm1[41],zmm2[41],zmm1[42],zmm2[42],zmm1[43],zmm2[43],zmm1[44],zmm2[44],zmm1[45],zmm2[45],zmm1[46],zmm2[46],zmm1[47],zmm2[47],zmm1[56],zmm2[56],zmm1[57],zmm2[57],zmm1[58],zmm2[58],zmm1[59],zmm2[59],zmm1[60],zmm2[60],zmm1[61],zmm2[61],zmm1[62],zmm2[62],zmm1[63],zmm2[63] 358 ; X64-NEXT: retq 359 %arg0 = bitcast <8 x i64> %a0 to <64 x i8> 360 %arg1 = bitcast i64* %a1 to <64 x i1>* 361 %sel1 = load <64 x i1>, <64 x i1>* %arg1 362 %arg2 = bitcast <8 x i64> %a2 to <64 x i8> 363 %arg3 = bitcast <8 x i64> %a3 to <64 x i8> 364 %res0 = shufflevector <64 x i8> %arg2, <64 x i8> %arg3, <64 x i32> <i32 8, i32 72, i32 9, i32 73, i32 10, i32 74, i32 11, i32 75, i32 12, i32 76, i32 13, i32 77, i32 14, i32 78, i32 15, i32 79, i32 24, i32 88, i32 25, i32 89, 
i32 26, i32 90, i32 27, i32 91, i32 28, i32 92, i32 29, i32 93, i32 30, i32 94, i32 31, i32 95, i32 40, i32 104, i32 41, i32 105, i32 42, i32 106, i32 43, i32 107, i32 44, i32 108, i32 45, i32 109, i32 46, i32 110, i32 47, i32 111, i32 56, i32 120, i32 57, i32 121, i32 58, i32 122, i32 59, i32 123, i32 60, i32 124, i32 61, i32 125, i32 62, i32 126, i32 63, i32 127> 365 %res1 = select <64 x i1> %sel1, <64 x i8> %res0, <64 x i8> %arg0 366 %res2 = bitcast <64 x i8> %res1 to <8 x i64> 367 ret <8 x i64> %res2 368 } 369 370 define <8 x i64> @test_mm512_maskz_unpackhi_epi8(i64* %a0, <8 x i64> %a1, <8 x i64> %a2) { 371 ; X86-LABEL: test_mm512_maskz_unpackhi_epi8: 372 ; X86: # %bb.0: 373 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 374 ; X86-NEXT: kmovq (%eax), %k1 375 ; X86-NEXT: vpunpckhbw {{.*#+}} zmm0 {%k1} {z} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] 376 ; X86-NEXT: retl 377 ; 378 ; X64-LABEL: test_mm512_maskz_unpackhi_epi8: 379 ; X64: # %bb.0: 380 ; X64-NEXT: kmovq (%rdi), %k1 381 ; X64-NEXT: vpunpckhbw {{.*#+}} zmm0 {%k1} {z} = 
zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] 382 ; X64-NEXT: retq 383 %arg0 = bitcast i64* %a0 to <64 x i1>* 384 %sel0 = load <64 x i1>, <64 x i1>* %arg0 385 %arg1 = bitcast <8 x i64> %a1 to <64 x i8> 386 %arg2 = bitcast <8 x i64> %a2 to <64 x i8> 387 %res0 = shufflevector <64 x i8> %arg1, <64 x i8> %arg2, <64 x i32> <i32 8, i32 72, i32 9, i32 73, i32 10, i32 74, i32 11, i32 75, i32 12, i32 76, i32 13, i32 77, i32 14, i32 78, i32 15, i32 79, i32 24, i32 88, i32 25, i32 89, i32 26, i32 90, i32 27, i32 91, i32 28, i32 92, i32 29, i32 93, i32 30, i32 94, i32 31, i32 95, i32 40, i32 104, i32 41, i32 105, i32 42, i32 106, i32 43, i32 107, i32 44, i32 108, i32 45, i32 109, i32 46, i32 110, i32 47, i32 111, i32 56, i32 120, i32 57, i32 121, i32 58, i32 122, i32 59, i32 123, i32 60, i32 124, i32 61, i32 125, i32 62, i32 126, i32 63, i32 127> 388 %res1 = select <64 x i1> %sel0, <64 x i8> %res0, <64 x i8> zeroinitializer 389 %res2 = bitcast <64 x i8> %res1 to <8 x i64> 390 ret <8 x i64> %res2 391 } 392 393 define <8 x i64> @test_mm512_unpackhi_epi16(<8 x i64> %a0, <8 x i64> %a1) { 394 ; CHECK-LABEL: test_mm512_unpackhi_epi16: 395 ; CHECK: # %bb.0: 396 ; CHECK-NEXT: vpunpckhwd {{.*#+}} zmm0 = 
zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31] 397 ; CHECK-NEXT: ret{{[l|q]}} 398 %arg0 = bitcast <8 x i64> %a0 to <32 x i16> 399 %arg1 = bitcast <8 x i64> %a1 to <32 x i16> 400 %res0 = shufflevector <32 x i16> %arg0, <32 x i16> %arg1, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63> 401 %res1 = bitcast <32 x i16> %res0 to <8 x i64> 402 ret <8 x i64> %res1 403 } 404 405 define <8 x i64> @test_mm512_mask_unpackhi_epi16(<8 x i64> %a0, i32 %a1, <8 x i64> %a2, <8 x i64> %a3) { 406 ; X86-LABEL: test_mm512_mask_unpackhi_epi16: 407 ; X86: # %bb.0: 408 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 409 ; X86-NEXT: vpunpckhwd {{.*#+}} zmm0 {%k1} = zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31] 410 ; X86-NEXT: retl 411 ; 412 ; X64-LABEL: test_mm512_mask_unpackhi_epi16: 413 ; X64: # %bb.0: 414 ; X64-NEXT: kmovd %edi, %k1 415 ; X64-NEXT: vpunpckhwd {{.*#+}} zmm0 {%k1} = zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[12],zmm2[12],zmm1[13],zmm2[13],zmm1[14],zmm2[14],zmm1[15],zmm2[15],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[28],zmm2[28],zmm1[29],zmm2[29],zmm1[30],zmm2[30],zmm1[31],zmm2[31] 416 ; X64-NEXT: retq 417 %arg0 = bitcast <8 x i64> %a0 to <32 x i16> 418 %arg1 = bitcast i32 %a1 to <32 x i1> 419 %arg2 = bitcast <8 x i64> %a2 to <32 x i16> 420 %arg3 = 
bitcast <8 x i64> %a3 to <32 x i16> 421 %res0 = shufflevector <32 x i16> %arg2, <32 x i16> %arg3, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63> 422 %res1 = select <32 x i1> %arg1, <32 x i16> %res0, <32 x i16> %arg0 423 %res2 = bitcast <32 x i16> %res1 to <8 x i64> 424 ret <8 x i64> %res2 425 } 426 427 define <8 x i64> @test_mm512_maskz_unpackhi_epi16(i32 %a0, <8 x i64> %a1, <8 x i64> %a2) { 428 ; X86-LABEL: test_mm512_maskz_unpackhi_epi16: 429 ; X86: # %bb.0: 430 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 431 ; X86-NEXT: vpunpckhwd {{.*#+}} zmm0 {%k1} {z} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31] 432 ; X86-NEXT: retl 433 ; 434 ; X64-LABEL: test_mm512_maskz_unpackhi_epi16: 435 ; X64: # %bb.0: 436 ; X64-NEXT: kmovd %edi, %k1 437 ; X64-NEXT: vpunpckhwd {{.*#+}} zmm0 {%k1} {z} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31] 438 ; X64-NEXT: retq 439 %arg0 = bitcast i32 %a0 to <32 x i1> 440 %arg1 = bitcast <8 x i64> %a1 to <32 x i16> 441 %arg2 = bitcast <8 x i64> %a2 to <32 x i16> 442 %res0 = shufflevector <32 x i16> %arg1, <32 x i16> %arg2, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, 
i32 63> 443 %res1 = select <32 x i1> %arg0, <32 x i16> %res0, <32 x i16> zeroinitializer 444 %res2 = bitcast <32 x i16> %res1 to <8 x i64> 445 ret <8 x i64> %res2 446 } 447 448 define <8 x i64> @test_mm512_unpacklo_epi8(<8 x i64> %a0, <8 x i64> %a1) { 449 ; CHECK-LABEL: test_mm512_unpacklo_epi8: 450 ; CHECK: # %bb.0: 451 ; CHECK-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] 452 ; CHECK-NEXT: ret{{[l|q]}} 453 %arg0 = bitcast <8 x i64> %a0 to <64 x i8> 454 %arg1 = bitcast <8 x i64> %a1 to <64 x i8> 455 %res0 = shufflevector <64 x i8> %arg0, <64 x i8> %arg1, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119> 456 %res1 = bitcast <64 x i8> %res0 to <8 x i64> 457 ret <8 x i64> %res1 458 } 459 460 define <8 x i64> @test_mm512_mask_unpacklo_epi8(<8 x i64> %a0, i64* %a1, <8 x i64> %a2, <8 x i64> %a3) { 461 ; X86-LABEL: test_mm512_mask_unpacklo_epi8: 462 ; X86: # %bb.0: 463 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 464 ; X86-NEXT: kmovq (%eax), %k1 465 ; X86-NEXT: 
vpunpcklbw {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55] 466 ; X86-NEXT: retl 467 ; 468 ; X64-LABEL: test_mm512_mask_unpacklo_epi8: 469 ; X64: # %bb.0: 470 ; X64-NEXT: kmovq (%rdi), %k1 471 ; X64-NEXT: vpunpcklbw {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[20],zmm2[20],zmm1[21],zmm2[21],zmm1[22],zmm2[22],zmm1[23],zmm2[23],zmm1[32],zmm2[32],zmm1[33],zmm2[33],zmm1[34],zmm2[34],zmm1[35],zmm2[35],zmm1[36],zmm2[36],zmm1[37],zmm2[37],zmm1[38],zmm2[38],zmm1[39],zmm2[39],zmm1[48],zmm2[48],zmm1[49],zmm2[49],zmm1[50],zmm2[50],zmm1[51],zmm2[51],zmm1[52],zmm2[52],zmm1[53],zmm2[53],zmm1[54],zmm2[54],zmm1[55],zmm2[55] 472 ; X64-NEXT: retq 473 %arg0 = bitcast <8 x i64> %a0 to <64 x i8> 474 %arg1 = bitcast i64* %a1 to <64 x i1>* 475 %sel1 = load <64 x i1>, <64 x i1>* %arg1 476 %arg2 = bitcast <8 x i64> %a2 to <64 x i8> 477 %arg3 = bitcast <8 x i64> %a3 to <64 x i8> 478 %res0 = shufflevector <64 x i8> %arg2, <64 x i8> %arg3, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, 
i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
  %res1 = select <64 x i1> %sel1, <64 x i8> %res0, <64 x i8> %arg0
  %res2 = bitcast <64 x i8> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

; Zero-masked byte unpack-low: the <64 x i1> mask is loaded through the i64*
; argument, and the punpcklbw shuffle result is selected against
; zeroinitializer (the {z} form in the CHECK lines).
define <8 x i64> @test_mm512_maskz_unpacklo_epi8(i64* %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_maskz_unpacklo_epi8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovq (%eax), %k1
; X86-NEXT:    vpunpcklbw {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_unpacklo_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovq (%rdi), %k1
; X64-NEXT:    vpunpcklbw {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
; X64-NEXT:    retq
  %arg0 = bitcast i64* %a0 to <64 x i1>*
  %sel0 = load <64 x i1>, <64 x i1>* %arg0
  %arg1 = bitcast <8 x i64> %a1 to <64 x i8>
  %arg2 = bitcast <8 x i64> %a2 to <64 x i8>
  %res0 = shufflevector <64 x i8> %arg1, <64 x i8> %arg2, <64 x i32> <i32 0, i32 64, i32 1, i32 65, i32 2, i32 66, i32 3, i32 67, i32 4, i32 68, i32 5, i32 69, i32 6, i32 70, i32 7, i32 71, i32 16, i32 80, i32 17, i32 81, i32 18, i32 82, i32 19, i32 83, i32 20, i32 84, i32 21, i32 85, i32 22, i32 86, i32 23, i32 87, i32 32, i32 96, i32 33, i32 97, i32 34, i32 98, i32 35, i32 99, i32 36, i32 100, i32 37, i32 101, i32 38, i32 102, i32 39, i32 103, i32 48, i32 112, i32 49, i32 113, i32 50, i32 114, i32 51, i32 115, i32 52, i32 116, i32 53, i32 117, i32 54, i32 118, i32 55, i32 119>
  %res1 = select <64 x i1> %sel0, <64 x i8> %res0, <64 x i8> zeroinitializer
  %res2 = bitcast <64 x i8> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

; Unmasked word unpack-low: interleaves the low 4 words of each 128-bit lane
; of the two operands (shuffle indices 0-3/8-11/16-19/24-27 against 32+).
define <8 x i64> @test_mm512_unpacklo_epi16(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_mm512_unpacklo_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
  %arg1 = bitcast <8 x i64> %a1 to <32 x i16>
  %res0 = shufflevector <32 x i16> %arg0, <32 x i16> %arg1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59>
  %res1 = bitcast <32 x i16> %res0 to <8 x i64>
  ret <8 x i64> %res1
}

; Merge-masked word unpack-low: i32 mask selects shuffle result vs. the
; pass-through operand %a0.
define <8 x i64> @test_mm512_mask_unpacklo_epi16(<8 x i64> %a0, i32 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X86-LABEL: test_mm512_mask_unpacklo_epi16:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpunpcklwd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27]
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_unpacklo_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpunpcklwd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[16],zmm2[16],zmm1[17],zmm2[17],zmm1[18],zmm2[18],zmm1[19],zmm2[19],zmm1[24],zmm2[24],zmm1[25],zmm2[25],zmm1[26],zmm2[26],zmm1[27],zmm2[27]
; X64-NEXT:    retq
  %arg0 = bitcast <8 x i64> %a0 to <32 x i16>
  %arg1 = bitcast i32 %a1 to <32 x i1>
  %arg2 = bitcast <8 x i64> %a2 to <32 x i16>
  %arg3 = bitcast <8 x i64> %a3 to <32 x i16>
  %res0 = shufflevector <32 x i16> %arg2, <32 x i16> %arg3, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59>
  %res1 = select <32 x i1> %arg1, <32 x i16> %res0, <32 x i16> %arg0
  %res2 = bitcast <32 x i16> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

; Zero-masked word unpack-low: select vs. zeroinitializer ({z} form).
define <8 x i64> @test_mm512_maskz_unpacklo_epi16(i32 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X86-LABEL: test_mm512_maskz_unpacklo_epi16:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpunpcklwd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_unpacklo_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpunpcklwd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
; X64-NEXT:    retq
  %arg0 = bitcast i32 %a0 to <32 x i1>
  %arg1 = bitcast <8 x i64> %a1 to <32 x i16>
  %arg2 = bitcast <8 x i64> %a2 to <32 x i16>
  %res0 = shufflevector <32 x i16> %arg1, <32 x i16> %arg2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59>
  %res1 = select <32 x i1> %arg0, <32 x i16> %res0, <32 x i16> zeroinitializer
  %res2 = bitcast <32 x i16> %res1 to <8 x i64>
  ret <8 x i64> %res2
}

; test_epi8: and + icmp ne 0 selects to vptestmb. On X86 the i64 mask result
; is returned in eax:edx via kshiftrq/kmovd; on X64 a single kmovq suffices.
define i64 @test_mm512_test_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_test_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vptestmb %zmm0, %zmm1, %k0
; X86-NEXT:    kshiftrq $32, %k0, %k1
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    kmovd %k1, %edx
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_test_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vptestmb %zmm0, %zmm1, %k0
; X64-NEXT:    kmovq %k0, %rax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <64 x i8>
  %1 = icmp ne <64 x i8> %0, zeroinitializer
  %2 = bitcast <64 x i1> %1 to i64
  ret i64 %2
}

; Masked test_epi8: the i64 user mask is and-ed with the compare result
; (predicated vptestmb on X64; two 32-bit andl of the split halves on X86).
define i64 @test_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_test_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vptestmb %zmm0, %zmm1, %k0
; X86-NEXT:    kshiftrq $32, %k0, %k1
; X86-NEXT:    kmovd %k1, %edx
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_test_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vptestmb %zmm0, %zmm1, %k0 {%k1}
; X64-NEXT:    kmovq %k0, %rax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <64 x i8>
  %1 = icmp ne <64 x i8> %0, zeroinitializer
  %2 = bitcast i64 %__U to <64 x i1>
  %3 = and <64 x i1> %1, %2
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; test_epi16: i32 result fits one GPR, so X86 and X64 share the CHECK body.
define i32 @test_mm512_test_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_test_epi16_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmw %zmm0, %zmm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <32 x i16>
  %1 = icmp ne <32 x i16> %0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to i32
  ret i32 %2
}

; Masked test_epi16: user mask applied through the {%k1} predicate.
define i32 @test_mm512_mask_test_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_test_epi16_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestmw %zmm0, %zmm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_test_epi16_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestmw %zmm0, %zmm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <32 x i16>
  %1 = icmp ne <32 x i16> %0, zeroinitializer
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = and <32 x i1> %1, %2
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; testn_epi8: same as test_epi8 but icmp eq 0, selecting vptestNmb.
define i64 @test_mm512_testn_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_testn_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vptestnmb %zmm0, %zmm1, %k0
; X86-NEXT:    kshiftrq $32, %k0, %k1
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    kmovd %k1, %edx
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_testn_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vptestnmb %zmm0, %zmm1, %k0
; X64-NEXT:    kmovq %k0, %rax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <64 x i8>
  %1 = icmp eq <64 x i8> %0, zeroinitializer
  %2 = bitcast <64 x i1> %1 to i64
  ret i64 %2
}

; Masked testn_epi8: user mask and-ed with the eq-compare result.
define i64 @test_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_testn_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vptestnmb %zmm0, %zmm1, %k0
; X86-NEXT:    kshiftrq $32, %k0, %k1
; X86-NEXT:    kmovd %k1, %edx
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_testn_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vptestnmb %zmm0, %zmm1, %k0 {%k1}
; X64-NEXT:    kmovq %k0, %rax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <64 x i8>
  %1 = icmp eq <64 x i8> %0, zeroinitializer
  %2 = bitcast i64 %__U to <64 x i1>
  %3 = and <64 x i1> %1, %2
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; testn_epi16: icmp eq 0 selects vptestnmw; shared CHECK body.
define i32 @test_mm512_testn_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_testn_epi16_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestnmw %zmm0, %zmm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <32 x i16>
  %1 = icmp eq <32 x i16> %0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to i32
  ret i32 %2
}

; Masked testn_epi16: user mask applied through the {%k1} predicate.
define i32 @test_mm512_mask_testn_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_testn_epi16_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestnmw %zmm0, %zmm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_testn_epi16_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestnmw %zmm0, %zmm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and1.i.i = and <8 x i64> %__B, %__A
  %0 = bitcast <8 x i64> %and1.i.i to <32 x i16>
  %1 = icmp eq <32 x i16> %0, zeroinitializer
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = and <32 x i1> %1, %2
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; cvtepi16_epi8: trunc <32 x i16> -> <32 x i8> selects vpmovwb zmm->ymm.
define <4 x i64> @test_mm512_cvtepi16_epi8(<8 x i64> %__A) {
; CHECK-LABEL: test_mm512_cvtepi16_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmovwb %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %conv.i = trunc <32 x i16> %0 to <32 x i8>
  %1 = bitcast <32 x i8> %conv.i to <4 x i64>
  ret <4 x i64> %1
}

; Merge-masked cvtepi16_epi8: truncated result selected against the <32 x i8>
; pass-through %__O (masked vpmovwb).
define <4 x i64> @test_mm512_mask_cvtepi16_epi8(<4 x i64> %__O, i32 %__M, <8 x i64> %__A) {
; X86-LABEL: test_mm512_mask_cvtepi16_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmovwb %zmm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_cvtepi16_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmovwb %zmm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %conv.i.i = trunc <32 x i16> %0 to <32 x i8>
  %1 = bitcast <4 x i64> %__O to <32 x i8>
  %2 = bitcast i32 %__M to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i8> %conv.i.i, <32 x i8> %1
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  ret <4 x i64> %4
}

; Zero-masked cvtepi16_epi8: select vs. zeroinitializer ({z} vpmovwb).
define <4 x i64> @test_mm512_maskz_cvtepi16_epi8(i32 %__M, <8 x i64> %__A) {
; X86-LABEL: test_mm512_maskz_cvtepi16_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmovwb %zmm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_cvtepi16_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmovwb %zmm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %conv.i.i = trunc <32 x i16> %0 to <32 x i8>
  %1 = bitcast i32 %__M to <32 x i1>
  %2 = select <32 x i1> %1, <32 x i8> %conv.i.i, <32 x i8> zeroinitializer
  %3 = bitcast <32 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

; mask2 variant: fallthrough operand is the index vector %__I (%1), so codegen
; uses vpermi2w (indices destroyed) and copies %zmm1 back to %zmm0.
define <8 x i64> @test_mm512_mask2_permutex2var_epi16(<8 x i64> %__A, <8 x i64> %__I, i32 %__U, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask2_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1 {%k1}
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask2_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1 {%k1}
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__I to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %1
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

; Unmasked permutex2var: lowers to vpermt2w with %zmm0 as the table operand.
define <8 x i64> @test_mm512_permutex2var_epi16(<8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_permutex2var_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__I to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

; Merge-masked permutex2var: fallthrough is %__A (%0), hence vpermt2w {%k1}.
define <8 x i64> @test_mm512_mask_permutex2var_epi16(<8 x i64> %__A, i32 %__U, <8 x i64> %__I, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__I to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

; Zero-masked permutex2var: select vs. zeroinitializer -> vpermt2w {%k1} {z}.
define <8 x i64> @test_mm512_maskz_permutex2var_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__I to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>)

!0 = !{i32 1}