; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlvbmi-builtins.c

define <2 x i64> @test_mm_mask2_permutex2var_epi8(<2 x i64> %__A, <2 x i64> %__I, i16 zeroext %__U, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask2_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermi2b %xmm2, %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask2_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2b %xmm2, %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <16 x i8>
  %1 = bitcast <2 x i64> %__I to <16 x i8>
  %2 = bitcast <2 x i64> %__B to <16 x i8>
  %3 = tail call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> %1
  %6 = bitcast <16 x i8> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <4 x i64> @test_mm256_mask2_permutex2var_epi8(<4 x i64> %__A, <4 x i64> %__I, i32 %__U, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask2_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermi2b %ymm2, %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask2_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2b %ymm2, %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <32 x i8>
  %1 = bitcast <4 x i64> %__I to <32 x i8>
  %2 = bitcast <4 x i64> %__B to <32 x i8>
  %3 = tail call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %0, <32 x i8> %1, <32 x i8> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> %1
  %6 = bitcast <32 x i8> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <2 x i64> @test_mm_permutex2var_epi8(<2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_permutex2var_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <16 x i8>
  %1 = bitcast <2 x i64> %__I to <16 x i8>
  %2 = bitcast <2 x i64> %__B to <16 x i8>
  %3 = tail call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %4 = bitcast <16 x i8> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_mask_permutex2var_epi8(<2 x i64> %__A, i16 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <16 x i8>
  %1 = bitcast <2 x i64> %__I to <16 x i8>
  %2 = bitcast <2 x i64> %__B to <16 x i8>
  %3 = tail call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> %0
  %6 = bitcast <16 x i8> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <2 x i64> @test_mm_maskz_permutex2var_epi8(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <16 x i8>
  %1 = bitcast <2 x i64> %__I to <16 x i8>
  %2 = bitcast <2 x i64> %__B to <16 x i8>
  %3 = tail call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i8> %3, <16 x i8> zeroinitializer
  %6 = bitcast <16 x i8> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <4 x i64> @test_mm256_permutex2var_epi8(<4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_permutex2var_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <32 x i8>
  %1 = bitcast <4 x i64> %__I to <32 x i8>
  %2 = bitcast <4 x i64> %__B to <32 x i8>
  %3 = tail call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %0, <32 x i8> %1, <32 x i8> %2)
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_mask_permutex2var_epi8(<4 x i64> %__A, i32 %__U, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <32 x i8>
  %1 = bitcast <4 x i64> %__I to <32 x i8>
  %2 = bitcast <4 x i64> %__B to <32 x i8>
  %3 = tail call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %0, <32 x i8> %1, <32 x i8> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> %0
  %6 = bitcast <32 x i8> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <4 x i64> @test_mm256_maskz_permutex2var_epi8(i32 %__U, <4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_permutex2var_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_permutex2var_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <32 x i8>
  %1 = bitcast <4 x i64> %__I to <32 x i8>
  %2 = bitcast <4 x i64> %__B to <32 x i8>
  %3 = tail call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %0, <32 x i8> %1, <32 x i8> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i8> %3, <32 x i8> zeroinitializer
  %6 = bitcast <32 x i8> %5 to <4 x i64>
  ret <4 x i64> %6
}

declare <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>)