1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64 4 5 declare <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8>, <16 x i8>) 6 7 define <16 x i8>@test_int_x86_avx512_mask_permvar_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 8 ; X86-LABEL: test_int_x86_avx512_mask_permvar_qi_128: 9 ; X86: # %bb.0: 10 ; X86-NEXT: vpermb %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x8d,0xd8] 11 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 12 ; X86-NEXT: vpermb %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x8d,0xd0] 13 ; X86-NEXT: vpermb %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x8d,0xc0] 14 ; X86-NEXT: vpaddb %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3] 15 ; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 16 ; X86-NEXT: retl # encoding: [0xc3] 17 ; 18 ; X64-LABEL: test_int_x86_avx512_mask_permvar_qi_128: 19 ; X64: # %bb.0: 20 ; X64-NEXT: vpermb %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x8d,0xd8] 21 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 22 ; X64-NEXT: vpermb %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x8d,0xd0] 23 ; X64-NEXT: vpermb %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x8d,0xc0] 24 ; X64-NEXT: vpaddb %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3] 25 ; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 26 ; X64-NEXT: retq # encoding: [0xc3] 27 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1) 28 %2 = bitcast i16 %x3 to <16 x i1> 29 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x2 30 %4 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1) 31 %5 = bitcast i16 %x3 to <16 x i1> 32 %6 = select <16 x i1> %5, <16 x i8> %4, <16 x i8> zeroinitializer 33 %7 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1) 34 %res3 = add <16 x i8> %3, %6 35 %res4 = add <16 x i8> %res3, %7 36 ret <16 x i8> %res4 37 } 38 39 declare <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8>, <32 x i8>) 40 41 define <32 x i8>@test_int_x86_avx512_mask_permvar_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 42 ; X86-LABEL: test_int_x86_avx512_mask_permvar_qi_256: 43 ; X86: # %bb.0: 44 ; X86-NEXT: vpermb %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x8d,0xd8] 45 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 46 ; X86-NEXT: vpermb %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x8d,0xd0] 47 ; X86-NEXT: vpermb %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x8d,0xc0] 48 ; X86-NEXT: vpaddb %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3] 49 ; X86-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] 50 ; X86-NEXT: retl # encoding: [0xc3] 51 ; 52 ; X64-LABEL: test_int_x86_avx512_mask_permvar_qi_256: 53 ; X64: # %bb.0: 54 ; X64-NEXT: vpermb %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x8d,0xd8] 55 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 56 ; X64-NEXT: vpermb %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x8d,0xd0] 57 ; X64-NEXT: vpermb %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x8d,0xc0] 58 ; X64-NEXT: vpaddb %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3] 59 ; X64-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] 60 ; X64-NEXT: retq # encoding: [0xc3] 61 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1) 62 %2 = bitcast i32 %x3 to <32 x i1> 63 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x2 64 %4 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1) 65 %5 = bitcast i32 %x3 to <32 x i1> 66 %6 = select <32 x i1> %5, <32 x i8> %4, <32 x i8> zeroinitializer 67 %7 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1) 68 %res3 = add <32 x i8> %3, %6 69 %res4 = add <32 x i8> %res3, %7 70 ret <32 x i8> %res4 71 } 72 73 declare <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) 74 75 define <16 x i8>@test_int_x86_avx512_mask_pmultishift_qb_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 76 ; X86-LABEL: test_int_x86_avx512_mask_pmultishift_qb_128: 77 ; X86: # %bb.0: 78 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 79 ; X86-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x83,0xd1] 80 ; X86-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x83,0xd9] 81 ; X86-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x83,0xc1] 82 ; X86-NEXT: vpaddb %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfc,0xc0] 83 ; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 84 ; X86-NEXT: retl # encoding: [0xc3] 85 ; 86 ; X64-LABEL: test_int_x86_avx512_mask_pmultishift_qb_128: 87 ; X64: # %bb.0: 88 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 89 ; X64-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x83,0xd9] 90 ; X64-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x83,0xd1] 91 ; X64-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x83,0xc1] 92 ; X64-NEXT: vpaddb %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3] 93 ; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] 94 ; X64-NEXT: retq # encoding: [0xc3] 95 %res = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) 96 %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3) 97 %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) 98 %res3 = add <16 x i8> %res, %res1 99 %res4 = add <16 x i8> %res3, %res2 100 ret <16 x i8> %res4 101 } 102 103 declare <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) 104 105 define <32 x i8>@test_int_x86_avx512_mask_pmultishift_qb_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 106 ; X86-LABEL: test_int_x86_avx512_mask_pmultishift_qb_256: 107 ; X86: # %bb.0: 108 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 109 ; X86-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x83,0xd1] 110 ; X86-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x83,0xd9] 111 ; X86-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x83,0xc1] 112 ; X86-NEXT: vpaddb %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfc,0xc0] 113 ; X86-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] 114 ; X86-NEXT: retl # encoding: [0xc3] 115 ; 116 ; X64-LABEL: test_int_x86_avx512_mask_pmultishift_qb_256: 117 ; X64: # %bb.0: 118 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 119 ; X64-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x83,0xd1] 120 ; X64-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x83,0xd9] 121 ; X64-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x83,0xc1] 122 ; X64-NEXT: vpaddb %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfc,0xc0] 123 ; X64-NEXT: vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] 124 ; X64-NEXT: retq # encoding: [0xc3] 125 %res = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) 126 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3) 127 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) 128 %res3 = add <32 x i8> %res, %res1 129 %res4 = add <32 x i8> %res3, %res2 130 ret <32 x i8> %res4 131 } 132 133 declare <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>) 134 135 define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 136 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128: 137 ; X86: # %bb.0: 138 ; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 139 ; X86-NEXT: vpermt2b %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7d,0xda] 140 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 141 ; X86-NEXT: vpermi2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x75,0xca] 142 ; X86-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] 143 ; X86-NEXT: vpermi2b %xmm2, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x75,0xe2] 144 ; X86-NEXT: vpaddb %xmm3, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3] 145 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] 146 ; X86-NEXT: retl # encoding: [0xc3] 147 ; 148 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128: 149 ; X64: # %bb.0: 150 ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] 151 ; X64-NEXT: vpermt2b %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7d,0xda] 152 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 153 ; X64-NEXT: vpermi2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x75,0xca] 154 ; X64-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] 155 ; X64-NEXT: vpermi2b %xmm2, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x75,0xe2] 156 ; X64-NEXT: vpaddb %xmm3, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3] 157 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] 158 ; X64-NEXT: retq # encoding: [0xc3] 159 %1 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) 160 %2 = bitcast i16 %x3 to <16 x i1> 161 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x1 162 %4 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> zeroinitializer, <16 x i8> %x2) 163 %5 = bitcast i16 %x3 to <16 x i1> 164 %6 = select <16 x i1> %5, <16 x i8> %4, <16 x i8> zeroinitializer 165 %7 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) 166 %res3 = add <16 x i8> %3, %6 167 %res4 = add <16 x i8> %res3, %7 168 ret <16 x i8> %res4 169 } 170 171 declare <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>) 172 173 define <32 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 174 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256: 175 ; X86: # %bb.0: 176 ; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 177 ; X86-NEXT: vpermt2b %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7d,0xda] 178 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 179 ; X86-NEXT: vpermi2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x75,0xca] 180 ; X86-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] 181 ; X86-NEXT: vpermi2b %ymm2, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x75,0xe2] 182 ; X86-NEXT: vpaddb %ymm3, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3] 183 ; X86-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] 184 ; X86-NEXT: retl # encoding: [0xc3] 185 ; 186 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256: 187 ; X64: # %bb.0: 188 ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] 189 ; X64-NEXT: vpermt2b %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7d,0xda] 190 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 191 ; X64-NEXT: vpermi2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x75,0xca] 192 ; X64-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] 193 ; X64-NEXT: vpermi2b %ymm2, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x75,0xe2] 194 ; X64-NEXT: vpaddb %ymm3, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3] 195 ; X64-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] 196 ; X64-NEXT: retq # encoding: [0xc3] 197 %1 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) 198 %2 = bitcast i32 %x3 to <32 x i1> 199 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x1 200 %4 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> zeroinitializer, <32 x i8> %x2) 201 %5 = bitcast i32 %x3 to <32 x i1> 202 %6 = select <32 x i1> %5, <32 x i8> %4, <32 x i8> zeroinitializer 203 %7 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) 204 %res3 = add <32 x i8> %3, %6 205 %res4 = add <32 x i8> %res3, %7 206 ret <32 x i8> %res4 207 } 208 209 define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 210 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128: 211 ; X86: # %bb.0: 212 ; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] 213 ; X86-NEXT: vpermt2b %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7d,0xda] 214 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 215 ; X86-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xca] 216 ; X86-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] 217 ; X86-NEXT: vpermt2b %xmm2, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7d,0xe2] 218 ; X86-NEXT: vpaddb %xmm3, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3] 219 ; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] 220 ; X86-NEXT: retl # encoding: [0xc3] 221 ; 222 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128: 223 ; X64: # %bb.0: 224 ; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] 225 ; X64-NEXT: vpermt2b %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7d,0xda] 226 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 227 ; X64-NEXT: vpermt2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xca] 228 ; X64-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] 229 ; X64-NEXT: vpermt2b %xmm2, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7d,0xe2] 230 ; X64-NEXT: vpaddb %xmm3, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xfc,0xc3] 231 ; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] 232 ; X64-NEXT: retq # encoding: [0xc3] 233 %1 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x1, <16 x i8> %x0, <16 x i8> %x2) 234 %2 = bitcast i16 %x3 to <16 x i1> 235 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %x1 236 %4 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> zeroinitializer, <16 x i8> %x0, <16 x i8> %x2) 237 %5 = bitcast i16 %x3 to <16 x i1> 238 %6 = select <16 x i1> %5, <16 x i8> %4, <16 x i8> zeroinitializer 239 %7 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x1, <16 x i8> %x0, <16 x i8> %x2) 240 %res3 = add <16 x i8> %3, %6 241 %res4 = add <16 x i8> %res3, %7 242 ret <16 x i8> %res4 243 } 244 245 define <32 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 246 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_256: 247 ; X86: # %bb.0: 248 ; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] 249 ; X86-NEXT: vpermt2b %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7d,0xda] 250 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 251 ; X86-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xca] 252 ; X86-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] 253 ; X86-NEXT: vpermt2b %ymm2, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7d,0xe2] 254 ; X86-NEXT: vpaddb %ymm3, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3] 255 ; X86-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] 256 ; X86-NEXT: retl # encoding: [0xc3] 257 ; 258 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_256: 259 ; X64: # %bb.0: 260 ; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] 261 ; X64-NEXT: vpermt2b %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7d,0xda] 262 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 263 ; X64-NEXT: vpermt2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xca] 264 ; X64-NEXT: vpxor %xmm4, %xmm4, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xe4] 265 ; X64-NEXT: vpermt2b %ymm2, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7d,0xe2] 266 ; X64-NEXT: vpaddb %ymm3, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xfc,0xc3] 267 ; X64-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] 268 ; X64-NEXT: retq # encoding: [0xc3] 269 %1 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x1, <32 x i8> %x0, <32 x i8> %x2) 270 %2 = bitcast i32 %x3 to <32 x i1> 271 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %x1 272 %4 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> zeroinitializer, <32 x i8> %x0, <32 x i8> %x2) 273 %5 = bitcast i32 %x3 to <32 x i1> 274 %6 = select <32 x i1> %5, <32 x i8> %4, <32 x i8> zeroinitializer 275 %7 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x1, <32 x i8> %x0, <32 x i8> %x2) 276 %res3 = add <32 x i8> %3, %6 277 %res4 = add <32 x i8> %res3, %7 278 ret <32 x i8> %res4 279 } 280 281 define <16 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { 282 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128: 283 ; X86: # %bb.0: 284 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] 285 ; X86-NEXT: vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x75,0xc2] 286 ; X86-NEXT: retl # encoding: [0xc3] 287 ; 288 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128: 289 ; X64: # %bb.0: 290 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 291 ; X64-NEXT: vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x75,0xc2] 292 ; X64-NEXT: retq # encoding: [0xc3] 293 %1 = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> %x1, <16 x i8> %x0, <16 x i8> %x2) 294 %2 = bitcast i16 %x3 to <16 x i1> 295 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer 296 ret <16 x i8> %3 297 } 298 299 define <32 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { 300 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256: 301 ; X86: # %bb.0: 302 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] 303 ; X86-NEXT: vpermi2b %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x75,0xc2] 304 ; X86-NEXT: retl # encoding: [0xc3] 305 ; 306 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256: 307 ; X64: # %bb.0: 308 ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] 309 ; X64-NEXT: vpermi2b %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x75,0xc2] 310 ; X64-NEXT: retq # encoding: [0xc3] 311 %1 = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> %x1, <32 x i8> %x0, <32 x i8> %x2) 312 %2 = bitcast i32 %x3 to <32 x i1> 313 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer 314 ret <32 x i8> %3 315 } 316