; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX256
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F

; Each function below builds an i1 mask vector by comparing loaded <8 x i32>
; data against zero and then sign- or zero-extends that mask to i8/i16 lanes.
; The three assertion groups per function correspond to the llc invocations
; above (avx512vl with prefer-256-bit, avx512vl without it, and avx512f).

; Load 8 x i32 from %p, compare every lane with zero, sign-extend the
; resulting <8 x i1> mask to <8 x i16>.
define <8 x i16> @testv8i1_sext_v8i16(<8 x i32>* %p) {
; AVX256-LABEL: testv8i1_sext_v8i16:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vzeroupper
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv8i1_sext_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv8i1_sext_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpeqd (%rdi), %ymm0, %ymm0
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
  %vec = load <8 x i32>, <8 x i32>* %p
  %mask = icmp eq <8 x i32> %vec, zeroinitializer
  %wide = sext <8 x i1> %mask to <8 x i16>
  ret <8 x i16> %wide
}

; Compare the 8 x i32 vectors at %p and %q with zero, concatenate the two
; <8 x i1> masks (shuffle indices 0..15 pick every lane of both operands in
; order) and sign-extend the 16-lane mask to <16 x i8>.
define <16 x i8> @testv16i1_sext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_sext_v16i8:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT:    vpcmpeqd (%rsi), %ymm0, %k2
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k2} {z}
; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
; AVX256-NEXT:    vpacksswb %xmm0, %xmm1, %xmm1
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX256-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX256-NEXT:    vzeroupper
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv16i1_sext_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
; AVX512VL-NEXT:    vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv16i1_sext_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
  %vec = load <8 x i32>, <8 x i32>* %p
  %mask = icmp eq <8 x i32> %vec, zeroinitializer
  %vec2 = load <8 x i32>, <8 x i32>* %q
  %mask2 = icmp eq <8 x i32> %vec2, zeroinitializer
  %mask16 = shufflevector <8 x i1> %mask, <8 x i1> %mask2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %wide = sext <16 x i1> %mask16 to <16 x i8>
  ret <16 x i8> %wide
}

; Same 16-lane concatenated compare mask as above, sign-extended to
; <16 x i16>.
define <16 x i16> @testv16i1_sext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_sext_v16i16:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT:    vpcmpeqd (%rsi), %ymm0, %k2
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k2} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv16i1_sext_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
; AVX512VL-NEXT:    vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv16i1_sext_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    retq
  %vec = load <8 x i32>, <8 x i32>* %p
  %mask = icmp eq <8 x i32> %vec, zeroinitializer
  %vec2 = load <8 x i32>, <8 x i32>* %q
  %mask2 = icmp eq <8 x i32> %vec2, zeroinitializer
  %mask16 = shufflevector <8 x i1> %mask, <8 x i1> %mask2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %wide = sext <16 x i1> %mask16 to <16 x i16>
  ret <16 x i16> %wide
}

; Zero-extending counterpart of testv8i1_sext_v8i16: the <8 x i1> compare
; mask is widened to <8 x i16> with zext.
define <8 x i16> @testv8i1_zext_v8i16(<8 x i32>* %p) {
; AVX256-LABEL: testv8i1_zext_v8i16:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX256-NEXT:    vzeroupper
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv8i1_zext_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv8i1_zext_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpcmpeqd (%rdi), %ymm0, %ymm0
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
  %vec = load <8 x i32>, <8 x i32>* %p
  %mask = icmp eq <8 x i32> %vec, zeroinitializer
  %wide = zext <8 x i1> %mask to <8 x i16>
  ret <8 x i16> %wide
}

; Zero-extending counterpart of testv16i1_sext_v16i8: the concatenated
; 16-lane compare mask is widened to <16 x i8> with zext.
define <16 x i8> @testv16i1_zext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_zext_v16i8:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT:    vpcmpeqd (%rsi), %ymm0, %k2
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k2} {z}
; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
; AVX256-NEXT:    vpsrlw $15, %xmm1, %xmm1
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX256-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX256-NEXT:    vzeroupper
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv16i1_zext_v16i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
; AVX512VL-NEXT:    vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv16i1_zext_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512F-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
  %vec = load <8 x i32>, <8 x i32>* %p
  %mask = icmp eq <8 x i32> %vec, zeroinitializer
  %vec2 = load <8 x i32>, <8 x i32>* %q
  %mask2 = icmp eq <8 x i32> %vec2, zeroinitializer
  %mask16 = shufflevector <8 x i1> %mask, <8 x i1> %mask2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %wide = zext <16 x i1> %mask16 to <16 x i8>
  ret <16 x i8> %wide
}

; Zero-extending counterpart of testv16i1_sext_v16i16: the concatenated
; 16-lane compare mask is widened to <16 x i16> with zext.
define <16 x i16> @testv16i1_zext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_zext_v16i16:
; AVX256:       # %bb.0:
; AVX256-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT:    vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT:    vpcmpeqd (%rsi), %ymm0, %k2
; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k1} {z}
; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k2} {z}
; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
; AVX256-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX256-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX256-NEXT:    retq
;
; AVX512VL-LABEL: testv16i1_zext_v16i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vpcmpeqd (%rdi), %ymm0, %k0
; AVX512VL-NEXT:    vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512VL-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512F-LABEL: testv16i1_zext_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovdqa (%rdi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT:    vmovdqa (%rsi), %ymm0
; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    kunpckbw %k0, %k1, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT:    retq
  %vec = load <8 x i32>, <8 x i32>* %p
  %mask = icmp eq <8 x i32> %vec, zeroinitializer
  %vec2 = load <8 x i32>, <8 x i32>* %q
  %mask2 = icmp eq <8 x i32> %vec2, zeroinitializer
  %mask16 = shufflevector <8 x i1> %mask, <8 x i1> %mask2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %wide = zext <16 x i1> %mask16 to <16 x i16>
  ret <16 x i16> %wide
}