; Codegen test for logical right shift (lshr) of 128-bit integer vectors on x86.
; The llc invocations below cover x86_64 with sse2, sse4.1, avx, avx2, xop and
; several avx512 feature sets, plus a single i686 sse2 configuration.
; The expected-assembly comments are tool-generated (see the autogeneration note
; just below); regenerate them with that script instead of editing them by hand.
;
; Functions visible in this part of the file
;   var_shift_v2i64 / var_shift_v4i32 / var_shift_v8i16 / var_shift_v16i8
;       return `lshr %a, %b`, i.e. every lane of %a is shifted right by the
;       amount held in the corresponding lane of %b.
;   splatvar_shift_v2i64 / splatvar_shift_v4i32 / splatvar_shift_v8i16
;       first build %splat with a shufflevector of %b that uses an all-zero
;       mask (each result lane reads element 0 of %b), then return
;       `lshr %a, %splat`.
;   splatvar_shift_v16i8
;       only the signature and the expected assembly are visible here; the IR
;       body lies past the end of this excerpt (presumably the same
;       splat-then-lshr shape as its siblings - confirm against the full file).
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2 3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41 4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX1 7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=XOP --check-prefix=XOPAVX2 8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512DQ 9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW 10 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512DQVL 11 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512BWVL 12 ; 13 ; Just one 32-bit run to make sure we do reasonable things for i64 shifts. 
14 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X32-SSE --check-prefix=X32-SSE2 15 16 ; 17 ; Variable Shifts 18 ; 19 20 define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind { 21 ; SSE2-LABEL: var_shift_v2i64: 22 ; SSE2: # %bb.0: 23 ; SSE2-NEXT: movdqa %xmm0, %xmm2 24 ; SSE2-NEXT: psrlq %xmm1, %xmm2 25 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 26 ; SSE2-NEXT: psrlq %xmm1, %xmm0 27 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] 28 ; SSE2-NEXT: retq 29 ; 30 ; SSE41-LABEL: var_shift_v2i64: 31 ; SSE41: # %bb.0: 32 ; SSE41-NEXT: movdqa %xmm0, %xmm2 33 ; SSE41-NEXT: psrlq %xmm1, %xmm2 34 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 35 ; SSE41-NEXT: psrlq %xmm1, %xmm0 36 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] 37 ; SSE41-NEXT: retq 38 ; 39 ; AVX1-LABEL: var_shift_v2i64: 40 ; AVX1: # %bb.0: 41 ; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm2 42 ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 43 ; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 44 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] 45 ; AVX1-NEXT: retq 46 ; 47 ; AVX2-LABEL: var_shift_v2i64: 48 ; AVX2: # %bb.0: 49 ; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 50 ; AVX2-NEXT: retq 51 ; 52 ; XOPAVX1-LABEL: var_shift_v2i64: 53 ; XOPAVX1: # %bb.0: 54 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 55 ; XOPAVX1-NEXT: vpsubq %xmm1, %xmm2, %xmm1 56 ; XOPAVX1-NEXT: vpshlq %xmm1, %xmm0, %xmm0 57 ; XOPAVX1-NEXT: retq 58 ; 59 ; XOPAVX2-LABEL: var_shift_v2i64: 60 ; XOPAVX2: # %bb.0: 61 ; XOPAVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 62 ; XOPAVX2-NEXT: retq 63 ; 64 ; AVX512-LABEL: var_shift_v2i64: 65 ; AVX512: # %bb.0: 66 ; AVX512-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 67 ; AVX512-NEXT: retq 68 ; 69 ; AVX512VL-LABEL: var_shift_v2i64: 70 ; AVX512VL: # %bb.0: 71 ; AVX512VL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 72 ; AVX512VL-NEXT: retq 73 ; 74 ; X32-SSE-LABEL: var_shift_v2i64: 75 ; X32-SSE: # %bb.0: 76 ; X32-SSE-NEXT: movdqa 
%xmm0, %xmm2 77 ; X32-SSE-NEXT: psrlq %xmm1, %xmm2 78 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 79 ; X32-SSE-NEXT: psrlq %xmm1, %xmm0 80 ; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] 81 ; X32-SSE-NEXT: retl 82 %shift = lshr <2 x i64> %a, %b 83 ret <2 x i64> %shift 84 } 85 86 define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind { 87 ; SSE2-LABEL: var_shift_v4i32: 88 ; SSE2: # %bb.0: 89 ; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7] 90 ; SSE2-NEXT: movdqa %xmm0, %xmm3 91 ; SSE2-NEXT: psrld %xmm2, %xmm3 92 ; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm1[0,1,1,1,4,5,6,7] 93 ; SSE2-NEXT: movdqa %xmm0, %xmm2 94 ; SSE2-NEXT: psrld %xmm4, %xmm2 95 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 96 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 97 ; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7] 98 ; SSE2-NEXT: movdqa %xmm0, %xmm4 99 ; SSE2-NEXT: psrld %xmm3, %xmm4 100 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7] 101 ; SSE2-NEXT: psrld %xmm1, %xmm0 102 ; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm4[1] 103 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm0[0,3] 104 ; SSE2-NEXT: movaps %xmm2, %xmm0 105 ; SSE2-NEXT: retq 106 ; 107 ; SSE41-LABEL: var_shift_v4i32: 108 ; SSE41: # %bb.0: 109 ; SSE41-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7] 110 ; SSE41-NEXT: movdqa %xmm0, %xmm3 111 ; SSE41-NEXT: psrld %xmm2, %xmm3 112 ; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] 113 ; SSE41-NEXT: pshuflw {{.*#+}} xmm4 = xmm2[2,3,3,3,4,5,6,7] 114 ; SSE41-NEXT: movdqa %xmm0, %xmm5 115 ; SSE41-NEXT: psrld %xmm4, %xmm5 116 ; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm3[0,1,2,3],xmm5[4,5,6,7] 117 ; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7] 118 ; SSE41-NEXT: movdqa %xmm0, %xmm3 119 ; SSE41-NEXT: psrld %xmm1, %xmm3 120 ; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm2[0,1,1,1,4,5,6,7] 121 ; SSE41-NEXT: psrld %xmm1, %xmm0 122 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7] 123 ; 
SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm5[2,3],xmm0[4,5],xmm5[6,7] 124 ; SSE41-NEXT: retq 125 ; 126 ; AVX1-LABEL: var_shift_v4i32: 127 ; AVX1: # %bb.0: 128 ; AVX1-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 129 ; AVX1-NEXT: vpsrld %xmm2, %xmm0, %xmm2 130 ; AVX1-NEXT: vpsrlq $32, %xmm1, %xmm3 131 ; AVX1-NEXT: vpsrld %xmm3, %xmm0, %xmm3 132 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] 133 ; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 134 ; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm3 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 135 ; AVX1-NEXT: vpsrld %xmm3, %xmm0, %xmm3 136 ; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 137 ; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 138 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7] 139 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] 140 ; AVX1-NEXT: retq 141 ; 142 ; AVX2-LABEL: var_shift_v4i32: 143 ; AVX2: # %bb.0: 144 ; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 145 ; AVX2-NEXT: retq 146 ; 147 ; XOPAVX1-LABEL: var_shift_v4i32: 148 ; XOPAVX1: # %bb.0: 149 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 150 ; XOPAVX1-NEXT: vpsubd %xmm1, %xmm2, %xmm1 151 ; XOPAVX1-NEXT: vpshld %xmm1, %xmm0, %xmm0 152 ; XOPAVX1-NEXT: retq 153 ; 154 ; XOPAVX2-LABEL: var_shift_v4i32: 155 ; XOPAVX2: # %bb.0: 156 ; XOPAVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 157 ; XOPAVX2-NEXT: retq 158 ; 159 ; AVX512-LABEL: var_shift_v4i32: 160 ; AVX512: # %bb.0: 161 ; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 162 ; AVX512-NEXT: retq 163 ; 164 ; AVX512VL-LABEL: var_shift_v4i32: 165 ; AVX512VL: # %bb.0: 166 ; AVX512VL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 167 ; AVX512VL-NEXT: retq 168 ; 169 ; X32-SSE-LABEL: var_shift_v4i32: 170 ; X32-SSE: # %bb.0: 171 ; X32-SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7] 172 ; X32-SSE-NEXT: movdqa %xmm0, %xmm3 173 ; X32-SSE-NEXT: psrld %xmm2, %xmm3 174 ; X32-SSE-NEXT: pshuflw {{.*#+}} xmm4 = xmm1[0,1,1,1,4,5,6,7] 175 ; 
X32-SSE-NEXT: movdqa %xmm0, %xmm2 176 ; X32-SSE-NEXT: psrld %xmm4, %xmm2 177 ; X32-SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 178 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 179 ; X32-SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7] 180 ; X32-SSE-NEXT: movdqa %xmm0, %xmm4 181 ; X32-SSE-NEXT: psrld %xmm3, %xmm4 182 ; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7] 183 ; X32-SSE-NEXT: psrld %xmm1, %xmm0 184 ; X32-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm4[1] 185 ; X32-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm0[0,3] 186 ; X32-SSE-NEXT: movaps %xmm2, %xmm0 187 ; X32-SSE-NEXT: retl 188 %shift = lshr <4 x i32> %a, %b 189 ret <4 x i32> %shift 190 } 191 192 define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { 193 ; SSE2-LABEL: var_shift_v8i16: 194 ; SSE2: # %bb.0: 195 ; SSE2-NEXT: psllw $12, %xmm1 196 ; SSE2-NEXT: movdqa %xmm1, %xmm2 197 ; SSE2-NEXT: psraw $15, %xmm2 198 ; SSE2-NEXT: movdqa %xmm2, %xmm3 199 ; SSE2-NEXT: pandn %xmm0, %xmm3 200 ; SSE2-NEXT: psrlw $8, %xmm0 201 ; SSE2-NEXT: pand %xmm2, %xmm0 202 ; SSE2-NEXT: por %xmm3, %xmm0 203 ; SSE2-NEXT: paddw %xmm1, %xmm1 204 ; SSE2-NEXT: movdqa %xmm1, %xmm2 205 ; SSE2-NEXT: psraw $15, %xmm2 206 ; SSE2-NEXT: movdqa %xmm2, %xmm3 207 ; SSE2-NEXT: pandn %xmm0, %xmm3 208 ; SSE2-NEXT: psrlw $4, %xmm0 209 ; SSE2-NEXT: pand %xmm2, %xmm0 210 ; SSE2-NEXT: por %xmm3, %xmm0 211 ; SSE2-NEXT: paddw %xmm1, %xmm1 212 ; SSE2-NEXT: movdqa %xmm1, %xmm2 213 ; SSE2-NEXT: psraw $15, %xmm2 214 ; SSE2-NEXT: movdqa %xmm2, %xmm3 215 ; SSE2-NEXT: pandn %xmm0, %xmm3 216 ; SSE2-NEXT: psrlw $2, %xmm0 217 ; SSE2-NEXT: pand %xmm2, %xmm0 218 ; SSE2-NEXT: por %xmm3, %xmm0 219 ; SSE2-NEXT: paddw %xmm1, %xmm1 220 ; SSE2-NEXT: psraw $15, %xmm1 221 ; SSE2-NEXT: movdqa %xmm1, %xmm2 222 ; SSE2-NEXT: pandn %xmm0, %xmm2 223 ; SSE2-NEXT: psrlw $1, %xmm0 224 ; SSE2-NEXT: pand %xmm1, %xmm0 225 ; SSE2-NEXT: por %xmm2, %xmm0 226 ; SSE2-NEXT: retq 227 ; 228 ; SSE41-LABEL: var_shift_v8i16: 229 ; 
SSE41: # %bb.0: 230 ; SSE41-NEXT: movdqa %xmm0, %xmm2 231 ; SSE41-NEXT: movdqa %xmm1, %xmm0 232 ; SSE41-NEXT: psllw $12, %xmm0 233 ; SSE41-NEXT: psllw $4, %xmm1 234 ; SSE41-NEXT: por %xmm0, %xmm1 235 ; SSE41-NEXT: movdqa %xmm1, %xmm3 236 ; SSE41-NEXT: paddw %xmm1, %xmm3 237 ; SSE41-NEXT: movdqa %xmm2, %xmm4 238 ; SSE41-NEXT: psrlw $8, %xmm4 239 ; SSE41-NEXT: movdqa %xmm1, %xmm0 240 ; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm2 241 ; SSE41-NEXT: movdqa %xmm2, %xmm1 242 ; SSE41-NEXT: psrlw $4, %xmm1 243 ; SSE41-NEXT: movdqa %xmm3, %xmm0 244 ; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 245 ; SSE41-NEXT: movdqa %xmm2, %xmm1 246 ; SSE41-NEXT: psrlw $2, %xmm1 247 ; SSE41-NEXT: paddw %xmm3, %xmm3 248 ; SSE41-NEXT: movdqa %xmm3, %xmm0 249 ; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 250 ; SSE41-NEXT: movdqa %xmm2, %xmm1 251 ; SSE41-NEXT: psrlw $1, %xmm1 252 ; SSE41-NEXT: paddw %xmm3, %xmm3 253 ; SSE41-NEXT: movdqa %xmm3, %xmm0 254 ; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 255 ; SSE41-NEXT: movdqa %xmm2, %xmm0 256 ; SSE41-NEXT: retq 257 ; 258 ; AVX1-LABEL: var_shift_v8i16: 259 ; AVX1: # %bb.0: 260 ; AVX1-NEXT: vpsllw $12, %xmm1, %xmm2 261 ; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1 262 ; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1 263 ; AVX1-NEXT: vpaddw %xmm1, %xmm1, %xmm2 264 ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm3 265 ; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 266 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm1 267 ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 268 ; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1 269 ; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2 270 ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 271 ; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1 272 ; AVX1-NEXT: vpaddw %xmm2, %xmm2, %xmm2 273 ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 274 ; AVX1-NEXT: retq 275 ; 276 ; AVX2-LABEL: var_shift_v8i16: 277 ; AVX2: # %bb.0: 278 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 279 ; AVX2-NEXT: vpmovzxwd {{.*#+}} 
ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 280 ; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 281 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31] 282 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] 283 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 284 ; AVX2-NEXT: vzeroupper 285 ; AVX2-NEXT: retq 286 ; 287 ; XOP-LABEL: var_shift_v8i16: 288 ; XOP: # %bb.0: 289 ; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2 290 ; XOP-NEXT: vpsubw %xmm1, %xmm2, %xmm1 291 ; XOP-NEXT: vpshlw %xmm1, %xmm0, %xmm0 292 ; XOP-NEXT: retq 293 ; 294 ; AVX512DQ-LABEL: var_shift_v8i16: 295 ; AVX512DQ: # %bb.0: 296 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 297 ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 298 ; AVX512DQ-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 299 ; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 300 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 301 ; AVX512DQ-NEXT: vzeroupper 302 ; AVX512DQ-NEXT: retq 303 ; 304 ; AVX512BW-LABEL: var_shift_v8i16: 305 ; AVX512BW: # %bb.0: 306 ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 307 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 308 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 309 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 310 ; AVX512BW-NEXT: vzeroupper 311 ; AVX512BW-NEXT: retq 312 ; 313 ; AVX512DQVL-LABEL: var_shift_v8i16: 314 ; AVX512DQVL: # %bb.0: 315 ; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 316 ; AVX512DQVL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 
317 ; AVX512DQVL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 318 ; AVX512DQVL-NEXT: vpmovdw %ymm0, %xmm0 319 ; AVX512DQVL-NEXT: vzeroupper 320 ; AVX512DQVL-NEXT: retq 321 ; 322 ; AVX512BWVL-LABEL: var_shift_v8i16: 323 ; AVX512BWVL: # %bb.0: 324 ; AVX512BWVL-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 325 ; AVX512BWVL-NEXT: retq 326 ; 327 ; X32-SSE-LABEL: var_shift_v8i16: 328 ; X32-SSE: # %bb.0: 329 ; X32-SSE-NEXT: psllw $12, %xmm1 330 ; X32-SSE-NEXT: movdqa %xmm1, %xmm2 331 ; X32-SSE-NEXT: psraw $15, %xmm2 332 ; X32-SSE-NEXT: movdqa %xmm2, %xmm3 333 ; X32-SSE-NEXT: pandn %xmm0, %xmm3 334 ; X32-SSE-NEXT: psrlw $8, %xmm0 335 ; X32-SSE-NEXT: pand %xmm2, %xmm0 336 ; X32-SSE-NEXT: por %xmm3, %xmm0 337 ; X32-SSE-NEXT: paddw %xmm1, %xmm1 338 ; X32-SSE-NEXT: movdqa %xmm1, %xmm2 339 ; X32-SSE-NEXT: psraw $15, %xmm2 340 ; X32-SSE-NEXT: movdqa %xmm2, %xmm3 341 ; X32-SSE-NEXT: pandn %xmm0, %xmm3 342 ; X32-SSE-NEXT: psrlw $4, %xmm0 343 ; X32-SSE-NEXT: pand %xmm2, %xmm0 344 ; X32-SSE-NEXT: por %xmm3, %xmm0 345 ; X32-SSE-NEXT: paddw %xmm1, %xmm1 346 ; X32-SSE-NEXT: movdqa %xmm1, %xmm2 347 ; X32-SSE-NEXT: psraw $15, %xmm2 348 ; X32-SSE-NEXT: movdqa %xmm2, %xmm3 349 ; X32-SSE-NEXT: pandn %xmm0, %xmm3 350 ; X32-SSE-NEXT: psrlw $2, %xmm0 351 ; X32-SSE-NEXT: pand %xmm2, %xmm0 352 ; X32-SSE-NEXT: por %xmm3, %xmm0 353 ; X32-SSE-NEXT: paddw %xmm1, %xmm1 354 ; X32-SSE-NEXT: psraw $15, %xmm1 355 ; X32-SSE-NEXT: movdqa %xmm1, %xmm2 356 ; X32-SSE-NEXT: pandn %xmm0, %xmm2 357 ; X32-SSE-NEXT: psrlw $1, %xmm0 358 ; X32-SSE-NEXT: pand %xmm1, %xmm0 359 ; X32-SSE-NEXT: por %xmm2, %xmm0 360 ; X32-SSE-NEXT: retl 361 %shift = lshr <8 x i16> %a, %b 362 ret <8 x i16> %shift 363 } 364 365 define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { 366 ; SSE2-LABEL: var_shift_v16i8: 367 ; SSE2: # %bb.0: 368 ; SSE2-NEXT: psllw $5, %xmm1 369 ; SSE2-NEXT: pxor %xmm2, %xmm2 370 ; SSE2-NEXT: pxor %xmm3, %xmm3 371 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3 372 ; SSE2-NEXT: movdqa %xmm3, %xmm4 373 ; SSE2-NEXT: pandn %xmm0, 
%xmm4 374 ; SSE2-NEXT: psrlw $4, %xmm0 375 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 376 ; SSE2-NEXT: pand %xmm3, %xmm0 377 ; SSE2-NEXT: por %xmm4, %xmm0 378 ; SSE2-NEXT: paddb %xmm1, %xmm1 379 ; SSE2-NEXT: pxor %xmm3, %xmm3 380 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm3 381 ; SSE2-NEXT: movdqa %xmm3, %xmm4 382 ; SSE2-NEXT: pandn %xmm0, %xmm4 383 ; SSE2-NEXT: psrlw $2, %xmm0 384 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 385 ; SSE2-NEXT: pand %xmm3, %xmm0 386 ; SSE2-NEXT: por %xmm4, %xmm0 387 ; SSE2-NEXT: paddb %xmm1, %xmm1 388 ; SSE2-NEXT: pcmpgtb %xmm1, %xmm2 389 ; SSE2-NEXT: movdqa %xmm2, %xmm1 390 ; SSE2-NEXT: pandn %xmm0, %xmm1 391 ; SSE2-NEXT: psrlw $1, %xmm0 392 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 393 ; SSE2-NEXT: pand %xmm2, %xmm0 394 ; SSE2-NEXT: por %xmm1, %xmm0 395 ; SSE2-NEXT: retq 396 ; 397 ; SSE41-LABEL: var_shift_v16i8: 398 ; SSE41: # %bb.0: 399 ; SSE41-NEXT: movdqa %xmm0, %xmm2 400 ; SSE41-NEXT: psllw $5, %xmm1 401 ; SSE41-NEXT: movdqa %xmm0, %xmm3 402 ; SSE41-NEXT: psrlw $4, %xmm3 403 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm3 404 ; SSE41-NEXT: movdqa %xmm1, %xmm0 405 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2 406 ; SSE41-NEXT: movdqa %xmm2, %xmm3 407 ; SSE41-NEXT: psrlw $2, %xmm3 408 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm3 409 ; SSE41-NEXT: paddb %xmm1, %xmm1 410 ; SSE41-NEXT: movdqa %xmm1, %xmm0 411 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2 412 ; SSE41-NEXT: movdqa %xmm2, %xmm3 413 ; SSE41-NEXT: psrlw $1, %xmm3 414 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm3 415 ; SSE41-NEXT: paddb %xmm1, %xmm1 416 ; SSE41-NEXT: movdqa %xmm1, %xmm0 417 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2 418 ; SSE41-NEXT: movdqa %xmm2, %xmm0 419 ; SSE41-NEXT: retq 420 ; 421 ; AVX-LABEL: var_shift_v16i8: 422 ; AVX: # %bb.0: 423 ; AVX-NEXT: vpsllw $5, %xmm1, %xmm1 424 ; AVX-NEXT: vpsrlw $4, %xmm0, %xmm2 425 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 426 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 427 ; AVX-NEXT: vpsrlw $2, %xmm0, %xmm2 428 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 429 ; 
AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1 430 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 431 ; AVX-NEXT: vpsrlw $1, %xmm0, %xmm2 432 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 433 ; AVX-NEXT: vpaddb %xmm1, %xmm1, %xmm1 434 ; AVX-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 435 ; AVX-NEXT: retq 436 ; 437 ; XOP-LABEL: var_shift_v16i8: 438 ; XOP: # %bb.0: 439 ; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2 440 ; XOP-NEXT: vpsubb %xmm1, %xmm2, %xmm1 441 ; XOP-NEXT: vpshlb %xmm1, %xmm0, %xmm0 442 ; XOP-NEXT: retq 443 ; 444 ; AVX512DQ-LABEL: var_shift_v16i8: 445 ; AVX512DQ: # %bb.0: 446 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero 447 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 448 ; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 449 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 450 ; AVX512DQ-NEXT: vzeroupper 451 ; AVX512DQ-NEXT: retq 452 ; 453 ; AVX512BW-LABEL: var_shift_v16i8: 454 ; AVX512BW: # %bb.0: 455 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 456 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 457 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 458 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 459 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 460 ; AVX512BW-NEXT: vzeroupper 461 ; AVX512BW-NEXT: retq 462 ; 463 ; AVX512DQVL-LABEL: var_shift_v16i8: 464 ; AVX512DQVL: # %bb.0: 465 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero 466 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 467 ; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 468 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 469 ; AVX512DQVL-NEXT: vzeroupper 470 ; AVX512DQVL-NEXT: retq 471 ; 472 ; AVX512BWVL-LABEL: var_shift_v16i8: 473 ; AVX512BWVL: # %bb.0: 474 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 475 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 476 ; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 477 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 478 ; AVX512BWVL-NEXT: vzeroupper 479 ; AVX512BWVL-NEXT: retq 480 ; 481 ; X32-SSE-LABEL: var_shift_v16i8: 482 ; X32-SSE: # %bb.0: 483 ; X32-SSE-NEXT: psllw $5, %xmm1 484 ; X32-SSE-NEXT: pxor %xmm2, %xmm2 485 ; X32-SSE-NEXT: pxor %xmm3, %xmm3 486 ; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm3 487 ; X32-SSE-NEXT: movdqa %xmm3, %xmm4 488 ; X32-SSE-NEXT: pandn %xmm0, %xmm4 489 ; X32-SSE-NEXT: psrlw $4, %xmm0 490 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0 491 ; X32-SSE-NEXT: pand %xmm3, %xmm0 492 ; X32-SSE-NEXT: por %xmm4, %xmm0 493 ; X32-SSE-NEXT: paddb %xmm1, %xmm1 494 ; X32-SSE-NEXT: pxor %xmm3, %xmm3 495 ; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm3 496 ; X32-SSE-NEXT: movdqa %xmm3, %xmm4 497 ; X32-SSE-NEXT: pandn %xmm0, %xmm4 498 ; X32-SSE-NEXT: psrlw $2, %xmm0 499 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0 500 ; X32-SSE-NEXT: pand %xmm3, %xmm0 501 ; X32-SSE-NEXT: por %xmm4, %xmm0 502 ; X32-SSE-NEXT: paddb %xmm1, %xmm1 503 ; X32-SSE-NEXT: pcmpgtb %xmm1, %xmm2 504 ; X32-SSE-NEXT: movdqa %xmm2, %xmm1 505 ; X32-SSE-NEXT: pandn %xmm0, %xmm1 506 ; X32-SSE-NEXT: psrlw $1, %xmm0 507 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0 508 ; X32-SSE-NEXT: pand %xmm2, %xmm0 509 ; X32-SSE-NEXT: por %xmm1, %xmm0 510 ; X32-SSE-NEXT: retl 511 %shift = lshr <16 x i8> %a, %b 512 ret <16 x i8> %shift 513 } 514 515 ; 516 ; Uniform Variable Shifts 517 ; 518 519 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind { 520 ; SSE-LABEL: splatvar_shift_v2i64: 521 ; SSE: # %bb.0: 522 ; SSE-NEXT: psrlq %xmm1, %xmm0 523 ; SSE-NEXT: retq 524 ; 525 ; AVX-LABEL: splatvar_shift_v2i64: 526 ; AVX: # %bb.0: 527 ; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 528 ; AVX-NEXT: retq 529 ; 530 ; XOP-LABEL: 
splatvar_shift_v2i64: 531 ; XOP: # %bb.0: 532 ; XOP-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 533 ; XOP-NEXT: retq 534 ; 535 ; AVX512-LABEL: splatvar_shift_v2i64: 536 ; AVX512: # %bb.0: 537 ; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 538 ; AVX512-NEXT: retq 539 ; 540 ; AVX512VL-LABEL: splatvar_shift_v2i64: 541 ; AVX512VL: # %bb.0: 542 ; AVX512VL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 543 ; AVX512VL-NEXT: retq 544 ; 545 ; X32-SSE-LABEL: splatvar_shift_v2i64: 546 ; X32-SSE: # %bb.0: 547 ; X32-SSE-NEXT: psrlq %xmm1, %xmm0 548 ; X32-SSE-NEXT: retl 549 %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer 550 %shift = lshr <2 x i64> %a, %splat 551 ret <2 x i64> %shift 552 } 553 554 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind { 555 ; SSE2-LABEL: splatvar_shift_v4i32: 556 ; SSE2: # %bb.0: 557 ; SSE2-NEXT: xorps %xmm2, %xmm2 558 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] 559 ; SSE2-NEXT: psrld %xmm2, %xmm0 560 ; SSE2-NEXT: retq 561 ; 562 ; SSE41-LABEL: splatvar_shift_v4i32: 563 ; SSE41: # %bb.0: 564 ; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 565 ; SSE41-NEXT: psrld %xmm1, %xmm0 566 ; SSE41-NEXT: retq 567 ; 568 ; AVX-LABEL: splatvar_shift_v4i32: 569 ; AVX: # %bb.0: 570 ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 571 ; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 572 ; AVX-NEXT: retq 573 ; 574 ; XOP-LABEL: splatvar_shift_v4i32: 575 ; XOP: # %bb.0: 576 ; XOP-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 577 ; XOP-NEXT: vpsrld %xmm1, %xmm0, %xmm0 578 ; XOP-NEXT: retq 579 ; 580 ; AVX512-LABEL: splatvar_shift_v4i32: 581 ; AVX512: # %bb.0: 582 ; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 583 ; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0 584 ; AVX512-NEXT: retq 585 ; 586 ; AVX512VL-LABEL: splatvar_shift_v4i32: 587 ; AVX512VL: # %bb.0: 588 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero 589 ; AVX512VL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 590 ; 
AVX512VL-NEXT: retq 591 ; 592 ; X32-SSE-LABEL: splatvar_shift_v4i32: 593 ; X32-SSE: # %bb.0: 594 ; X32-SSE-NEXT: xorps %xmm2, %xmm2 595 ; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] 596 ; X32-SSE-NEXT: psrld %xmm2, %xmm0 597 ; X32-SSE-NEXT: retl 598 %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer 599 %shift = lshr <4 x i32> %a, %splat 600 ret <4 x i32> %shift 601 } 602 603 define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { 604 ; SSE2-LABEL: splatvar_shift_v8i16: 605 ; SSE2: # %bb.0: 606 ; SSE2-NEXT: pextrw $0, %xmm1, %eax 607 ; SSE2-NEXT: movd %eax, %xmm1 608 ; SSE2-NEXT: psrlw %xmm1, %xmm0 609 ; SSE2-NEXT: retq 610 ; 611 ; SSE41-LABEL: splatvar_shift_v8i16: 612 ; SSE41: # %bb.0: 613 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 614 ; SSE41-NEXT: psrlw %xmm1, %xmm0 615 ; SSE41-NEXT: retq 616 ; 617 ; AVX-LABEL: splatvar_shift_v8i16: 618 ; AVX: # %bb.0: 619 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 620 ; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 621 ; AVX-NEXT: retq 622 ; 623 ; XOP-LABEL: splatvar_shift_v8i16: 624 ; XOP: # %bb.0: 625 ; XOP-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 626 ; XOP-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 627 ; XOP-NEXT: retq 628 ; 629 ; AVX512-LABEL: splatvar_shift_v8i16: 630 ; AVX512: # %bb.0: 631 ; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 632 ; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 633 ; AVX512-NEXT: retq 634 ; 635 ; AVX512VL-LABEL: splatvar_shift_v8i16: 636 ; AVX512VL: # %bb.0: 637 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 638 ; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 639 ; AVX512VL-NEXT: retq 640 ; 641 ; X32-SSE-LABEL: splatvar_shift_v8i16: 642 ; X32-SSE: # %bb.0: 643 ; X32-SSE-NEXT: pextrw $0, %xmm1, %eax 644 ; X32-SSE-NEXT: movd %eax, %xmm1 645 ; X32-SSE-NEXT: psrlw 
%xmm1, %xmm0 646 ; X32-SSE-NEXT: retl 647 %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer 648 %shift = lshr <8 x i16> %a, %splat 649 ret <8 x i16> %shift 650 } 651 652 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { 653 ; SSE2-LABEL: splatvar_shift_v16i8: 654 ; SSE2: # %bb.0: 655 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 656 ; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] 657 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,0,0] 658 ; SSE2-NEXT: psllw $5, %xmm2 659 ; SSE2-NEXT: pxor %xmm1, %xmm1 660 ; SSE2-NEXT: pxor %xmm3, %xmm3 661 ; SSE2-NEXT: pcmpgtb %xmm2, %xmm3 662 ; SSE2-NEXT: movdqa %xmm3, %xmm4 663 ; SSE2-NEXT: pandn %xmm0, %xmm4 664 ; SSE2-NEXT: psrlw $4, %xmm0 665 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 666 ; SSE2-NEXT: pand %xmm3, %xmm0 667 ; SSE2-NEXT: por %xmm4, %xmm0 668 ; SSE2-NEXT: paddb %xmm2, %xmm2 669 ; SSE2-NEXT: pxor %xmm3, %xmm3 670 ; SSE2-NEXT: pcmpgtb %xmm2, %xmm3 671 ; SSE2-NEXT: movdqa %xmm3, %xmm4 672 ; SSE2-NEXT: pandn %xmm0, %xmm4 673 ; SSE2-NEXT: psrlw $2, %xmm0 674 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 675 ; SSE2-NEXT: pand %xmm3, %xmm0 676 ; SSE2-NEXT: por %xmm4, %xmm0 677 ; SSE2-NEXT: paddb %xmm2, %xmm2 678 ; SSE2-NEXT: pcmpgtb %xmm2, %xmm1 679 ; SSE2-NEXT: movdqa %xmm1, %xmm2 680 ; SSE2-NEXT: pandn %xmm0, %xmm2 681 ; SSE2-NEXT: psrlw $1, %xmm0 682 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 683 ; SSE2-NEXT: pand %xmm1, %xmm0 684 ; SSE2-NEXT: por %xmm2, %xmm0 685 ; SSE2-NEXT: retq 686 ; 687 ; SSE41-LABEL: splatvar_shift_v16i8: 688 ; SSE41: # %bb.0: 689 ; SSE41-NEXT: movdqa %xmm0, %xmm2 690 ; SSE41-NEXT: pxor %xmm0, %xmm0 691 ; SSE41-NEXT: pshufb %xmm0, %xmm1 692 ; SSE41-NEXT: psllw $5, %xmm1 693 ; SSE41-NEXT: movdqa %xmm1, %xmm3 694 ; SSE41-NEXT: paddb %xmm1, %xmm3 695 ; SSE41-NEXT: movdqa %xmm2, %xmm4 696 ; SSE41-NEXT: psrlw $4, %xmm4 697 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm4 698 ; SSE41-NEXT: movdqa %xmm1, %xmm0 699 ; SSE41-NEXT: 
pblendvb %xmm0, %xmm4, %xmm2 700 ; SSE41-NEXT: movdqa %xmm2, %xmm1 701 ; SSE41-NEXT: psrlw $2, %xmm1 702 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 703 ; SSE41-NEXT: movdqa %xmm3, %xmm0 704 ; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 705 ; SSE41-NEXT: movdqa %xmm2, %xmm1 706 ; SSE41-NEXT: psrlw $1, %xmm1 707 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 708 ; SSE41-NEXT: paddb %xmm3, %xmm3 709 ; SSE41-NEXT: movdqa %xmm3, %xmm0 710 ; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 711 ; SSE41-NEXT: movdqa %xmm2, %xmm0 712 ; SSE41-NEXT: retq 713 ; 714 ; AVX1-LABEL: splatvar_shift_v16i8: 715 ; AVX1: # %bb.0: 716 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 717 ; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 718 ; AVX1-NEXT: vpsllw $5, %xmm1, %xmm1 719 ; AVX1-NEXT: vpaddb %xmm1, %xmm1, %xmm2 720 ; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm3 721 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm3, %xmm3 722 ; AVX1-NEXT: vpblendvb %xmm1, %xmm3, %xmm0, %xmm0 723 ; AVX1-NEXT: vpsrlw $2, %xmm0, %xmm1 724 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 725 ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 726 ; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm1 727 ; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 728 ; AVX1-NEXT: vpaddb %xmm2, %xmm2, %xmm2 729 ; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 730 ; AVX1-NEXT: retq 731 ; 732 ; AVX2-LABEL: splatvar_shift_v16i8: 733 ; AVX2: # %bb.0: 734 ; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1 735 ; AVX2-NEXT: vpsllw $5, %xmm1, %xmm1 736 ; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm2 737 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 738 ; AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 739 ; AVX2-NEXT: vpsrlw $2, %xmm0, %xmm2 740 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 741 ; AVX2-NEXT: vpaddb %xmm1, %xmm1, %xmm1 742 ; AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 743 ; AVX2-NEXT: vpsrlw $1, %xmm0, %xmm2 744 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 745 ; AVX2-NEXT: vpaddb %xmm1, %xmm1, %xmm1 746 ; AVX2-NEXT: vpblendvb %xmm1, %xmm2, %xmm0, %xmm0 747 ; AVX2-NEXT: retq 748 ; 749 ; XOPAVX1-LABEL: 
splatvar_shift_v16i8: 750 ; XOPAVX1: # %bb.0: 751 ; XOPAVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 752 ; XOPAVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 753 ; XOPAVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm1 754 ; XOPAVX1-NEXT: vpshlb %xmm1, %xmm0, %xmm0 755 ; XOPAVX1-NEXT: retq 756 ; 757 ; XOPAVX2-LABEL: splatvar_shift_v16i8: 758 ; XOPAVX2: # %bb.0: 759 ; XOPAVX2-NEXT: vpbroadcastb %xmm1, %xmm1 760 ; XOPAVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 761 ; XOPAVX2-NEXT: vpsubb %xmm1, %xmm2, %xmm1 762 ; XOPAVX2-NEXT: vpshlb %xmm1, %xmm0, %xmm0 763 ; XOPAVX2-NEXT: retq 764 ; 765 ; AVX512DQ-LABEL: splatvar_shift_v16i8: 766 ; AVX512DQ: # %bb.0: 767 ; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1 768 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 769 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero 770 ; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 771 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 772 ; AVX512DQ-NEXT: vzeroupper 773 ; AVX512DQ-NEXT: retq 774 ; 775 ; AVX512BW-LABEL: splatvar_shift_v16i8: 776 ; AVX512BW: # %bb.0: 777 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 778 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 779 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 780 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 781 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 782 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 783 ; AVX512BW-NEXT: vzeroupper 784 ; AVX512BW-NEXT: retq 785 ; 786 ; AVX512DQVL-LABEL: splatvar_shift_v16i8: 787 ; AVX512DQVL: # %bb.0: 788 ; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1 789 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 790 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero 791 ; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 792 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 793 ; AVX512DQVL-NEXT: vzeroupper 794 ; AVX512DQVL-NEXT: retq 795 ; 796 ; AVX512BWVL-LABEL: splatvar_shift_v16i8: 797 ; AVX512BWVL: # %bb.0: 798 ; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1 799 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 800 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 801 ; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 802 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 803 ; AVX512BWVL-NEXT: vzeroupper 804 ; AVX512BWVL-NEXT: retq 805 ; 806 ; X32-SSE-LABEL: splatvar_shift_v16i8: 807 ; X32-SSE: # %bb.0: 808 ; X32-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 809 ; X32-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,2,3,4,5,6,7] 810 ; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,0,0,0] 811 ; X32-SSE-NEXT: psllw $5, %xmm2 812 ; X32-SSE-NEXT: pxor %xmm1, %xmm1 813 ; X32-SSE-NEXT: pxor %xmm3, %xmm3 814 ; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm3 815 ; X32-SSE-NEXT: movdqa %xmm3, %xmm4 816 ; X32-SSE-NEXT: pandn %xmm0, %xmm4 817 ; X32-SSE-NEXT: psrlw $4, %xmm0 818 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0 819 ; X32-SSE-NEXT: pand %xmm3, %xmm0 820 ; X32-SSE-NEXT: por %xmm4, %xmm0 821 ; X32-SSE-NEXT: paddb %xmm2, %xmm2 822 ; X32-SSE-NEXT: pxor %xmm3, %xmm3 823 ; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm3 824 ; X32-SSE-NEXT: movdqa %xmm3, %xmm4 825 ; X32-SSE-NEXT: pandn %xmm0, %xmm4 826 ; X32-SSE-NEXT: psrlw $2, %xmm0 827 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0 828 ; X32-SSE-NEXT: pand %xmm3, %xmm0 829 ; X32-SSE-NEXT: por %xmm4, %xmm0 830 ; X32-SSE-NEXT: paddb %xmm2, %xmm2 831 ; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm1 832 ; X32-SSE-NEXT: movdqa %xmm1, %xmm2 833 ; X32-SSE-NEXT: pandn %xmm0, %xmm2 834 ; X32-SSE-NEXT: psrlw $1, %xmm0 835 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0 836 ; X32-SSE-NEXT: pand %xmm1, %xmm0 837 ; X32-SSE-NEXT: por %xmm2, %xmm0 838 ; X32-SSE-NEXT: retl 839 
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer 840 %shift = lshr <16 x i8> %a, %splat 841 ret <16 x i8> %shift 842 } 843 844 ; 845 ; Constant Shifts 846 ; 847 ; Logical right shift of <2 x i64> with a different constant amount per lane (lane 0 by 1, lane 1 by 7). 848 define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind { 849 ; SSE2-LABEL: constant_shift_v2i64: 850 ; SSE2: # %bb.0: 851 ; SSE2-NEXT: movdqa %xmm0, %xmm1 852 ; SSE2-NEXT: psrlq $1, %xmm1 853 ; SSE2-NEXT: psrlq $7, %xmm0 854 ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] 855 ; SSE2-NEXT: retq 856 ; 857 ; SSE41-LABEL: constant_shift_v2i64: 858 ; SSE41: # %bb.0: 859 ; SSE41-NEXT: movdqa %xmm0, %xmm1 860 ; SSE41-NEXT: psrlq $7, %xmm1 861 ; SSE41-NEXT: psrlq $1, %xmm0 862 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 863 ; SSE41-NEXT: retq 864 ; 865 ; AVX1-LABEL: constant_shift_v2i64: 866 ; AVX1: # %bb.0: 867 ; AVX1-NEXT: vpsrlq $7, %xmm0, %xmm1 868 ; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0 869 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 870 ; AVX1-NEXT: retq 871 ; 872 ; AVX2-LABEL: constant_shift_v2i64: 873 ; AVX2: # %bb.0: 874 ; AVX2-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0 875 ; AVX2-NEXT: retq 876 ; 877 ; XOPAVX1-LABEL: constant_shift_v2i64: 878 ; XOPAVX1: # %bb.0: 879 ; XOPAVX1-NEXT: vpshlq {{.*}}(%rip), %xmm0, %xmm0 880 ; XOPAVX1-NEXT: retq 881 ; 882 ; XOPAVX2-LABEL: constant_shift_v2i64: 883 ; XOPAVX2: # %bb.0: 884 ; XOPAVX2-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0 885 ; XOPAVX2-NEXT: retq 886 ; 887 ; AVX512-LABEL: constant_shift_v2i64: 888 ; AVX512: # %bb.0: 889 ; AVX512-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0 890 ; AVX512-NEXT: retq 891 ; 892 ; AVX512VL-LABEL: constant_shift_v2i64: 893 ; AVX512VL: # %bb.0: 894 ; AVX512VL-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0 895 ; AVX512VL-NEXT: retq 896 ; 897 ; X32-SSE-LABEL: constant_shift_v2i64: 898 ; X32-SSE: # %bb.0: 899 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1 900 ; X32-SSE-NEXT: psrlq $1, %xmm1 901 ; X32-SSE-NEXT: psrlq $7, %xmm0 902 ; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = 
xmm1[0],xmm0[1] 903 ; X32-SSE-NEXT: retl 904 %shift = lshr <2 x i64> %a, <i64 1, i64 7> 905 ret <2 x i64> %shift 906 } 907 ; Logical right shift of <4 x i32> with per-lane constant amounts 4, 5, 6 and 7. 908 define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind { 909 ; SSE2-LABEL: constant_shift_v4i32: 910 ; SSE2: # %bb.0: 911 ; SSE2-NEXT: movdqa %xmm0, %xmm1 912 ; SSE2-NEXT: psrld $7, %xmm1 913 ; SSE2-NEXT: movdqa %xmm0, %xmm2 914 ; SSE2-NEXT: psrld $6, %xmm2 915 ; SSE2-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1] 916 ; SSE2-NEXT: movdqa %xmm0, %xmm1 917 ; SSE2-NEXT: psrld $5, %xmm1 918 ; SSE2-NEXT: psrld $4, %xmm0 919 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 920 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3] 921 ; SSE2-NEXT: retq 922 ; 923 ; SSE41-LABEL: constant_shift_v4i32: 924 ; SSE41: # %bb.0: 925 ; SSE41-NEXT: movdqa %xmm0, %xmm1 926 ; SSE41-NEXT: psrld $7, %xmm1 927 ; SSE41-NEXT: movdqa %xmm0, %xmm2 928 ; SSE41-NEXT: psrld $5, %xmm2 929 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7] 930 ; SSE41-NEXT: movdqa %xmm0, %xmm1 931 ; SSE41-NEXT: psrld $6, %xmm1 932 ; SSE41-NEXT: psrld $4, %xmm0 933 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] 934 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] 935 ; SSE41-NEXT: retq 936 ; 937 ; AVX1-LABEL: constant_shift_v4i32: 938 ; AVX1: # %bb.0: 939 ; AVX1-NEXT: vpsrld $7, %xmm0, %xmm1 940 ; AVX1-NEXT: vpsrld $5, %xmm0, %xmm2 941 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] 942 ; AVX1-NEXT: vpsrld $6, %xmm0, %xmm2 943 ; AVX1-NEXT: vpsrld $4, %xmm0, %xmm0 944 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] 945 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 946 ; AVX1-NEXT: retq 947 ; 948 ; AVX2-LABEL: constant_shift_v4i32: 949 ; AVX2: # %bb.0: 950 ; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0 951 ; AVX2-NEXT: retq 952 ; 953 ; XOPAVX1-LABEL: constant_shift_v4i32: 954 ; XOPAVX1: # %bb.0: 955 ; XOPAVX1-NEXT: vpshld {{.*}}(%rip), %xmm0, 
%xmm0 956 ; XOPAVX1-NEXT: retq 957 ; 958 ; XOPAVX2-LABEL: constant_shift_v4i32: 959 ; XOPAVX2: # %bb.0: 960 ; XOPAVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0 961 ; XOPAVX2-NEXT: retq 962 ; 963 ; AVX512-LABEL: constant_shift_v4i32: 964 ; AVX512: # %bb.0: 965 ; AVX512-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0 966 ; AVX512-NEXT: retq 967 ; 968 ; AVX512VL-LABEL: constant_shift_v4i32: 969 ; AVX512VL: # %bb.0: 970 ; AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0 971 ; AVX512VL-NEXT: retq 972 ; 973 ; X32-SSE-LABEL: constant_shift_v4i32: 974 ; X32-SSE: # %bb.0: 975 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1 976 ; X32-SSE-NEXT: psrld $7, %xmm1 977 ; X32-SSE-NEXT: movdqa %xmm0, %xmm2 978 ; X32-SSE-NEXT: psrld $6, %xmm2 979 ; X32-SSE-NEXT: punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1] 980 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1 981 ; X32-SSE-NEXT: psrld $5, %xmm1 982 ; X32-SSE-NEXT: psrld $4, %xmm0 983 ; X32-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 984 ; X32-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3] 985 ; X32-SSE-NEXT: retl 986 %shift = lshr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7> 987 ret <4 x i32> %shift 988 } 989 ; Logical right shift of <8 x i16> with per-lane constant amounts 0 through 7. Lane 0 is shifted by 0, and the expected SSE4.1 and AVX sequences blend that lane back from the unshifted input after pmulhuw. 990 define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind { 991 ; SSE2-LABEL: constant_shift_v8i16: 992 ; SSE2: # %bb.0: 993 ; SSE2-NEXT: movdqa %xmm0, %xmm1 994 ; SSE2-NEXT: psrlw $4, %xmm1 995 ; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 996 ; SSE2-NEXT: movapd %xmm1, %xmm2 997 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[2,3] 998 ; SSE2-NEXT: psrlw $2, %xmm1 999 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,3,2,3] 1000 ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 1001 ; SSE2-NEXT: movaps {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0] 1002 ; SSE2-NEXT: movaps %xmm2, %xmm0 1003 ; SSE2-NEXT: andps %xmm1, %xmm0 1004 ; SSE2-NEXT: psrlw $1, %xmm2 1005 ; SSE2-NEXT: andnps %xmm2, %xmm1 1006 ; SSE2-NEXT: orps %xmm1, %xmm0 1007 ; SSE2-NEXT: retq 1008 ; 1009 ; SSE41-LABEL: constant_shift_v8i16: 1010 ; SSE41: # 
%bb.0: 1011 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <u,32768,16384,8192,4096,2048,1024,512> 1012 ; SSE41-NEXT: pmulhuw %xmm0, %xmm1 1013 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] 1014 ; SSE41-NEXT: retq 1015 ; 1016 ; AVX-LABEL: constant_shift_v8i16: 1017 ; AVX: # %bb.0: 1018 ; AVX-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1 1019 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] 1020 ; AVX-NEXT: retq 1021 ; 1022 ; XOP-LABEL: constant_shift_v8i16: 1023 ; XOP: # %bb.0: 1024 ; XOP-NEXT: vpshlw {{.*}}(%rip), %xmm0, %xmm0 1025 ; XOP-NEXT: retq 1026 ; 1027 ; AVX512DQ-LABEL: constant_shift_v8i16: 1028 ; AVX512DQ: # %bb.0: 1029 ; AVX512DQ-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1 1030 ; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] 1031 ; AVX512DQ-NEXT: retq 1032 ; 1033 ; AVX512BW-LABEL: constant_shift_v8i16: 1034 ; AVX512BW: # %bb.0: 1035 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1036 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7] 1037 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 1038 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1039 ; AVX512BW-NEXT: vzeroupper 1040 ; AVX512BW-NEXT: retq 1041 ; 1042 ; AVX512DQVL-LABEL: constant_shift_v8i16: 1043 ; AVX512DQVL: # %bb.0: 1044 ; AVX512DQVL-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1 1045 ; AVX512DQVL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] 1046 ; AVX512DQVL-NEXT: retq 1047 ; 1048 ; AVX512BWVL-LABEL: constant_shift_v8i16: 1049 ; AVX512BWVL: # %bb.0: 1050 ; AVX512BWVL-NEXT: vpsrlvw {{.*}}(%rip), %xmm0, %xmm0 1051 ; AVX512BWVL-NEXT: retq 1052 ; 1053 ; X32-SSE-LABEL: constant_shift_v8i16: 1054 ; X32-SSE: # %bb.0: 1055 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1 1056 ; X32-SSE-NEXT: psrlw $4, %xmm1 1057 ; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] 1058 ; X32-SSE-NEXT: movapd %xmm1, %xmm2 1059 ; X32-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[2,3] 1060 ; X32-SSE-NEXT: psrlw $2, %xmm1 1061 ; X32-SSE-NEXT: pshufd 
{{.*#+}} xmm0 = xmm1[1,3,2,3] 1062 ; X32-SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 1063 ; X32-SSE-NEXT: movaps {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0] 1064 ; X32-SSE-NEXT: movaps %xmm2, %xmm0 1065 ; X32-SSE-NEXT: andps %xmm1, %xmm0 1066 ; X32-SSE-NEXT: psrlw $1, %xmm2 1067 ; X32-SSE-NEXT: andnps %xmm2, %xmm1 1068 ; X32-SSE-NEXT: orps %xmm1, %xmm0 1069 ; X32-SSE-NEXT: retl 1070 %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7> 1071 ret <8 x i16> %shift 1072 } 1073 ; Logical right shift of <16 x i8> with per-lane constant amounts 0 through 7 followed by 7 down to 0. 1074 define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { 1075 ; SSE2-LABEL: constant_shift_v16i8: 1076 ; SSE2: # %bb.0: 1077 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32] 1078 ; SSE2-NEXT: pxor %xmm1, %xmm1 1079 ; SSE2-NEXT: pxor %xmm3, %xmm3 1080 ; SSE2-NEXT: pcmpgtb %xmm2, %xmm3 1081 ; SSE2-NEXT: movdqa %xmm3, %xmm4 1082 ; SSE2-NEXT: pandn %xmm0, %xmm4 1083 ; SSE2-NEXT: psrlw $4, %xmm0 1084 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 1085 ; SSE2-NEXT: pand %xmm3, %xmm0 1086 ; SSE2-NEXT: por %xmm4, %xmm0 1087 ; SSE2-NEXT: paddb %xmm2, %xmm2 1088 ; SSE2-NEXT: pxor %xmm3, %xmm3 1089 ; SSE2-NEXT: pcmpgtb %xmm2, %xmm3 1090 ; SSE2-NEXT: movdqa %xmm3, %xmm4 1091 ; SSE2-NEXT: pandn %xmm0, %xmm4 1092 ; SSE2-NEXT: psrlw $2, %xmm0 1093 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 1094 ; SSE2-NEXT: pand %xmm3, %xmm0 1095 ; SSE2-NEXT: por %xmm4, %xmm0 1096 ; SSE2-NEXT: paddb %xmm2, %xmm2 1097 ; SSE2-NEXT: pcmpgtb %xmm2, %xmm1 1098 ; SSE2-NEXT: movdqa %xmm1, %xmm2 1099 ; SSE2-NEXT: pandn %xmm0, %xmm2 1100 ; SSE2-NEXT: psrlw $1, %xmm0 1101 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 1102 ; SSE2-NEXT: pand %xmm1, %xmm0 1103 ; SSE2-NEXT: por %xmm2, %xmm0 1104 ; SSE2-NEXT: retq 1105 ; 1106 ; SSE41-LABEL: constant_shift_v16i8: 1107 ; SSE41: # %bb.0: 1108 ; SSE41-NEXT: movdqa %xmm0, %xmm1 1109 ; SSE41-NEXT: movdqa %xmm0, %xmm2 1110 ; SSE41-NEXT: psrlw $4, %xmm2 1111 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 1112 ; SSE41-NEXT: 
movdqa {{.*#+}} xmm0 = [8192,24640,41088,57536,49376,32928,16480,32] 1113 ; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 1114 ; SSE41-NEXT: movdqa %xmm1, %xmm2 1115 ; SSE41-NEXT: psrlw $2, %xmm2 1116 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 1117 ; SSE41-NEXT: paddb %xmm0, %xmm0 1118 ; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 1119 ; SSE41-NEXT: movdqa %xmm1, %xmm2 1120 ; SSE41-NEXT: psrlw $1, %xmm2 1121 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 1122 ; SSE41-NEXT: paddb %xmm0, %xmm0 1123 ; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 1124 ; SSE41-NEXT: movdqa %xmm1, %xmm0 1125 ; SSE41-NEXT: retq 1126 ; 1127 ; AVX-LABEL: constant_shift_v16i8: 1128 ; AVX: # %bb.0: 1129 ; AVX-NEXT: vpsrlw $4, %xmm0, %xmm1 1130 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 1131 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32] 1132 ; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 1133 ; AVX-NEXT: vpsrlw $2, %xmm0, %xmm1 1134 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 1135 ; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2 1136 ; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 1137 ; AVX-NEXT: vpsrlw $1, %xmm0, %xmm1 1138 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 1139 ; AVX-NEXT: vpaddb %xmm2, %xmm2, %xmm2 1140 ; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 1141 ; AVX-NEXT: retq 1142 ; 1143 ; XOP-LABEL: constant_shift_v16i8: 1144 ; XOP: # %bb.0: 1145 ; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0 1146 ; XOP-NEXT: retq 1147 ; 1148 ; AVX512DQ-LABEL: constant_shift_v16i8: 1149 ; AVX512DQ: # %bb.0: 1150 ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 1151 ; AVX512DQ-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0 1152 ; 
AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 1153 ; AVX512DQ-NEXT: vzeroupper 1154 ; AVX512DQ-NEXT: retq 1155 ; 1156 ; AVX512BW-LABEL: constant_shift_v16i8: 1157 ; AVX512BW: # %bb.0: 1158 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,7,6,5,4,3,2,1,0] 1159 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 1160 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 1161 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 1162 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1163 ; AVX512BW-NEXT: vzeroupper 1164 ; AVX512BW-NEXT: retq 1165 ; 1166 ; AVX512DQVL-LABEL: constant_shift_v16i8: 1167 ; AVX512DQVL: # %bb.0: 1168 ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 1169 ; AVX512DQVL-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0 1170 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 1171 ; AVX512DQVL-NEXT: vzeroupper 1172 ; AVX512DQVL-NEXT: retq 1173 ; 1174 ; AVX512BWVL-LABEL: constant_shift_v16i8: 1175 ; AVX512BWVL: # %bb.0: 1176 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 1177 ; AVX512BWVL-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm0 1178 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 1179 ; AVX512BWVL-NEXT: vzeroupper 1180 ; AVX512BWVL-NEXT: retq 1181 ; 1182 ; X32-SSE-LABEL: constant_shift_v16i8: 1183 ; X32-SSE: # %bb.0: 
1184 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [8192,24640,41088,57536,49376,32928,16480,32] 1185 ; X32-SSE-NEXT: pxor %xmm1, %xmm1 1186 ; X32-SSE-NEXT: pxor %xmm3, %xmm3 1187 ; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm3 1188 ; X32-SSE-NEXT: movdqa %xmm3, %xmm4 1189 ; X32-SSE-NEXT: pandn %xmm0, %xmm4 1190 ; X32-SSE-NEXT: psrlw $4, %xmm0 1191 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0 1192 ; X32-SSE-NEXT: pand %xmm3, %xmm0 1193 ; X32-SSE-NEXT: por %xmm4, %xmm0 1194 ; X32-SSE-NEXT: paddb %xmm2, %xmm2 1195 ; X32-SSE-NEXT: pxor %xmm3, %xmm3 1196 ; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm3 1197 ; X32-SSE-NEXT: movdqa %xmm3, %xmm4 1198 ; X32-SSE-NEXT: pandn %xmm0, %xmm4 1199 ; X32-SSE-NEXT: psrlw $2, %xmm0 1200 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0 1201 ; X32-SSE-NEXT: pand %xmm3, %xmm0 1202 ; X32-SSE-NEXT: por %xmm4, %xmm0 1203 ; X32-SSE-NEXT: paddb %xmm2, %xmm2 1204 ; X32-SSE-NEXT: pcmpgtb %xmm2, %xmm1 1205 ; X32-SSE-NEXT: movdqa %xmm1, %xmm2 1206 ; X32-SSE-NEXT: pandn %xmm0, %xmm2 1207 ; X32-SSE-NEXT: psrlw $1, %xmm0 1208 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0 1209 ; X32-SSE-NEXT: pand %xmm1, %xmm0 1210 ; X32-SSE-NEXT: por %xmm2, %xmm0 1211 ; X32-SSE-NEXT: retl 1212 %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0> 1213 ret <16 x i8> %shift 1214 } 1215 1216 ; 1217 ; Uniform Constant Shifts 1218 ; 1219 ; Logical right shift of <2 x i64> with the same constant amount 7 in every lane. Every expected output below is a single immediate psrlq or vpsrlq. 1220 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) nounwind { 1221 ; SSE-LABEL: splatconstant_shift_v2i64: 1222 ; SSE: # %bb.0: 1223 ; SSE-NEXT: psrlq $7, %xmm0 1224 ; SSE-NEXT: retq 1225 ; 1226 ; AVX-LABEL: splatconstant_shift_v2i64: 1227 ; AVX: # %bb.0: 1228 ; AVX-NEXT: vpsrlq $7, %xmm0, %xmm0 1229 ; AVX-NEXT: retq 1230 ; 1231 ; XOP-LABEL: splatconstant_shift_v2i64: 1232 ; XOP: # %bb.0: 1233 ; XOP-NEXT: vpsrlq $7, %xmm0, %xmm0 1234 ; XOP-NEXT: retq 1235 ; 1236 ; AVX512-LABEL: splatconstant_shift_v2i64: 1237 ; AVX512: # %bb.0: 1238 ; AVX512-NEXT: vpsrlq $7, %xmm0, %xmm0 1239 ; AVX512-NEXT: retq 1240 
; 1241 ; AVX512VL-LABEL: splatconstant_shift_v2i64: 1242 ; AVX512VL: # %bb.0: 1243 ; AVX512VL-NEXT: vpsrlq $7, %xmm0, %xmm0 1244 ; AVX512VL-NEXT: retq 1245 ; 1246 ; X32-SSE-LABEL: splatconstant_shift_v2i64: 1247 ; X32-SSE: # %bb.0: 1248 ; X32-SSE-NEXT: psrlq $7, %xmm0 1249 ; X32-SSE-NEXT: retl 1250 %shift = lshr <2 x i64> %a, <i64 7, i64 7> 1251 ret <2 x i64> %shift 1252 } 1253 ; Logical right shift of <4 x i32> with the same constant amount 5 in every lane. Every expected output below is a single immediate psrld or vpsrld. 1254 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) nounwind { 1255 ; SSE-LABEL: splatconstant_shift_v4i32: 1256 ; SSE: # %bb.0: 1257 ; SSE-NEXT: psrld $5, %xmm0 1258 ; SSE-NEXT: retq 1259 ; 1260 ; AVX-LABEL: splatconstant_shift_v4i32: 1261 ; AVX: # %bb.0: 1262 ; AVX-NEXT: vpsrld $5, %xmm0, %xmm0 1263 ; AVX-NEXT: retq 1264 ; 1265 ; XOP-LABEL: splatconstant_shift_v4i32: 1266 ; XOP: # %bb.0: 1267 ; XOP-NEXT: vpsrld $5, %xmm0, %xmm0 1268 ; XOP-NEXT: retq 1269 ; 1270 ; AVX512-LABEL: splatconstant_shift_v4i32: 1271 ; AVX512: # %bb.0: 1272 ; AVX512-NEXT: vpsrld $5, %xmm0, %xmm0 1273 ; AVX512-NEXT: retq 1274 ; 1275 ; AVX512VL-LABEL: splatconstant_shift_v4i32: 1276 ; AVX512VL: # %bb.0: 1277 ; AVX512VL-NEXT: vpsrld $5, %xmm0, %xmm0 1278 ; AVX512VL-NEXT: retq 1279 ; 1280 ; X32-SSE-LABEL: splatconstant_shift_v4i32: 1281 ; X32-SSE: # %bb.0: 1282 ; X32-SSE-NEXT: psrld $5, %xmm0 1283 ; X32-SSE-NEXT: retl 1284 %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5> 1285 ret <4 x i32> %shift 1286 } 1287 ; Logical right shift of <8 x i16> with the same constant amount 3 in every lane. Every expected output is a single immediate psrlw or vpsrlw. 1288 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) nounwind { 1289 ; SSE-LABEL: splatconstant_shift_v8i16: 1290 ; SSE: # %bb.0: 1291 ; SSE-NEXT: psrlw $3, %xmm0 1292 ; SSE-NEXT: retq 1293 ; 1294 ; AVX-LABEL: splatconstant_shift_v8i16: 1295 ; AVX: # %bb.0: 1296 ; AVX-NEXT: vpsrlw $3, %xmm0, %xmm0 1297 ; AVX-NEXT: retq 1298 ; 1299 ; XOP-LABEL: splatconstant_shift_v8i16: 1300 ; XOP: # %bb.0: 1301 ; XOP-NEXT: vpsrlw $3, %xmm0, %xmm0 1302 ; XOP-NEXT: retq 1303 ; 1304 ; AVX512-LABEL: splatconstant_shift_v8i16: 1305 ; AVX512: # %bb.0: 1306 ; AVX512-NEXT: vpsrlw $3, %xmm0, %xmm0 1307 ; 
AVX512-NEXT: retq 1308 ; 1309 ; AVX512VL-LABEL: splatconstant_shift_v8i16: 1310 ; AVX512VL: # %bb.0: 1311 ; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0 1312 ; AVX512VL-NEXT: retq 1313 ; 1314 ; X32-SSE-LABEL: splatconstant_shift_v8i16: 1315 ; X32-SSE: # %bb.0: 1316 ; X32-SSE-NEXT: psrlw $3, %xmm0 1317 ; X32-SSE-NEXT: retl 1318 %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> 1319 ret <8 x i16> %shift 1320 } 1321 ; Logical right shift of <16 x i8> with the same constant amount 3 in every lane. The non-XOP expectations are psrlw by 3 followed by an AND with a constant operand, while the XOP expectation is a single vpshlb with a constant operand. 1322 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind { 1323 ; SSE-LABEL: splatconstant_shift_v16i8: 1324 ; SSE: # %bb.0: 1325 ; SSE-NEXT: psrlw $3, %xmm0 1326 ; SSE-NEXT: pand {{.*}}(%rip), %xmm0 1327 ; SSE-NEXT: retq 1328 ; 1329 ; AVX-LABEL: splatconstant_shift_v16i8: 1330 ; AVX: # %bb.0: 1331 ; AVX-NEXT: vpsrlw $3, %xmm0, %xmm0 1332 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 1333 ; AVX-NEXT: retq 1334 ; 1335 ; XOP-LABEL: splatconstant_shift_v16i8: 1336 ; XOP: # %bb.0: 1337 ; XOP-NEXT: vpshlb {{.*}}(%rip), %xmm0, %xmm0 1338 ; XOP-NEXT: retq 1339 ; 1340 ; AVX512-LABEL: splatconstant_shift_v16i8: 1341 ; AVX512: # %bb.0: 1342 ; AVX512-NEXT: vpsrlw $3, %xmm0, %xmm0 1343 ; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 1344 ; AVX512-NEXT: retq 1345 ; 1346 ; AVX512VL-LABEL: splatconstant_shift_v16i8: 1347 ; AVX512VL: # %bb.0: 1348 ; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0 1349 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 1350 ; AVX512VL-NEXT: retq 1351 ; 1352 ; X32-SSE-LABEL: splatconstant_shift_v16i8: 1353 ; X32-SSE: # %bb.0: 1354 ; X32-SSE-NEXT: psrlw $3, %xmm0 1355 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0 1356 ; X32-SSE-NEXT: retl 1357 %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> 1358 ret <16 x i8> %shift 1359 } 1360