; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s

; Verify lowering of 256-bit vector shifts by uniform (splat) constant
; amounts: shift-by-0 must fold away, shl-by-1 becomes a vector add, and
; any other uniform amount selects the immediate form of the shift.

; AVX2 Logical Shift Left

; A uniform shift by 0 is a no-op and must be folded away entirely.
define <16 x i16> @test_sllw_1(<16 x i16> %InVec) {
entry:
  %shl = shl <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sllw_1:
; CHECK-NOT: vpsllw $0, %ymm0, %ymm0
; CHECK: ret

; A logical left shift by 1 is lowered to an add of the vector to itself.
define <16 x i16> @test_sllw_2(<16 x i16> %InVec) {
entry:
  %shl = shl <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sllw_2:
; CHECK: vpaddw %ymm0, %ymm0, %ymm0
; CHECK: ret

; A non-trivial uniform amount should use the immediate form of the shift.
define <16 x i16> @test_sllw_3(<16 x i16> %InVec) {
entry:
  %shl = shl <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sllw_3:
; CHECK: vpsllw $15, %ymm0, %ymm0
; CHECK: ret

define <8 x i32> @test_slld_1(<8 x i32> %InVec) {
entry:
  %shl = shl <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_slld_1:
; CHECK-NOT: vpslld $0, %ymm0, %ymm0
; CHECK: ret

define <8 x i32> @test_slld_2(<8 x i32> %InVec) {
entry:
  %shl = shl <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_slld_2:
; CHECK: vpaddd %ymm0, %ymm0, %ymm0
; CHECK: ret

; An amount splatted from only lane 0 should use the xmm-count form of the
; shift (vpslld %xmm, ...) rather than a fully variable vpsllvd.
define <8 x i32> @test_vpslld_var(i32 %shift) {
  %amt = insertelement <8 x i32> undef, i32 %shift, i32 0
  %tmp = shl <8 x i32> <i32 192, i32 193, i32 194, i32 195, i32 196, i32 197, i32 198, i32 199>, %amt
  ret <8 x i32> %tmp
}

; CHECK-LABEL: test_vpslld_var:
; CHECK: vpslld %xmm0, %ymm1, %ymm0
; CHECK: ret

define <8 x i32> @test_slld_3(<8 x i32> %InVec) {
entry:
  %shl = shl <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_slld_3:
; CHECK: vpslld $31, %ymm0, %ymm0
; CHECK: ret

define <4 x i64> @test_sllq_1(<4 x i64> %InVec) {
entry:
  %shl = shl <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_sllq_1:
; CHECK-NOT: vpsllq $0, %ymm0, %ymm0
; CHECK: ret

define <4 x i64> @test_sllq_2(<4 x i64> %InVec) {
entry:
  %shl = shl <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_sllq_2:
; CHECK: vpaddq %ymm0, %ymm0, %ymm0
; CHECK: ret

define <4 x i64> @test_sllq_3(<4 x i64> %InVec) {
entry:
  %shl = shl <4 x i64> %InVec, <i64 63, i64 63, i64 63, i64 63>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_sllq_3:
; CHECK: vpsllq $63, %ymm0, %ymm0
; CHECK: ret

; AVX2 Arithmetic Shift

define <16 x i16> @test_sraw_1(<16 x i16> %InVec) {
entry:
  %shl = ashr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sraw_1:
; CHECK-NOT: vpsraw $0, %ymm0, %ymm0
; CHECK: ret

define <16 x i16> @test_sraw_2(<16 x i16> %InVec) {
entry:
  %shl = ashr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sraw_2:
; CHECK: vpsraw $1, %ymm0, %ymm0
; CHECK: ret

define <16 x i16> @test_sraw_3(<16 x i16> %InVec) {
entry:
  %shl = ashr <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sraw_3:
; CHECK: vpsraw $15, %ymm0, %ymm0
; CHECK: ret

define <8 x i32> @test_srad_1(<8 x i32> %InVec) {
entry:
  %shl = ashr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srad_1:
; CHECK-NOT: vpsrad $0, %ymm0, %ymm0
; CHECK: ret

define <8 x i32> @test_srad_2(<8 x i32> %InVec) {
entry:
  %shl = ashr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srad_2:
; CHECK: vpsrad $1, %ymm0, %ymm0
; CHECK: ret

define <8 x i32> @test_srad_3(<8 x i32> %InVec) {
entry:
  %shl = ashr <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srad_3:
; CHECK: vpsrad $31, %ymm0, %ymm0
; CHECK: ret

; AVX2 Logical Shift Right

define <16 x i16> @test_srlw_1(<16 x i16> %InVec) {
entry:
  %shl = lshr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_srlw_1:
; CHECK-NOT: vpsrlw $0, %ymm0, %ymm0
; CHECK: ret

define <16 x i16> @test_srlw_2(<16 x i16> %InVec) {
entry:
  %shl = lshr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_srlw_2:
; CHECK: vpsrlw $1, %ymm0, %ymm0
; CHECK: ret

define <16 x i16> @test_srlw_3(<16 x i16> %InVec) {
entry:
  %shl = lshr <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_srlw_3:
; CHECK: vpsrlw $15, %ymm0, %ymm0
; CHECK: ret

define <8 x i32> @test_srld_1(<8 x i32> %InVec) {
entry:
  %shl = lshr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srld_1:
; CHECK-NOT: vpsrld $0, %ymm0, %ymm0
; CHECK: ret

define <8 x i32> @test_srld_2(<8 x i32> %InVec) {
entry:
  %shl = lshr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srld_2:
; CHECK: vpsrld $1, %ymm0, %ymm0
; CHECK: ret

define <8 x i32> @test_srld_3(<8 x i32> %InVec) {
entry:
  %shl = lshr <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srld_3:
; CHECK: vpsrld $31, %ymm0, %ymm0
; CHECK: ret

define <4 x i64> @test_srlq_1(<4 x i64> %InVec) {
entry:
  %shl = lshr <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_srlq_1:
; CHECK-NOT: vpsrlq $0, %ymm0, %ymm0
; CHECK: ret

define <4 x i64> @test_srlq_2(<4 x i64> %InVec) {
entry:
  %shl = lshr <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_srlq_2:
; CHECK: vpsrlq $1, %ymm0, %ymm0
; CHECK: ret

define <4 x i64> @test_srlq_3(<4 x i64> %InVec) {
entry:
  %shl = lshr <4 x i64> %InVec, <i64 63, i64 63, i64 63, i64 63>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_srlq_3:
; CHECK: vpsrlq $63, %ymm0, %ymm0
; CHECK: ret

; The and+trunc of the i64 shift amounts must not block selection of the
; variable shift (vpsrlvd) for the i32 lshr.
; CHECK-LABEL: @srl_trunc_and_v4i64
; CHECK: vpand
; CHECK-NEXT: vpsrlvd
; CHECK: ret
define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
  %and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8>
  %trunc = trunc <4 x i64> %and to <4 x i32>
  %sra = lshr <4 x i32> %x, %trunc
  ret <4 x i32> %sra
}

;
; Vectorized byte shifts
; (this section also covers i16 elements: AVX2 has no per-element variable
; shift for i8 or i16, so the lowerings below are emulated sequences)
;

; i16 variable shl: zero-extend both operands to i32, vpsllvd, then pack
; the low words of each dword back into an xmm result.
define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-LABEL: shl_8i16
; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK: retq
  %shl = shl <8 x i16> %r, %a
  ret <8 x i16> %shl
}

; 256-bit i16 variable shl: unpack into i32 low/high halves, vpsllvd each
; half, shift the results back down, and repack with vpackusdw.
define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; CHECK-LABEL: shl_16i16
; CHECK: vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpsllvd %ymm3, %ymm4, %ymm3
; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; CHECK-NEXT: retq
  %shl = shl <16 x i16> %r, %a
  ret <16 x i16> %shl
}

; i8 variable shl: three shift-and-blend rounds (by 4, 2, 1); vpsllw $5
; moves each amount bit into the byte sign position so vpblendvb can select
; per byte, with vpaddb advancing to the next amount bit each round.
define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; CHECK-LABEL: shl_32i8
; CHECK: vpsllw $5, %ymm1, %ymm1
; CHECK-NEXT: vpsllw $4, %ymm0, %ymm2
; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vpsllw $2, %ymm0, %ymm2
; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vpaddb %ymm0, %ymm0, %ymm2
; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT: retq
  %shl = shl <32 x i8> %r, %a
  ret <32 x i8> %shl
}

; i16 variable ashr: sign-extend the value, zero-extend the amount,
; vpsravd, then pack the low words back.
define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-LABEL: ashr_8i16
; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0
; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK: retq
  %ashr = ashr <8 x i16> %r, %a
  ret <8 x i16> %ashr
}

; 256-bit i16 variable ashr: same unpack/repack structure as shl_16i16,
; but using vpsravd for the arithmetic shift.
define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; CHECK-LABEL: ashr_16i16
; CHECK: vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpsravd %ymm3, %ymm4, %ymm3
; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; CHECK-NEXT: retq
  %ashr = ashr <16 x i16> %r, %a
  ret <16 x i16> %ashr
}

; i8 variable ashr: no byte arithmetic shift exists, so unpack bytes to
; words (value in the high byte), do shift-and-blend rounds with vpsraw on
; each half, then shift down and repack with vpackuswb.
define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; CHECK-LABEL: ashr_32i8
; CHECK: vpsllw $5, %ymm1, %ymm1
; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; CHECK-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; CHECK-NEXT: vpsraw $4, %ymm3, %ymm4
; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; CHECK-NEXT: vpsraw $2, %ymm3, %ymm4
; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; CHECK-NEXT: vpsraw $1, %ymm3, %ymm4
; CHECK-NEXT: vpaddw %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
; CHECK-NEXT: vpsrlw $8, %ymm2, %ymm2
; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; CHECK-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; CHECK-NEXT: vpsraw $4, %ymm0, %ymm3
; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; CHECK-NEXT: vpsraw $2, %ymm0, %ymm3
; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; CHECK-NEXT: vpsraw $1, %ymm0, %ymm3
; CHECK-NEXT: vpaddw %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; CHECK-NEXT: vpsrlw $8, %ymm0, %ymm0
; CHECK-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; CHECK-NEXT: retq
  %ashr = ashr <32 x i8> %r, %a
  ret <32 x i8> %ashr
}

; i16 variable lshr: zero-extend both operands, vpsrlvd, pack back.
define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-LABEL: lshr_8i16
; CHECK: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK: retq
  %lshr = lshr <8 x i16> %r, %a
  ret <8 x i16> %lshr
}

; 256-bit i16 variable lshr: same unpack/repack structure as shl_16i16,
; but using vpsrlvd for the logical right shift.
define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; CHECK-LABEL: lshr_16i16
; CHECK: vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; CHECK-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; CHECK-NEXT: vpsrlvd %ymm3, %ymm4, %ymm3
; CHECK-NEXT: vpsrld $16, %ymm3, %ymm3
; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; CHECK-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpsrld $16, %ymm0, %ymm0
; CHECK-NEXT: vpackusdw %ymm3, %ymm0, %ymm0
; CHECK-NEXT: retq
  %lshr = lshr <16 x i16> %r, %a
  ret <16 x i16> %lshr
}

; i8 variable lshr: three shift-and-blend rounds (by 4, 2, 1) mirroring
; shl_32i8, masking the bits shifted across byte boundaries with vpand.
define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; CHECK-LABEL: lshr_32i8
; CHECK: vpsllw $5, %ymm1, %ymm1
; CHECK-NEXT: vpsrlw $4, %ymm0, %ymm2
; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vpsrlw $2, %ymm0, %ymm2
; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT: vpsrlw $1, %ymm0, %ymm2
; CHECK-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; CHECK-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT: retq
  %lshr = lshr <32 x i8> %r, %a
  ret <32 x i8> %lshr
}