; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s

; AVX2 Logical Shift Left

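; Shifts by zero are no-ops and should be folded away; the *_1 tests check
; that no shift instruction is emitted.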
define <16 x i16> @test_sllw_1(<16 x i16> %InVec) {
entry:
  %shl = shl <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sllw_1:
; CHECK-NOT: vpsllw  $0, %ymm0, %ymm0
; CHECK: ret

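; A left shift by one should lower to an add of the vector to itself; the
; *_2 tests check for vpadd rather than a shift.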
define <16 x i16> @test_sllw_2(<16 x i16> %InVec) {
entry:
  %shl = shl <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sllw_2:
; CHECK: vpaddw  %ymm0, %ymm0, %ymm0
; CHECK: ret

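; In-range constant amounts should use the immediate form of the shift; the
; *_3 tests use the maximum legal amount for each element width.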
define <16 x i16> @test_sllw_3(<16 x i16> %InVec) {
entry:
  %shl = shl <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sllw_3:
; CHECK: vpsllw $15, %ymm0, %ymm0
; CHECK: ret

define <8 x i32> @test_slld_1(<8 x i32> %InVec) {
entry:
  %shl = shl <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_slld_1:
; CHECK-NOT: vpslld  $0, %ymm0, %ymm0
; CHECK: ret

define <8 x i32> @test_slld_2(<8 x i32> %InVec) {
entry:
  %shl = shl <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_slld_2:
; CHECK: vpaddd  %ymm0, %ymm0, %ymm0
; CHECK: ret

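; A shift amount splatted from a scalar should select the form of vpslld
; that takes the count in an xmm register.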
define <8 x i32> @test_vpslld_var(i32 %shift) {
  %amt = insertelement <8 x i32> undef, i32 %shift, i32 0
  %tmp = shl <8 x i32> <i32 192, i32 193, i32 194, i32 195, i32 196, i32 197, i32 198, i32 199>, %amt
  ret <8 x i32> %tmp
}

; CHECK-LABEL: test_vpslld_var:
; CHECK: vpslld %xmm0, %ymm1, %ymm0
; CHECK: ret

define <8 x i32> @test_slld_3(<8 x i32> %InVec) {
entry:
  %shl = shl <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_slld_3:
; CHECK: vpslld $31, %ymm0, %ymm0
; CHECK: ret

define <4 x i64> @test_sllq_1(<4 x i64> %InVec) {
entry:
  %shl = shl <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_sllq_1:
; CHECK-NOT: vpsllq  $0, %ymm0, %ymm0
; CHECK: ret

define <4 x i64> @test_sllq_2(<4 x i64> %InVec) {
entry:
  %shl = shl <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_sllq_2:
; CHECK: vpaddq  %ymm0, %ymm0, %ymm0
; CHECK: ret

define <4 x i64> @test_sllq_3(<4 x i64> %InVec) {
entry:
  %shl = shl <4 x i64> %InVec, <i64 63, i64 63, i64 63, i64 63>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_sllq_3:
; CHECK: vpsllq $63, %ymm0, %ymm0
; CHECK: ret

; AVX2 Arithmetic Shift

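; Shifts by zero fold away here as well; nonzero constants use the immediate
; forms of vpsraw/vpsrad. There are no i64 tests because AVX2 has no vpsraq.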
define <16 x i16> @test_sraw_1(<16 x i16> %InVec) {
entry:
  %shl = ashr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sraw_1:
; CHECK-NOT: vpsraw  $0, %ymm0, %ymm0
; CHECK: ret

define <16 x i16> @test_sraw_2(<16 x i16> %InVec) {
entry:
  %shl = ashr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sraw_2:
; CHECK: vpsraw  $1, %ymm0, %ymm0
; CHECK: ret

define <16 x i16> @test_sraw_3(<16 x i16> %InVec) {
entry:
  %shl = ashr <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_sraw_3:
; CHECK: vpsraw  $15, %ymm0, %ymm0
; CHECK: ret

define <8 x i32> @test_srad_1(<8 x i32> %InVec) {
entry:
  %shl = ashr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srad_1:
; CHECK-NOT: vpsrad  $0, %ymm0, %ymm0
; CHECK: ret

define <8 x i32> @test_srad_2(<8 x i32> %InVec) {
entry:
  %shl = ashr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srad_2:
; CHECK: vpsrad  $1, %ymm0, %ymm0
; CHECK: ret

define <8 x i32> @test_srad_3(<8 x i32> %InVec) {
entry:
  %shl = ashr <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srad_3:
; CHECK: vpsrad  $31, %ymm0, %ymm0
; CHECK: ret

; AVX2 Logical Shift Right

define <16 x i16> @test_srlw_1(<16 x i16> %InVec) {
entry:
  %shl = lshr <16 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_srlw_1:
; CHECK-NOT: vpsrlw  $0, %ymm0, %ymm0
; CHECK: ret

define <16 x i16> @test_srlw_2(<16 x i16> %InVec) {
entry:
  %shl = lshr <16 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_srlw_2:
; CHECK: vpsrlw  $1, %ymm0, %ymm0
; CHECK: ret

define <16 x i16> @test_srlw_3(<16 x i16> %InVec) {
entry:
  %shl = lshr <16 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <16 x i16> %shl
}

; CHECK-LABEL: test_srlw_3:
; CHECK: vpsrlw $15, %ymm0, %ymm0
; CHECK: ret

define <8 x i32> @test_srld_1(<8 x i32> %InVec) {
entry:
  %shl = lshr <8 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srld_1:
; CHECK-NOT: vpsrld  $0, %ymm0, %ymm0
; CHECK: ret

define <8 x i32> @test_srld_2(<8 x i32> %InVec) {
entry:
  %shl = lshr <8 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srld_2:
; CHECK: vpsrld  $1, %ymm0, %ymm0
; CHECK: ret

define <8 x i32> @test_srld_3(<8 x i32> %InVec) {
entry:
  %shl = lshr <8 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  ret <8 x i32> %shl
}

; CHECK-LABEL: test_srld_3:
; CHECK: vpsrld $31, %ymm0, %ymm0
; CHECK: ret

define <4 x i64> @test_srlq_1(<4 x i64> %InVec) {
entry:
  %shl = lshr <4 x i64> %InVec, <i64 0, i64 0, i64 0, i64 0>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_srlq_1:
; CHECK-NOT: vpsrlq  $0, %ymm0, %ymm0
; CHECK: ret

define <4 x i64> @test_srlq_2(<4 x i64> %InVec) {
entry:
  %shl = lshr <4 x i64> %InVec, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_srlq_2:
; CHECK: vpsrlq  $1, %ymm0, %ymm0
; CHECK: ret

define <4 x i64> @test_srlq_3(<4 x i64> %InVec) {
entry:
  %shl = lshr <4 x i64> %InVec, <i64 63, i64 63, i64 63, i64 63>
  ret <4 x i64> %shl
}

; CHECK-LABEL: test_srlq_3:
; CHECK: vpsrlq $63, %ymm0, %ymm0
; CHECK: ret

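; The 'and' and truncation of the i64 shift amounts can be narrowed to i32,
; so a single vpand feeding vpsrlvd should suffice.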
; CHECK-LABEL: @srl_trunc_and_v4i64
; CHECK: vpand
; CHECK-NEXT: vpsrlvd
; CHECK: ret
define <4 x i32> @srl_trunc_and_v4i64(<4 x i32> %x, <4 x i64> %y) nounwind {
  %and = and <4 x i64> %y, <i64 8, i64 8, i64 8, i64 8>
  %trunc = trunc <4 x i64> %and to <4 x i32>
  %sra = lshr <4 x i32> %x, %trunc
  ret <4 x i32> %sra
}

;
; Vectorized variable shifts
;

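; AVX2 has no variable i16 shift (vpsllvw is AVX-512BW), so 128-bit i16
; vectors are zero-extended to i32, shifted with vpsllvd, and truncated back.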
define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-LABEL:  shl_8i16
; CHECK:        vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT:   vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:   vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:   vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:   vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK:        retq
  %shl = shl <8 x i16> %r, %a
  ret <8 x i16> %shl
}

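; 256-bit i16 vectors are split into dword lanes by interleaving with zero,
; each half is shifted with vpsllvd, shifted back down, and the halves are
; repacked with vpackusdw.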
define <16 x i16> @shl_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; CHECK-LABEL:  shl_16i16
; CHECK:        vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT:   vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; CHECK-NEXT:   vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; CHECK-NEXT:   vpsllvd %ymm3, %ymm4, %ymm3
; CHECK-NEXT:   vpsrld $16, %ymm3, %ymm3
; CHECK-NEXT:   vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; CHECK-NEXT:   vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; CHECK-NEXT:   vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:   vpsrld $16, %ymm0, %ymm0
; CHECK-NEXT:   vpackusdw %ymm3, %ymm0, %ymm0
; CHECK-NEXT:   retq
  %shl = shl <16 x i16> %r, %a
  ret <16 x i16> %shl
}

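; There is no variable byte shift at all, so the shift amount is moved into
; the byte sign bits with vpsllw $5 and a ladder of constant shifts is
; selected per element with vpblendvb.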
define <32 x i8> @shl_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; CHECK-LABEL:  shl_32i8
; CHECK:        vpsllw    $5, %ymm1, %ymm1
; CHECK-NEXT:   vpsllw    $4, %ymm0, %ymm2
; CHECK-NEXT:   vpand     {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:   vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:   vpsllw    $2, %ymm0, %ymm2
; CHECK-NEXT:   vpand     {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:   vpaddb    %ymm1, %ymm1, %ymm1
; CHECK-NEXT:   vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:   vpaddb    %ymm0, %ymm0, %ymm2
; CHECK-NEXT:   vpaddb    %ymm1, %ymm1, %ymm1
; CHECK-NEXT:   vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:   retq
  %shl = shl <32 x i8> %r, %a
  ret <32 x i8> %shl
}

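; As shl_8i16, but the value is sign-extended with vpmovsxwd and shifted
; with vpsravd.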
define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-LABEL:  ashr_8i16
; CHECK:        vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT:   vpmovsxwd %xmm0, %ymm0
; CHECK-NEXT:   vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:   vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:   vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK:        retq
  %ashr = ashr <8 x i16> %r, %a
  ret <8 x i16> %ashr
}

define <16 x i16> @ashr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; CHECK-LABEL:  ashr_16i16
; CHECK:        vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT:   vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; CHECK-NEXT:   vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; CHECK-NEXT:   vpsravd %ymm3, %ymm4, %ymm3
; CHECK-NEXT:   vpsrld $16, %ymm3, %ymm3
; CHECK-NEXT:   vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; CHECK-NEXT:   vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; CHECK-NEXT:   vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:   vpsrld $16, %ymm0, %ymm0
; CHECK-NEXT:   vpackusdw %ymm3, %ymm0, %ymm0
; CHECK-NEXT:   retq
  %ashr = ashr <16 x i16> %r, %a
  ret <16 x i16> %ashr
}

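; Variable arithmetic byte shifts place the bytes in the high halves of word
; lanes (vpunpck*bw), run a vpsraw/vpblendvb ladder on each half, shift back
; down with vpsrlw $8, and repack with vpackuswb.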
define <32 x i8> @ashr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; CHECK-LABEL:  ashr_32i8
; CHECK:        vpsllw     $5, %ymm1, %ymm1
; CHECK-NEXT:   vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
; CHECK-NEXT:   vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
; CHECK-NEXT:   vpsraw     $4, %ymm3, %ymm4
; CHECK-NEXT:   vpblendvb  %ymm2, %ymm4, %ymm3, %ymm3
; CHECK-NEXT:   vpsraw     $2, %ymm3, %ymm4
; CHECK-NEXT:   vpaddw     %ymm2, %ymm2, %ymm2
; CHECK-NEXT:   vpblendvb  %ymm2, %ymm4, %ymm3, %ymm3
; CHECK-NEXT:   vpsraw     $1, %ymm3, %ymm4
; CHECK-NEXT:   vpaddw     %ymm2, %ymm2, %ymm2
; CHECK-NEXT:   vpblendvb  %ymm2, %ymm4, %ymm3, %ymm2
; CHECK-NEXT:   vpsrlw     $8, %ymm2, %ymm2
; CHECK-NEXT:   vpunpcklbw {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
; CHECK-NEXT:   vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
; CHECK-NEXT:   vpsraw     $4, %ymm0, %ymm3
; CHECK-NEXT:   vpblendvb  %ymm1, %ymm3, %ymm0, %ymm0
; CHECK-NEXT:   vpsraw     $2, %ymm0, %ymm3
; CHECK-NEXT:   vpaddw     %ymm1, %ymm1, %ymm1
; CHECK-NEXT:   vpblendvb  %ymm1, %ymm3, %ymm0, %ymm0
; CHECK-NEXT:   vpsraw     $1, %ymm0, %ymm3
; CHECK-NEXT:   vpaddw     %ymm1, %ymm1, %ymm1
; CHECK-NEXT:   vpblendvb  %ymm1, %ymm3, %ymm0, %ymm0
; CHECK-NEXT:   vpsrlw     $8, %ymm0, %ymm0
; CHECK-NEXT:   vpackuswb  %ymm2, %ymm0, %ymm0
; CHECK-NEXT:   retq
  %ashr = ashr <32 x i8> %r, %a
  ret <32 x i8> %ashr
}

define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-LABEL:  lshr_8i16
; CHECK:        vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; CHECK-NEXT:   vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:   vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:   vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:   vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK:        retq
  %lshr = lshr <8 x i16> %r, %a
  ret <8 x i16> %lshr
}

define <16 x i16> @lshr_16i16(<16 x i16> %r, <16 x i16> %a) nounwind {
; CHECK-LABEL:  lshr_16i16
; CHECK:        vpxor %ymm2, %ymm2, %ymm2
; CHECK-NEXT:   vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
; CHECK-NEXT:   vpunpckhwd {{.*#+}} ymm4 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
; CHECK-NEXT:   vpsrlvd %ymm3, %ymm4, %ymm3
; CHECK-NEXT:   vpsrld $16, %ymm3, %ymm3
; CHECK-NEXT:   vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
; CHECK-NEXT:   vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
; CHECK-NEXT:   vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:   vpsrld $16, %ymm0, %ymm0
; CHECK-NEXT:   vpackusdw %ymm3, %ymm0, %ymm0
; CHECK-NEXT:   retq
  %lshr = lshr <16 x i16> %r, %a
  ret <16 x i16> %lshr
}

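; Same vpblendvb select ladder as shl_32i8, but using vpsrlw with masks that
; clear the bits shifted in from the high end of each word.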
define <32 x i8> @lshr_32i8(<32 x i8> %r, <32 x i8> %a) nounwind {
; CHECK-LABEL:  lshr_32i8
; CHECK:        vpsllw    $5, %ymm1, %ymm1
; CHECK-NEXT:   vpsrlw    $4, %ymm0, %ymm2
; CHECK-NEXT:   vpand     {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:   vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:   vpsrlw    $2, %ymm0, %ymm2
; CHECK-NEXT:   vpand     {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:   vpaddb    %ymm1, %ymm1, %ymm1
; CHECK-NEXT:   vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:   vpsrlw    $1, %ymm0, %ymm2
; CHECK-NEXT:   vpand     {{.*}}(%rip), %ymm2, %ymm2
; CHECK-NEXT:   vpaddb    %ymm1, %ymm1, %ymm1
; CHECK-NEXT:   vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
; CHECK-NEXT:   retq
  %lshr = lshr <32 x i8> %r, %a
  ret <32 x i8> %lshr
}