; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

;
; ASHR - Immediate
;

      9 define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
     10 ; CHECK-LABEL: @sse2_psrai_w_0(
     11 ; CHECK-NEXT:    ret <8 x i16> %v
     12 ;
     13   %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
     14   ret <8 x i16> %1
     15 }
     16 
     17 define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
     18 ; CHECK-LABEL: @sse2_psrai_w_15(
     19 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
     20 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
     21 ;
     22   %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
     23   ret <8 x i16> %1
     24 }
     25 
     26 define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
     27 ; CHECK-LABEL: @sse2_psrai_w_64(
     28 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
     29 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
     30 ;
     31   %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
     32   ret <8 x i16> %1
     33 }
     34 
     35 define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
     36 ; CHECK-LABEL: @sse2_psrai_d_0(
     37 ; CHECK-NEXT:    ret <4 x i32> %v
     38 ;
     39   %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
     40   ret <4 x i32> %1
     41 }
     42 
     43 define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
     44 ; CHECK-LABEL: @sse2_psrai_d_15(
     45 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
     46 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
     47 ;
     48   %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
     49   ret <4 x i32> %1
     50 }
     51 
     52 define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
     53 ; CHECK-LABEL: @sse2_psrai_d_64(
     54 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
     55 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
     56 ;
     57   %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
     58   ret <4 x i32> %1
     59 }
     60 
     61 define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
     62 ; CHECK-LABEL: @avx2_psrai_w_0(
     63 ; CHECK-NEXT:    ret <16 x i16> %v
     64 ;
     65   %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
     66   ret <16 x i16> %1
     67 }
     68 
     69 define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
     70 ; CHECK-LABEL: @avx2_psrai_w_15(
     71 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
     72 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
     73 ;
     74   %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
     75   ret <16 x i16> %1
     76 }
     77 
     78 define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
     79 ; CHECK-LABEL: @avx2_psrai_w_64(
     80 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
     81 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
     82 ;
     83   %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
     84   ret <16 x i16> %1
     85 }
     86 
     87 define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
     88 ; CHECK-LABEL: @avx2_psrai_d_0(
     89 ; CHECK-NEXT:    ret <8 x i32> %v
     90 ;
     91   %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
     92   ret <8 x i32> %1
     93 }
     94 
     95 define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
     96 ; CHECK-LABEL: @avx2_psrai_d_15(
     97 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
     98 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
     99 ;
    100   %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
    101   ret <8 x i32> %1
    102 }
    103 
    104 define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
    105 ; CHECK-LABEL: @avx2_psrai_d_64(
    106 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
    107 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
    108 ;
    109   %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
    110   ret <8 x i32> %1
    111 }

;
; LSHR - Immediate
;

; Logical shift right by an i32 immediate (psrli): a zero count folds to the
; input, an in-range count becomes a plain vector `lshr`, and an out-of-range
; count (>= the element bit width) shifts in only zeros, so the whole result
; folds to zeroinitializer.

define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_0(
; CHECK-NEXT:    ret <8 x i16> %v
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

; Count 64 >= 16 shifts every i16 lane to zero.
define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrli_w_64(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_0(
; CHECK-NEXT:    ret <4 x i32> %v
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

; Count 64 >= 32 shifts every i32 lane to zero.
define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrli_d_64(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_0(
; CHECK-NEXT:    ret <2 x i64> %v
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}

; Count 64 >= 64 shifts every i64 lane to zero.
define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrli_q_64(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_0(
; CHECK-NEXT:    ret <16 x i16> %v
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrli_w_64(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_0(
; CHECK-NEXT:    ret <8 x i32> %v
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrli_d_64(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_0(
; CHECK-NEXT:    ret <4 x i64> %v
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrli_q_64(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}

;
; SHL - Immediate
;

; Shift left by an i32 immediate (pslli): a zero count folds to the input, an
; in-range count becomes a plain vector `shl`, and an out-of-range count
; (>= the element bit width) shifts every bit out, so the result folds to
; zeroinitializer.

define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_0(
; CHECK-NEXT:    ret <8 x i16> %v
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
  ret <8 x i16> %1
}

; Count 64 >= 16 shifts every i16 lane to zero.
define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_pslli_w_64(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_0(
; CHECK-NEXT:    ret <4 x i32> %v
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
  ret <4 x i32> %1
}

; Count 64 >= 32 shifts every i32 lane to zero.
define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_pslli_d_64(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_0(
; CHECK-NEXT:    ret <2 x i64> %v
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
  ret <2 x i64> %1
}

; Count 64 >= 64 shifts every i64 lane to zero.
define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_pslli_q_64(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_0(
; CHECK-NEXT:    ret <16 x i16> %v
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_pslli_w_64(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_0(
; CHECK-NEXT:    ret <8 x i32> %v
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_pslli_d_64(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_0(
; CHECK-NEXT:    ret <4 x i64> %v
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_pslli_q_64(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
  ret <4 x i64> %1
}

;
; ASHR - Constant Vector
;

; Arithmetic shift right with the count taken from a vector operand (psra):
; only the LOW 64 BITS of the count vector are used as a single scalar count
; applied to every lane, so the elements above bit 63 (the 9999 values) are
; ignored.  Note that a per-element "splat" of the count therefore does NOT
; mean a per-element shift: e.g. <i32 15, i32 15, i32 15, i32 15> encodes the
; 64-bit count 0xF0000000F, which is out of range and clamps to bitwidth-1
; (see the *_15_splat tests).  For i16 lanes the clamp target is 15 either
; way, so sse2_psra_w_15_splat still folds to ashr by 15.

define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_0(
; CHECK-NEXT:    ret <8 x i16> %v
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

; Low 64 bits of the count vector == 15; the upper 9999 elements are ignored.
define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

; Splat count: low 64 bits are 0x000F000F000F000F, out of range, clamped to 15.
define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}

; Count 64 is out of range for i16 lanes, clamped to 15 (sign fill).
define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psra_w_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_0(
; CHECK-NEXT:    ret <4 x i32> %v
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

; Splat count: low 64 bits are 0xF0000000F, out of range, so clamp to 31.
define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}

; Count 64 is out of range for i32 lanes, clamped to 31 (sign fill).
define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psra_d_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_0(
; CHECK-NEXT:    ret <16 x i16> %v
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

; Out-of-range splat count clamps to 15, same as the SSE2 case.
define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psra_w_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_0(
; CHECK-NEXT:    ret <8 x i32> %v
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

; Out-of-range splat count clamps to 31, same as the SSE2 case.
define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_15_splat(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psra_d_64(
; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

;
; LSHR - Constant Vector
;

; Logical shift right with the count taken from a vector operand (psrl): only
; the low 64 bits of the count vector are used (elements above bit 63 - the
; 9999 values - are ignored).  An out-of-range 64-bit count shifts in only
; zeros, so both the explicit *_64 cases and the *_15_splat cases (whose low
; 64 bits encode a huge count) fold to zeroinitializer.

define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_0(
; CHECK-NEXT:    ret <8 x i16> %v
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

; Splat count: low 64 bits are 0x000F000F000F000F >= 16, so the result is 0.
define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_15_splat(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psrl_w_64(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_0(
; CHECK-NEXT:    ret <4 x i32> %v
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

; Splat count: low 64 bits are 0xF0000000F >= 32, so the result is 0.
define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_15_splat(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psrl_d_64(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_0(
; CHECK-NEXT:    ret <2 x i64> %v
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
  ret <2 x i64> %1
}

; For i64 elements the low 64 bits are exactly element 0; element 1 (9999) is
; ignored.
define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 15, i64 15>
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <2 x i64> %1
}

define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) {
; CHECK-LABEL: @sse2_psrl_q_64(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <2 x i64> %1
}

define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_0(
; CHECK-NEXT:    ret <16 x i16> %v
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_15_splat(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <16 x i16> %1
}

define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) {
; CHECK-LABEL: @avx2_psrl_w_64(
; CHECK-NEXT:    ret <16 x i16> zeroinitializer
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <16 x i16> %1
}

define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_0(
; CHECK-NEXT:    ret <8 x i32> %v
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_15_splat(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <8 x i32> %1
}

define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) {
; CHECK-LABEL: @avx2_psrl_d_64(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
  ret <8 x i32> %1
}

define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrl_q_0(
; CHECK-NEXT:    ret <4 x i64> %v
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrl_q_15(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
  ret <4 x i64> %1
}

define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) {
; CHECK-LABEL: @avx2_psrl_q_64(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
  ret <4 x i64> %1
}

;
; SHL - Constant Vector
;

; Shift left with the count taken from a vector operand (psll): only the low
; 64 bits of the count vector are used (the 9999 elements above bit 63 are
; ignored).  An out-of-range 64-bit count - including the *_15_splat cases,
; whose low 64 bits encode a huge count - shifts every bit out, so the result
; folds to zeroinitializer.

define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_0(
; CHECK-NEXT:    ret <8 x i16> %v
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

; Splat count: low 64 bits are 0x000F000F000F000F >= 16, so the result is 0.
define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_15_splat(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  ret <8 x i16> %1
}

define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) {
; CHECK-LABEL: @sse2_psll_w_64(
; CHECK-NEXT:    ret <8 x i16> zeroinitializer
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
  ret <8 x i16> %1
}

define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_0(
; CHECK-NEXT:    ret <4 x i32> %v
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
  ret <4 x i32> %1
}

define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_15(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
  ret <4 x i32> %1
}

; Splat count: low 64 bits are 0xF0000000F >= 32, so the result is 0.
define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) {
; CHECK-LABEL: @sse2_psll_d_15_splat(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
  ret <4 x i32> %1
}
    812 
    813 define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) {
    814 ; CHECK-LABEL: @sse2_psll_d_64(
    815 ; CHECK-NEXT:    ret <4 x i32> zeroinitializer
    816 ;
    817   %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
    818   ret <4 x i32> %1
    819 }
    820 
    821 define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
    822 ; CHECK-LABEL: @sse2_psll_q_0(
    823 ; CHECK-NEXT:    ret <2 x i64> %v
    824 ;
    825   %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
    826   ret <2 x i64> %1
    827 }
    828 
    829 define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
    830 ; CHECK-LABEL: @sse2_psll_q_15(
    831 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> %v, <i64 15, i64 15>
    832 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
    833 ;
    834   %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
    835   ret <2 x i64> %1
    836 }
    837 
    838 define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) {
    839 ; CHECK-LABEL: @sse2_psll_q_64(
    840 ; CHECK-NEXT:    ret <2 x i64> zeroinitializer
    841 ;
    842   %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
    843   ret <2 x i64> %1
    844 }
    845 
; AVX2 256-bit variants of the SSE2 psll tests above: the 128-bit count
; operand still contributes only its low 64 bits.
; Zero shift count: no-op, folds to %v.
    846 define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
    847 ; CHECK-LABEL: @avx2_psll_w_0(
    848 ; CHECK-NEXT:    ret <16 x i16> %v
    849 ;
    850   %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
    851   ret <16 x i16> %1
    852 }
    853 
; In-range count 15 in the low 64 bits -> uniform IR shl across all 16 lanes.
    854 define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
    855 ; CHECK-LABEL: @avx2_psll_w_15(
    856 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
    857 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
    858 ;
    859   %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
    860   ret <16 x i16> %1
    861 }
    862 
; Splat-of-15 count: combined low 64-bit count is out of range -> zero.
    863 define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) {
    864 ; CHECK-LABEL: @avx2_psll_w_15_splat(
    865 ; CHECK-NEXT:    ret <16 x i16> zeroinitializer
    866 ;
    867   %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
    868   ret <16 x i16> %1
    869 }
    870 
; Count >= element width (64 > 16): result folds to zero.
    871 define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) {
    872 ; CHECK-LABEL: @avx2_psll_w_64(
    873 ; CHECK-NEXT:    ret <16 x i16> zeroinitializer
    874 ;
    875   %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
    876   ret <16 x i16> %1
    877 }
    878 
; Zero shift count: no-op, folds to %v.
    879 define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
    880 ; CHECK-LABEL: @avx2_psll_d_0(
    881 ; CHECK-NEXT:    ret <8 x i32> %v
    882 ;
    883   %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
    884   ret <8 x i32> %1
    885 }
    886 
; In-range count 15 in the low 64 bits -> uniform IR shl.
    887 define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
    888 ; CHECK-LABEL: @avx2_psll_d_15(
    889 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
    890 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
    891 ;
    892   %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
    893   ret <8 x i32> %1
    894 }
    895 
; Splat-of-15 count: low 64 bits hold two 15s -> out-of-range count -> zero.
    896 define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) {
    897 ; CHECK-LABEL: @avx2_psll_d_15_splat(
    898 ; CHECK-NEXT:    ret <8 x i32> zeroinitializer
    899 ;
    900   %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
    901   ret <8 x i32> %1
    902 }
    903 
; Count >= element width (64 > 32): result folds to zero.
    904 define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) {
    905 ; CHECK-LABEL: @avx2_psll_d_64(
    906 ; CHECK-NEXT:    ret <8 x i32> zeroinitializer
    907 ;
    908   %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
    909   ret <8 x i32> %1
    910 }
    911 
; Zero shift count: no-op, folds to %v.
    912 define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
    913 ; CHECK-LABEL: @avx2_psll_q_0(
    914 ; CHECK-NEXT:    ret <4 x i64> %v
    915 ;
    916   %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
    917   ret <4 x i64> %1
    918 }
    919 
; In-range count 15 (element 0) -> uniform IR shl; element 1 is ignored.
    920 define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
    921 ; CHECK-LABEL: @avx2_psll_q_15(
    922 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
    923 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
    924 ;
    925   %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
    926   ret <4 x i64> %1
    927 }
    928 
; Count == element width (64): all bits shifted out, result is zero.
    929 define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) {
    930 ; CHECK-LABEL: @avx2_psll_q_64(
    931 ; CHECK-NEXT:    ret <4 x i64> zeroinitializer
    932 ;
    933   %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
    934   ret <4 x i64> %1
    935 }
    936 
    937 ;
    938 ; ASHR - Constant Per-Element Vector
    939 ;
    940 
; Per-element arithmetic shifts (psrav): each lane has its own count.
; All-zero counts: no-op, folds to %v.
    941 define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) {
    942 ; CHECK-LABEL: @avx2_psrav_d_128_0(
    943 ; CHECK-NEXT:    ret <4 x i32> %v
    944 ;
    945   %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer)
    946   ret <4 x i32> %1
    947 }
    948 
; 256-bit variant of the all-zero-count no-op fold.
    949 define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) {
    950 ; CHECK-LABEL: @avx2_psrav_d_256_0(
    951 ; CHECK-NEXT:    ret <8 x i32> %v
    952 ;
    953   %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
    954   ret <8 x i32> %1
    955 }
    956 
; Constant per-lane counts fold to an IR ashr; the out-of-range count (64)
; is clamped to 31, matching psrav's sign-fill saturation.
    957 define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) {
    958 ; CHECK-LABEL: @avx2_psrav_d_128_var(
    959 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
    960 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
    961 ;
    962   %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
    963   ret <4 x i32> %1
    964 }
    965 
; 256-bit variant: the lone out-of-range count (32) clamps to 31.
    966 define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) {
    967 ; CHECK-LABEL: @avx2_psrav_d_256_var(
    968 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
    969 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
    970 ;
    971   %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
    972   ret <8 x i32> %1
    973 }
    974 
; Every count out of range (counts are treated as unsigned, so -255 is big):
; all lanes clamp to 31; the undef count lane stays undef.
    975 define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) {
    976 ; CHECK-LABEL: @avx2_psrav_d_128_allbig(
    977 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 undef>
    978 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
    979 ;
    980   %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
    981   ret <4 x i32> %1
    982 }
    983 
; 256-bit all-big variant; negative counts are huge unsigned values, clamp to 31.
    984 define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) {
    985 ; CHECK-LABEL: @avx2_psrav_d_256_allbig(
    986 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
    987 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
    988 ;
    989   %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
    990   ret <8 x i32> %1
    991 }
    992 
; An undef inserted into the constant count vector still allows the fold;
; the undef lane is carried into the resulting ashr.
    993 define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) {
    994 ; CHECK-LABEL: @avx2_psrav_d_128_undef(
    995 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
    996 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
    997 ;
    998   %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 64>, i32 undef, i32 0
    999   %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %1)
   1000   ret <4 x i32> %2
   1001 }
   1002 
; 256-bit variant of the insertelement-undef fold (undef in lane 1).
   1003 define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) {
   1004 ; CHECK-LABEL: @avx2_psrav_d_256_undef(
   1005 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
   1006 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
   1007 ;
   1008   %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
   1009   %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %1)
   1010   ret <8 x i32> %2
   1011 }
   1012 
   1013 ;
   1014 ; LSHR - Constant Per-Element Vector
   1015 ;
   1016 
; Per-element logical right shifts (psrlv).
; All-zero counts: no-op, folds to %v.
   1017 define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) {
   1018 ; CHECK-LABEL: @avx2_psrlv_d_128_0(
   1019 ; CHECK-NEXT:    ret <4 x i32> %v
   1020 ;
   1021   %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
   1022   ret <4 x i32> %1
   1023 }
   1024 
; 256-bit variant of the all-zero-count no-op fold.
   1025 define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) {
   1026 ; CHECK-LABEL: @avx2_psrlv_d_256_0(
   1027 ; CHECK-NEXT:    ret <8 x i32> %v
   1028 ;
   1029   %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
   1030   ret <8 x i32> %1
   1031 }
   1032 
; All counts in range: folds directly to an IR lshr with the same counts.
   1033 define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) {
   1034 ; CHECK-LABEL: @avx2_psrlv_d_128_var(
   1035 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
   1036 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
   1037 ;
   1038   %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
   1039   ret <4 x i32> %1
   1040 }
   1041 
; 256-bit all-in-range fold to lshr.
   1042 define <8 x i32> @avx2_psrlv_d_256_var(<8 x i32> %v) {
   1043 ; CHECK-LABEL: @avx2_psrlv_d_256_var(
   1044 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
   1045 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
   1046 ;
   1047   %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
   1048   ret <8 x i32> %1
   1049 }
   1050 
; Negative test: one out-of-range count (64) mixed with in-range ones.
; The call is kept — psrlv zeroes that lane, but an IR lshr by >= bitwidth
; is undefined, so no safe lshr fold exists.
   1051 define <4 x i32> @avx2_psrlv_d_128_big(<4 x i32> %v) {
   1052 ; CHECK-LABEL: @avx2_psrlv_d_128_big(
   1053 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
   1054 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
   1055 ;
   1056   %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
   1057   ret <4 x i32> %1
   1058 }
   1059 
; 256-bit negative test: mixed in/out-of-range counts keep the intrinsic.
   1060 define <8 x i32> @avx2_psrlv_d_256_big(<8 x i32> %v) {
   1061 ; CHECK-LABEL: @avx2_psrlv_d_256_big(
   1062 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
   1063 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
   1064 ;
   1065   %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
   1066   ret <8 x i32> %1
   1067 }
   1068 
; Every count out of range: each lane becomes zero, so the whole result is
; a constant (undef count lane stays undef).
   1069 define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) {
   1070 ; CHECK-LABEL: @avx2_psrlv_d_128_allbig(
   1071 ; CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
   1072 ;
   1073   %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
   1074   ret <4 x i32> %1
   1075 }
   1076 
; 256-bit all-big variant: negative counts are huge unsigned values.
   1077 define <8 x i32> @avx2_psrlv_d_256_allbig(<8 x i32> %v) {
   1078 ; CHECK-LABEL: @avx2_psrlv_d_256_allbig(
   1079 ; CHECK-NEXT:    ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1080 ;
   1081   %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
   1082   ret <8 x i32> %1
   1083 }
   1084 
; Undef inserted into the count vector still folds to lshr with an undef lane.
   1085 define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) {
   1086 ; CHECK-LABEL: @avx2_psrlv_d_128_undef(
   1087 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
   1088 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
   1089 ;
   1090   %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
   1091   %2 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> %1)
   1092   ret <4 x i32> %2
   1093 }
   1094 
; 256-bit variant of the insertelement-undef fold (undef in lane 1).
   1095 define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) {
   1096 ; CHECK-LABEL: @avx2_psrlv_d_256_undef(
   1097 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
   1098 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
   1099 ;
   1100   %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
   1101   %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1)
   1102   ret <8 x i32> %2
   1103 }
   1104 
; 64-bit-element per-lane logical right shifts (psrlv.q), mirroring the
; psrlv.d cases above.
; All-zero counts: no-op, folds to %v.
   1105 define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) {
   1106 ; CHECK-LABEL: @avx2_psrlv_q_128_0(
   1107 ; CHECK-NEXT:    ret <2 x i64> %v
   1108 ;
   1109   %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
   1110   ret <2 x i64> %1
   1111 }
   1112 
; 256-bit variant of the all-zero-count no-op fold.
   1113 define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) {
   1114 ; CHECK-LABEL: @avx2_psrlv_q_256_0(
   1115 ; CHECK-NEXT:    ret <4 x i64> %v
   1116 ;
   1117   %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
   1118   ret <4 x i64> %1
   1119 }
   1120 
; All counts in range: folds to an IR lshr with the same per-lane counts.
   1121 define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) {
   1122 ; CHECK-LABEL: @avx2_psrlv_q_128_var(
   1123 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 0, i64 8>
   1124 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
   1125 ;
   1126   %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
   1127   ret <2 x i64> %1
   1128 }
   1129 
; 256-bit all-in-range fold to lshr.
   1130 define <4 x i64> @avx2_psrlv_q_256_var(<4 x i64> %v) {
   1131 ; CHECK-LABEL: @avx2_psrlv_q_256_var(
   1132 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31>
   1133 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
   1134 ;
   1135   %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
   1136   ret <4 x i64> %1
   1137 }
   1138 
; Negative test: a mixed in/out-of-range count (128) keeps the intrinsic
; call — an IR lshr by >= bitwidth would be undefined for that lane.
   1139 define <2 x i64> @avx2_psrlv_q_128_big(<2 x i64> %v) {
   1140 ; CHECK-LABEL: @avx2_psrlv_q_128_big(
   1141 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
   1142 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
   1143 ;
   1144   %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
   1145   ret <2 x i64> %1
   1146 }
   1147 
; 256-bit negative test: count 64 (== bitwidth) blocks the lshr fold.
   1148 define <4 x i64> @avx2_psrlv_q_256_big(<4 x i64> %v) {
   1149 ; CHECK-LABEL: @avx2_psrlv_q_256_big(
   1150 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
   1151 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
   1152 ;
   1153   %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
   1154   ret <4 x i64> %1
   1155 }
   1156 
; Every count out of range (-64 is a huge unsigned count): constant zero.
   1157 define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) {
   1158 ; CHECK-LABEL: @avx2_psrlv_q_128_allbig(
   1159 ; CHECK-NEXT:    ret <2 x i64> zeroinitializer
   1160 ;
   1161   %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
   1162   ret <2 x i64> %1
   1163 }
   1164 
; 256-bit all-big variant: constant zero result, undef count lane stays undef.
   1165 define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) {
   1166 ; CHECK-LABEL: @avx2_psrlv_q_256_allbig(
   1167 ; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
   1168 ;
   1169   %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
   1170   ret <4 x i64> %1
   1171 }
   1172 
; Undef inserted into the count vector still folds to lshr with an undef lane.
   1173 define <2 x i64> @avx2_psrlv_q_128_undef(<2 x i64> %v) {
   1174 ; CHECK-LABEL: @avx2_psrlv_q_128_undef(
   1175 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 0, i64 undef>
   1176 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
   1177 ;
   1178   %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1
   1179   %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1)
   1180   ret <2 x i64> %2
   1181 }
   1182 
; 256-bit variant of the insertelement-undef fold (undef in lane 0).
   1183 define <4 x i64> @avx2_psrlv_q_256_undef(<4 x i64> %v) {
   1184 ; CHECK-LABEL: @avx2_psrlv_q_256_undef(
   1185 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31>
   1186 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
   1187 ;
   1188   %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
   1189   %2 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> %1)
   1190   ret <4 x i64> %2
   1191 }
   1192 
   1193 ;
   1194 ; SHL - Constant Per-Element Vector
   1195 ;
   1196 
; Per-element left shifts (psllv), mirroring the psrlv.d cases above.
; All-zero counts: no-op, folds to %v.
   1197 define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) {
   1198 ; CHECK-LABEL: @avx2_psllv_d_128_0(
   1199 ; CHECK-NEXT:    ret <4 x i32> %v
   1200 ;
   1201   %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
   1202   ret <4 x i32> %1
   1203 }
   1204 
; 256-bit variant of the all-zero-count no-op fold.
   1205 define <8 x i32> @avx2_psllv_d_256_0(<8 x i32> %v) {
   1206 ; CHECK-LABEL: @avx2_psllv_d_256_0(
   1207 ; CHECK-NEXT:    ret <8 x i32> %v
   1208 ;
   1209   %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
   1210   ret <8 x i32> %1
   1211 }
   1212 
; All counts in range: folds to an IR shl with the same per-lane counts.
   1213 define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) {
   1214 ; CHECK-LABEL: @avx2_psllv_d_128_var(
   1215 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
   1216 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
   1217 ;
   1218   %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
   1219   ret <4 x i32> %1
   1220 }
   1221 
; 256-bit all-in-range fold to shl.
   1222 define <8 x i32> @avx2_psllv_d_256_var(<8 x i32> %v) {
   1223 ; CHECK-LABEL: @avx2_psllv_d_256_var(
   1224 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
   1225 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
   1226 ;
   1227   %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
   1228   ret <8 x i32> %1
   1229 }
   1230 
; Negative test: one out-of-range count (64) keeps the intrinsic — psllv
; zeroes that lane, but an IR shl by >= bitwidth is undefined.
   1231 define <4 x i32> @avx2_psllv_d_128_big(<4 x i32> %v) {
   1232 ; CHECK-LABEL: @avx2_psllv_d_128_big(
   1233 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
   1234 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
   1235 ;
   1236   %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
   1237   ret <4 x i32> %1
   1238 }
   1239 
; 256-bit negative test: mixed in/out-of-range counts keep the intrinsic.
   1240 define <8 x i32> @avx2_psllv_d_256_big(<8 x i32> %v) {
   1241 ; CHECK-LABEL: @avx2_psllv_d_256_big(
   1242 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
   1243 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
   1244 ;
   1245   %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
   1246   ret <8 x i32> %1
   1247 }
   1248 
; Every count out of range: constant-zero result, undef count lane stays undef.
   1249 define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) {
   1250 ; CHECK-LABEL: @avx2_psllv_d_128_allbig(
   1251 ; CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
   1252 ;
   1253   %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
   1254   ret <4 x i32> %1
   1255 }
   1256 
; 256-bit all-big variant: negative counts are huge unsigned values.
   1257 define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) {
   1258 ; CHECK-LABEL: @avx2_psllv_d_256_allbig(
   1259 ; CHECK-NEXT:    ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1260 ;
   1261   %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
   1262   ret <8 x i32> %1
   1263 }
   1264 
; Undef inserted into the count vector still folds to shl with an undef lane.
   1265 define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) {
   1266 ; CHECK-LABEL: @avx2_psllv_d_128_undef(
   1267 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
   1268 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
   1269 ;
   1270   %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
   1271   %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1)
   1272   ret <4 x i32> %2
   1273 }
   1274 
; 256-bit variant of the insertelement-undef fold (undef in lane 1).
   1275 define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) {
   1276 ; CHECK-LABEL: @avx2_psllv_d_256_undef(
   1277 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
   1278 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
   1279 ;
   1280   %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
   1281   %2 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> %1)
   1282   ret <8 x i32> %2
   1283 }
   1284 
; 64-bit-element per-lane left shifts (psllv.q).
; All-zero counts: no-op, folds to %v.
   1285 define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) {
   1286 ; CHECK-LABEL: @avx2_psllv_q_128_0(
   1287 ; CHECK-NEXT:    ret <2 x i64> %v
   1288 ;
   1289   %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
   1290   ret <2 x i64> %1
   1291 }
   1292 
; 256-bit variant of the all-zero-count no-op fold.
   1293 define <4 x i64> @avx2_psllv_q_256_0(<4 x i64> %v) {
   1294 ; CHECK-LABEL: @avx2_psllv_q_256_0(
   1295 ; CHECK-NEXT:    ret <4 x i64> %v
   1296 ;
   1297   %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
   1298   ret <4 x i64> %1
   1299 }
   1300 
; All counts in range: folds to an IR shl with the same per-lane counts.
   1301 define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) {
   1302 ; CHECK-LABEL: @avx2_psllv_q_128_var(
   1303 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> %v, <i64 0, i64 8>
   1304 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
   1305 ;
   1306   %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
   1307   ret <2 x i64> %1
   1308 }
   1309 
; 256-bit all-in-range fold to shl.
   1310 define <4 x i64> @avx2_psllv_q_256_var(<4 x i64> %v) {
   1311 ; CHECK-LABEL: @avx2_psllv_q_256_var(
   1312 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31>
   1313 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
   1314 ;
   1315   %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
   1316   ret <4 x i64> %1
   1317 }
   1318 
; Negative test: mixed in/out-of-range counts (128) keep the intrinsic call.
   1319 define <2 x i64> @avx2_psllv_q_128_big(<2 x i64> %v) {
   1320 ; CHECK-LABEL: @avx2_psllv_q_128_big(
   1321 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
   1322 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
   1323 ;
   1324   %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
   1325   ret <2 x i64> %1
   1326 }
   1327 
; 256-bit negative test: count 64 (== bitwidth) blocks the shl fold.
   1328 define <4 x i64> @avx2_psllv_q_256_big(<4 x i64> %v) {
   1329 ; CHECK-LABEL: @avx2_psllv_q_256_big(
   1330 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
   1331 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
   1332 ;
   1333   %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
   1334   ret <4 x i64> %1
   1335 }
   1336 
; Every count out of range (-64 is a huge unsigned count): constant zero.
   1337 define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) {
   1338 ; CHECK-LABEL: @avx2_psllv_q_128_allbig(
   1339 ; CHECK-NEXT:    ret <2 x i64> zeroinitializer
   1340 ;
   1341   %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
   1342   ret <2 x i64> %1
   1343 }
   1344 
; 256-bit all-big variant: constant zero result, undef count lane stays undef.
   1345 define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) {
   1346 ; CHECK-LABEL: @avx2_psllv_q_256_allbig(
   1347 ; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
   1348 ;
   1349   %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
   1350   ret <4 x i64> %1
   1351 }
   1352 
; Undef inserted into the count vector still folds to shl with an undef lane.
   1353 define <2 x i64> @avx2_psllv_q_128_undef(<2 x i64> %v) {
   1354 ; CHECK-LABEL: @avx2_psllv_q_128_undef(
   1355 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> %v, <i64 0, i64 undef>
   1356 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
   1357 ;
   1358   %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1
   1359   %2 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> %1)
   1360   ret <2 x i64> %2
   1361 }
   1362 
; 256-bit variant of the insertelement-undef fold (undef in lane 0).
   1363 define <4 x i64> @avx2_psllv_q_256_undef(<4 x i64> %v) {
   1364 ; CHECK-LABEL: @avx2_psllv_q_256_undef(
   1365 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31>
   1366 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
   1367 ;
   1368   %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
   1369   %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1)
   1370   ret <4 x i64> %2
   1371 }
   1372 
   1373 ;
   1374 ; Vector Demanded Bits
   1375 ;
   1376 
; Demanded-bits tests: only the low 64 bits of the count operand matter,
; so a shuffle that merely duplicates the low half into the high half is
; dead and gets removed, leaving the intrinsic applied to %a directly.
   1377 define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) {
   1378 ; CHECK-LABEL: @sse2_psra_w_var(
   1379 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a)
   1380 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
   1381 ;
   1382   %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   1383   %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
   1384   ret <8 x i16> %2
   1385 }
   1386 
; Same demanded-bits fold looking through a bitcast: the low-half splat
; shuffle is removed while the bitcast to the count type is kept.
   1387 define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) {
   1388 ; CHECK-LABEL: @sse2_psra_w_var_bc(
   1389 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %a to <8 x i16>
   1390 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> [[TMP1]])
   1391 ; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
   1392 ;
   1393   %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
   1394   %2 = bitcast <2 x i64> %1 to <8 x i16>
   1395   %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2)
   1396   ret <8 x i16> %3
   1397 }
   1398 
; Low-half-duplicating shuffle on a 32-bit-element count is removed.
   1399 define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) {
   1400 ; CHECK-LABEL: @sse2_psra_d_var(
   1401 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a)
   1402 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
   1403 ;
   1404   %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   1405   %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
   1406   ret <4 x i32> %2
   1407 }
   1408 
; Demanded-bits fold through a bitcast from <8 x i16> to the count type.
   1409 define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) {
   1410 ; CHECK-LABEL: @sse2_psra_d_var_bc(
   1411 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> %a to <4 x i32>
   1412 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> [[TMP1]])
   1413 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
   1414 ;
   1415   %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   1416   %2 = bitcast <8 x i16> %1 to <4 x i32>
   1417   %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2)
   1418   ret <4 x i32> %3
   1419 }
   1420 
; AVX2 variant: same low-half shuffle removal for the 256-bit psra.w.
   1421 define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) {
   1422 ; CHECK-LABEL: @avx2_psra_w_var(
   1423 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a)
   1424 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
   1425 ;
   1426   %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   1427   %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
   1428   ret <16 x i16> %2
   1429 }
   1430 
; AVX2 variant: same low-half shuffle removal for the 256-bit psra.d.
   1431 define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) {
   1432 ; CHECK-LABEL: @avx2_psra_d_var(
   1433 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a)
   1434 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
   1435 ;
   1436   %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   1437   %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
   1438   ret <8 x i32> %2
   1439 }
   1440 
   1441 define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) {
   1442 ; CHECK-LABEL: @sse2_psrl_w_var(
   1443 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a)
   1444 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
   1445 ;
   1446   %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   1447   %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
   1448   ret <8 x i16> %2
   1449 }
   1450 
   1451 define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) {
   1452 ; CHECK-LABEL: @sse2_psrl_d_var(
   1453 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a)
   1454 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
   1455 ;
   1456   %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   1457   %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
   1458   ret <4 x i32> %2
   1459 }
   1460 
; Splatting lane 0 of the i64 count leaves the low 64 bits unchanged, so the
; shuffle is removed.
define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @sse2_psrl_q_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a)
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
  ret <2 x i64> %2
}
   1470 
; AVX2 variant: count shuffle that preserves the low 64 bits is dropped.
define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx2_psrl_w_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a)
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
  ret <16 x i16> %2
}
   1480 
; Shuffle-through-bitcast: the low-64-bit-preserving shuffle is removed while
; the bitcast of the count to <8 x i16> is kept (see CHECK lines).
define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) {
; CHECK-LABEL: @avx2_psrl_w_var_bc(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> %a to <8 x i16>
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> [[TMP1]])
; CHECK-NEXT:    ret <16 x i16> [[TMP2]]
;
  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %2 = bitcast <16 x i8> %1 to <8 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2)
  ret <16 x i16> %3
}
   1492 
; AVX2 psrl.d: low-64-bit-preserving shuffle of the count is eliminated.
define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psrl_d_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
  ret <8 x i32> %2
}
   1502 
; Shuffle-through-bitcast for psrl.d: the lane-0 splat is dropped, the bitcast
; to <4 x i32> survives.
define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psrl_d_var_bc(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %a to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> [[TMP1]])
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = bitcast <2 x i64> %1 to <4 x i32>
  %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2)
  ret <8 x i32> %3
}
   1514 
; AVX2 psrl.q: splat of count lane 0 keeps the low 64 bits intact, so it is
; removed.
define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psrl_q_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a)
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
  ret <4 x i64> %2
}
   1524 
; Left-shift counterpart: low-64-bit-preserving shuffle of the count folds away.
define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @sse2_psll_w_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a)
; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
  ret <8 x i16> %2
}
   1534 
; psll.d: <0,1,0,1> count shuffle removed.
define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @sse2_psll_d_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
  ret <4 x i32> %2
}
   1544 
; psll.q: lane-0 splat of the count is a no-op for the shift and is dropped.
define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @sse2_psll_q_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a)
; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
  ret <2 x i64> %2
}
   1554 
; AVX2 psll.w: low-64-bit-preserving count shuffle removed.
define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) {
; CHECK-LABEL: @avx2_psll_w_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a)
; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
;
  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
  ret <16 x i16> %2
}
   1564 
; AVX2 psll.d: <0,1,0,1> count shuffle removed.
define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) {
; CHECK-LABEL: @avx2_psll_d_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a)
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
  ret <8 x i32> %2
}
   1574 
; AVX2 psll.q: lane-0 splat of the count removed.
define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) {
; CHECK-LABEL: @avx2_psll_q_var(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a)
; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
;
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
  ret <4 x i64> %2
}
   1584 
   1585 ;
   1586 ; Constant Folding
   1587 ;
   1588 
; Chain of arithmetic shifts whose effective counts are all zero folds to %A.
; The psra.w count vector has a 7 in lane 4, but only its low 64 bits
; (lanes 0-3, all zero) are read.
define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
; CHECK-LABEL: @test_sse2_psra_w_0(
; CHECK-NEXT:    ret <8 x i16> %A
;
  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
  ret <8 x i16> %3
}
   1598 
; Constant folding: shifts of a constant vector by 3, then 3 (effective
; psra.w count), then 2 fold to the final constant in the CHECK line.
define <8 x i16> @test_sse2_psra_w_8() {
; CHECK-LABEL: @test_sse2_psra_w_8(
; CHECK-NEXT:    ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
;
  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
  %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
  %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
  ret <8 x i16> %4
}
   1609 
   1610 define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
   1611 ; CHECK-LABEL: @test_sse2_psra_d_0(
   1612 ; CHECK-NEXT:    ret <4 x i32> %A
   1613 ;
   1614   %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
   1615   %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
   1616   %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 0)
   1617   ret <4 x i32> %3
   1618 }
   1619 
; Constant folding of chained psrai.d/psra.d/psrai.d (counts 3, 3, 2) on a
; constant input down to the single constant in the CHECK line.
define <4 x i32> @sse2_psra_d_8() {
; CHECK-LABEL: @sse2_psra_d_8(
; CHECK-NEXT:    ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
;
  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
  %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
  %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
  %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
  ret <4 x i32> %4
}
   1630 
; AVX2 version of the all-zero-count fold: the whole chain collapses to %A.
define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
; CHECK-LABEL: @test_avx2_psra_w_0(
; CHECK-NEXT:    ret <16 x i16> %A
;
  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
  ret <16 x i16> %3
}
   1640 
; AVX2 constant folding of chained shifts by 3, 3, 2 on a constant input.
; NOTE(review): parameter %A is unused (sibling *_8 tests take no parameter);
; kept to preserve the test's signature.
define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
; CHECK-LABEL: @test_avx2_psra_w_8(
; CHECK-NEXT:    ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
;
  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
  %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
  %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
  %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
  ret <16 x i16> %4
}
   1651 
; AVX2 psra.d all-zero-count chain folds to %A (the 7 sits above the low
; 64 bits of the count that the intrinsic reads).
define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
; CHECK-LABEL: @test_avx2_psra_d_0(
; CHECK-NEXT:    ret <8 x i32> %A
;
  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
  %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
  ret <8 x i32> %3
}
   1661 
; AVX2 psra.d constant folding of chained shifts (counts 3, 3, 2).
define <8 x i32> @test_avx2_psra_d_8() {
; CHECK-LABEL: @test_avx2_psra_d_8(
; CHECK-NEXT:    ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
;
  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
  %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
  %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
  %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
  ret <8 x i32> %4
}
   1672 
   1673 ;
   1674 ; Old Tests
   1675 ;
   1676 
; Legacy test: a long chain of SSE2 left shifts (vector-count and immediate
; forms) with every count equal to 1 constant-folds to a single constant.
define <2 x i64> @test_sse2_1() {
; CHECK-LABEL: @test_sse2_1(
; CHECK-NEXT:    ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
;
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}
   1700 
; Legacy test: AVX2 left-shift chain with every count equal to 1 folds to
; <64, 128, 192, 256> (the inputs 1..4 each shifted left by 6 total bits).
define <4 x i64> @test_avx2_1() {
; CHECK-LABEL: @test_avx2_1(
; CHECK-NEXT:    ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
;
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}
   1724 
; Legacy test: count of 128 is at least every element's bit width, so the
; whole left-shift chain folds to zero.
define <2 x i64> @test_sse2_0() {
; CHECK-LABEL: @test_sse2_0(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}
   1748 
; Legacy test: AVX2 left-shift chain with out-of-range count (128) folds to
; zero.
define <4 x i64> @test_avx2_0() {
; CHECK-LABEL: @test_avx2_0(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}
; Legacy test: SSE2 logical-right-shift chain with every count equal to 1
; constant-folds to a single constant.
define <2 x i64> @test_sse2_psrl_1() {
; CHECK-LABEL: @test_sse2_psrl_1(
; CHECK-NEXT:    ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
;
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}
   1795 
; Legacy test: AVX2 logical-right-shift chain with every count equal to 1
; folds to <16, 32, 64, 128> (inputs 1024..8192 each shifted right 6 bits).
define <4 x i64> @test_avx2_psrl_1() {
; CHECK-LABEL: @test_avx2_psrl_1(
; CHECK-NEXT:    ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
;
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}
   1819 
; Legacy test: logical-right-shift chain with out-of-range count (128) folds
; to zero.
define <2 x i64> @test_sse2_psrl_0() {
; CHECK-LABEL: @test_sse2_psrl_0(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}
   1843 
; Legacy test: AVX2 logical-right-shift chain with out-of-range count (128)
; folds to zero.
define <4 x i64> @test_avx2_psrl_0() {
; CHECK-LABEL: @test_avx2_psrl_0(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}
   1867 
   1868 declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
   1869 declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
   1870 declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
   1871 declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
   1872 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
   1873 declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
   1874 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
   1875 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
   1876 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
   1877 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
   1878 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
   1879 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
   1880 
   1881 declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
   1882 declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
   1883 declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
   1884 declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
   1885 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
   1886 declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
   1887 declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
   1888 declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
   1889 declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
   1890 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
   1891 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
   1892 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1
   1893 
   1894 declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
   1895 declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
   1896 declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
   1897 declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
   1898 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
   1899 declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
   1900 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
   1901 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1
   1902 
   1903 declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) #1
   1904 declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) #1
   1905 
   1906 declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) #1
   1907 declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) #1
   1908 declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) #1
   1909 declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) #1
   1910 
   1911 declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) #1
   1912 declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) #1
   1913 declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) #1
   1914 declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) #1
   1915 
   1916 attributes #1 = { nounwind readnone }
   1917