; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

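;;; Variable (per-element) shift amounts; these map directly to the AVX2
;;; vpsllv*/vpsrlv*/vpsrav* instructions.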
define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
; X32-LABEL: variable_shl0:
; X32:       # %bb.0:
; X32-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl0:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %k = shl <4 x i32> %x, %y
  ret <4 x i32> %k
}

define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
; X32-LABEL: variable_shl1:
; X32:       # %bb.0:
; X32-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl1:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %k = shl <8 x i32> %x, %y
  ret <8 x i32> %k
}

define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
; X32-LABEL: variable_shl2:
; X32:       # %bb.0:
; X32-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl2:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %k = shl <2 x i64> %x, %y
  ret <2 x i64> %k
}

define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
; X32-LABEL: variable_shl3:
; X32:       # %bb.0:
; X32-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl3:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %k = shl <4 x i64> %x, %y
  ret <4 x i64> %k
}

define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
; X32-LABEL: variable_srl0:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl0:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %k = lshr <4 x i32> %x, %y
  ret <4 x i32> %k
}

define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
; X32-LABEL: variable_srl1:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl1:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %k = lshr <8 x i32> %x, %y
  ret <8 x i32> %k
}

define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
; X32-LABEL: variable_srl2:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl2:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %k = lshr <2 x i64> %x, %y
  ret <2 x i64> %k
}

define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
; X32-LABEL: variable_srl3:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl3:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %k = lshr <4 x i64> %x, %y
  ret <4 x i64> %k
}

define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
; X32-LABEL: variable_sra0:
; X32:       # %bb.0:
; X32-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_sra0:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %k = ashr <4 x i32> %x, %y
  ret <4 x i32> %k
}

define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
; X32-LABEL: variable_sra1:
; X32:       # %bb.0:
; X32-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_sra1:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %k = ashr <8 x i32> %x, %y
  ret <8 x i32> %k
}

;;; Shift left

define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
; X32-LABEL: vshift00:
; X32:       # %bb.0:
; X32-NEXT:    vpslld $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift00:
; X64:       # %bb.0:
; X64-NEXT:    vpslld $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
; X32-LABEL: vshift01:
; X32:       # %bb.0:
; X32-NEXT:    vpsllw $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift01:
; X64:       # %bb.0:
; X64-NEXT:    vpsllw $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
; X32-LABEL: vshift02:
; X32:       # %bb.0:
; X32-NEXT:    vpsllq $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift02:
; X64:       # %bb.0:
; X64-NEXT:    vpsllq $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Logical Shift right

define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
; X32-LABEL: vshift03:
; X32:       # %bb.0:
; X32-NEXT:    vpsrld $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift03:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
; X32-LABEL: vshift04:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlw $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift04:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlw $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
; X32-LABEL: vshift05:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlq $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift05:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %s
}

;;; Arithmetic Shift right

define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
; X32-LABEL: vshift06:
; X32:       # %bb.0:
; X32-NEXT:    vpsrad $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift06:
; X64:       # %bb.0:
; X64-NEXT:    vpsrad $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %s
}

define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
; X32-LABEL: vshift07:
; X32:       # %bb.0:
; X32-NEXT:    vpsraw $2, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: vshift07:
; X64:       # %bb.0:
; X64-NEXT:    vpsraw $2, %ymm0, %ymm0
; X64-NEXT:    retq
  %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %s
}

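;;; Variable shifts with the shift-amount vector loaded from memory; the load
;;; should fold into the shift instruction's memory operand.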
define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
; X32-LABEL: variable_sra0_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsravd (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_sra0_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = ashr <4 x i32> %x, %y1
  ret <4 x i32> %k
}

define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
; X32-LABEL: variable_sra1_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsravd (%eax), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_sra1_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = ashr <8 x i32> %x, %y1
  ret <8 x i32> %k
}

define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
; X32-LABEL: variable_shl0_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsllvd (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl0_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = shl <4 x i32> %x, %y1
  ret <4 x i32> %k
}

define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
; X32-LABEL: variable_shl1_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsllvd (%eax), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl1_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = shl <8 x i32> %x, %y1
  ret <8 x i32> %k
}

define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
; X32-LABEL: variable_shl2_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsllvq (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl2_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <2 x i64>, <2 x i64>* %y
  %k = shl <2 x i64> %x, %y1
  ret <2 x i64> %k
}

define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
; X32-LABEL: variable_shl3_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsllvq (%eax), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl3_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <4 x i64>, <4 x i64>* %y
  %k = shl <4 x i64> %x, %y1
  ret <4 x i64> %k
}

define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
; X32-LABEL: variable_srl0_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsrlvd (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl0_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <4 x i32>, <4 x i32>* %y
  %k = lshr <4 x i32> %x, %y1
  ret <4 x i32> %k
}

define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
; X32-LABEL: variable_srl1_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsrlvd (%eax), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl1_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <8 x i32>, <8 x i32>* %y
  %k = lshr <8 x i32> %x, %y1
  ret <8 x i32> %k
}

define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
; X32-LABEL: variable_srl2_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsrlvq (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl2_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %y1 = load <2 x i64>, <2 x i64>* %y
  %k = lshr <2 x i64> %x, %y1
  ret <2 x i64> %k
}

define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
; X32-LABEL: variable_srl3_load:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpsrlvq (%eax), %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: variable_srl3_load:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq (%rdi), %ymm0, %ymm0
; X64-NEXT:    retq
  %y1 = load <4 x i64>, <4 x i64>* %y
  %k = lshr <4 x i64> %x, %y1
  ret <4 x i64> %k
}

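;;; Byte shifts: AVX2 has no byte-granularity shift instructions, so v32i8
;;; shifts by a uniform constant lower to a word shift plus a mask of the
;;; bits shifted across byte boundaries.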
define <32 x i8> @shl9(<32 x i8> %A) nounwind {
; X32-LABEL: shl9:
; X32:       # %bb.0:
; X32-NEXT:    vpsllw $3, %ymm0, %ymm0
; X32-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: shl9:
; X64:       # %bb.0:
; X64-NEXT:    vpsllw $3, %ymm0, %ymm0
; X64-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
}

define <32 x i8> @shr9(<32 x i8> %A) nounwind {
; X32-LABEL: shr9:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X32-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: shr9:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X64-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
}

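;;; An arithmetic shift right by 7 replicates each byte's sign bit, so it
;;; lowers to a signed compare against zero (vpcmpgtb).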
define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
; X32-LABEL: sra_v32i8_7:
; X32:       # %bb.0:
; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X32-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: sra_v32i8_7:
; X64:       # %bb.0:
; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X64-NEXT:    vpcmpgtb %ymm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <32 x i8> %B
}

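;;; General v32i8 ashr is emulated as lshr followed by xor/subtract with the
;;; shifted sign-bit constant (16 = 0x80 >> 3), which sign-extends the result.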
define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
; X32-LABEL: sra_v32i8:
; X32:       # %bb.0:
; X32-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X32-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X32-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; X32-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: sra_v32i8:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlw $3, %ymm0, %ymm0
; X64-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; X64-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; X64-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <32 x i8> %B
}

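;;; trunc+sext round trips are recognized as in-register sign extensions and
;;; lowered to a shift-left/arithmetic-shift-right pair.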
define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
; X32-LABEL: sext_v16i16:
; X32:       # %bb.0:
; X32-NEXT:    vpsllw $8, %ymm0, %ymm0
; X32-NEXT:    vpsraw $8, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: sext_v16i16:
; X64:       # %bb.0:
; X64-NEXT:    vpsllw $8, %ymm0, %ymm0
; X64-NEXT:    vpsraw $8, %ymm0, %ymm0
; X64-NEXT:    retq
  %b = trunc <16 x i16> %a to <16 x i8>
  %c = sext <16 x i8> %b to <16 x i16>
  ret <16 x i16> %c
}

define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
; X32-LABEL: sext_v8i32:
; X32:       # %bb.0:
; X32-NEXT:    vpslld $16, %ymm0, %ymm0
; X32-NEXT:    vpsrad $16, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: sext_v8i32:
; X64:       # %bb.0:
; X64-NEXT:    vpslld $16, %ymm0, %ymm0
; X64-NEXT:    vpsrad $16, %ymm0, %ymm0
; X64-NEXT:    retq
  %b = trunc <8 x i32> %a to <8 x i16>
  %c = sext <8 x i16> %b to <8 x i32>
  ret <8 x i32> %c
}

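;;; Variable v8i16 shifts: AVX2 has no per-element word shifts (those need
;;; AVX-512BW), so the vectors are widened to v8i32, shifted, and truncated
;;; back to v8i16.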
define <8 x i16> @variable_shl16(<8 x i16> %lhs, <8  x i16> %rhs) {
; X32-LABEL: variable_shl16:
; X32:       # %bb.0:
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X32-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X32-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: variable_shl16:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X64-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = shl <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}

define <8 x i16> @variable_ashr16(<8 x i16> %lhs, <8  x i16> %rhs) {
; X32-LABEL: variable_ashr16:
; X32:       # %bb.0:
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X32-NEXT:    vpmovsxwd %xmm0, %ymm0
; X32-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; X32-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X32-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: variable_ashr16:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-NEXT:    vpmovsxwd %xmm0, %ymm0
; X64-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = ashr <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}

define <8 x i16> @variable_lshr16(<8 x i16> %lhs, <8  x i16> %rhs) {
; X32-LABEL: variable_lshr16:
; X32:       # %bb.0:
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X32-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X32-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: variable_lshr16:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X64-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %res = lshr <8 x i16> %lhs, %rhs
  ret <8 x i16> %res
}