Home | History | Annotate | Download | only in X86
      1 ; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core2 < %s | FileCheck --check-prefix=SSE2-CODEGEN %s
      2 ; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
      3 
      4 %shifttype = type <2 x i16>
        ; shift2i16: per-lane arithmetic shift right of <2 x i16> %a by the
        ; variable amounts held in %b. The cost-model check expects a cost of 20
        ; for the ashr; the codegen check expects a 64-bit `sarq %cl` from llc.
        ; NOTE(review): the 64-bit shift presumably comes from widening the two
        ; i16 lanes before the shift is scalarized - confirm.
      5 define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
      6 entry:
      7   ; SSE2: shift2i16
      8   ; SSE2: cost of 20 {{.*}} ashr
      9   ; SSE2-CODEGEN: shift2i16
     10   ; SSE2-CODEGEN: sarq %cl
     11 
     12   %0 = ashr %shifttype %a , %b
     13   ret %shifttype %0
     14 }
     15 
     16 %shifttype4i16 = type <4 x i16>
        ; shift4i16: variable ashr on <4 x i16>; each lane of %a is shifted by the
        ; same lane of %b. Expected cost-model result is 40, and llc's output
        ; must contain a 32-bit `sarl %cl`.
     17 define %shifttype4i16 @shift4i16(%shifttype4i16 %a, %shifttype4i16 %b) {
     18 entry:
     19   ; SSE2: shift4i16
     20   ; SSE2: cost of 40 {{.*}} ashr
     21   ; SSE2-CODEGEN: shift4i16
     22   ; SSE2-CODEGEN: sarl %cl
     23 
     24   %0 = ashr %shifttype4i16 %a , %b
     25   ret %shifttype4i16 %0
     26 }
     27 
     28 %shifttype8i16 = type <8 x i16>
        ; shift8i16: variable ashr on <8 x i16> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 80; llc's output must contain a 16-bit
        ; `sarw %cl`.
     29 define %shifttype8i16 @shift8i16(%shifttype8i16 %a, %shifttype8i16 %b) {
     30 entry:
     31   ; SSE2: shift8i16
     32   ; SSE2: cost of 80 {{.*}} ashr
     33   ; SSE2-CODEGEN: shift8i16
     34   ; SSE2-CODEGEN: sarw %cl
     35 
     36   %0 = ashr %shifttype8i16 %a , %b
     37   ret %shifttype8i16 %0
     38 }
     39 
     40 %shifttype16i16 = type <16 x i16>
        ; shift16i16: variable ashr on <16 x i16> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 160, twice the <8 x i16> figure; llc's
        ; output must contain `sarw %cl`.
     41 define %shifttype16i16 @shift16i16(%shifttype16i16 %a, %shifttype16i16 %b) {
     42 entry:
     43   ; SSE2: shift16i16
     44   ; SSE2: cost of 160 {{.*}} ashr
     45   ; SSE2-CODEGEN: shift16i16
     46   ; SSE2-CODEGEN: sarw %cl
     47 
     48   %0 = ashr %shifttype16i16 %a , %b
     49   ret %shifttype16i16 %0
     50 }
     51 
     52 %shifttype32i16 = type <32 x i16>
        ; shift32i16: variable ashr on <32 x i16> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 320; llc's output must contain
        ; `sarw %cl`.
     53 define %shifttype32i16 @shift32i16(%shifttype32i16 %a, %shifttype32i16 %b) {
     54 entry:
     55   ; SSE2: shift32i16
     56   ; SSE2: cost of 320 {{.*}} ashr
     57   ; SSE2-CODEGEN: shift32i16
     58   ; SSE2-CODEGEN: sarw %cl
     59 
     60   %0 = ashr %shifttype32i16 %a , %b
     61   ret %shifttype32i16 %0
     62 }
     63 
     64 %shifttype2i32 = type <2 x i32>
        ; shift2i32: variable ashr on <2 x i32> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 20; the codegen check looks for a 64-bit
        ; `sarq %cl` even though the IR lanes are 32 bits wide.
        ; NOTE(review): presumably the two lanes are widened to 64 bits before
        ; the shift - confirm.
     65 define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
     66 entry:
     67   ; SSE2: shift2i32
     68   ; SSE2: cost of 20 {{.*}} ashr
     69   ; SSE2-CODEGEN: shift2i32
     70   ; SSE2-CODEGEN: sarq %cl
     71 
     72   %0 = ashr %shifttype2i32 %a , %b
     73   ret %shifttype2i32 %0
     74 }
     75 
     76 %shifttype4i32 = type <4 x i32>
        ; shift4i32: variable ashr on <4 x i32> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 40; llc's output must contain a 32-bit
        ; `sarl %cl`.
     77 define %shifttype4i32 @shift4i32(%shifttype4i32 %a, %shifttype4i32 %b) {
     78 entry:
     79   ; SSE2: shift4i32
     80   ; SSE2: cost of 40 {{.*}} ashr
     81   ; SSE2-CODEGEN: shift4i32
     82   ; SSE2-CODEGEN: sarl %cl
     83 
     84   %0 = ashr %shifttype4i32 %a , %b
     85   ret %shifttype4i32 %0
     86 }
     87 
     88 %shifttype8i32 = type <8 x i32>
        ; shift8i32: variable ashr on <8 x i32> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 80; llc's output must contain
        ; `sarl %cl`.
     89 define %shifttype8i32 @shift8i32(%shifttype8i32 %a, %shifttype8i32 %b) {
     90 entry:
     91   ; SSE2: shift8i32
     92   ; SSE2: cost of 80 {{.*}} ashr
     93   ; SSE2-CODEGEN: shift8i32
     94   ; SSE2-CODEGEN: sarl %cl
     95 
     96   %0 = ashr %shifttype8i32 %a , %b
     97   ret %shifttype8i32 %0
     98 }
     99 
    100 %shifttype16i32 = type <16 x i32>
        ; shift16i32: variable ashr on <16 x i32> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 160; llc's output must contain
        ; `sarl %cl`.
    101 define %shifttype16i32 @shift16i32(%shifttype16i32 %a, %shifttype16i32 %b) {
    102 entry:
    103   ; SSE2: shift16i32
    104   ; SSE2: cost of 160 {{.*}} ashr
    105   ; SSE2-CODEGEN: shift16i32
    106   ; SSE2-CODEGEN: sarl %cl
    107 
    108   %0 = ashr %shifttype16i32 %a , %b
    109   ret %shifttype16i32 %0
    110 }
    111 
    112 %shifttype32i32 = type <32 x i32>
        ; shift32i32: variable ashr on <32 x i32> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 320; llc's output must contain
        ; `sarl %cl`.
    113 define %shifttype32i32 @shift32i32(%shifttype32i32 %a, %shifttype32i32 %b) {
    114 entry:
    115   ; SSE2: shift32i32
    116   ; SSE2: cost of 320 {{.*}} ashr
    117   ; SSE2-CODEGEN: shift32i32
    118   ; SSE2-CODEGEN: sarl %cl
    119 
    120   %0 = ashr %shifttype32i32 %a , %b
    121   ret %shifttype32i32 %0
    122 }
    123 
    124 %shifttype2i64 = type <2 x i64>
        ; shift2i64: variable ashr on <2 x i64> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 20; llc's output must contain a 64-bit
        ; `sarq %cl`.
    125 define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
    126 entry:
    127   ; SSE2: shift2i64
    128   ; SSE2: cost of 20 {{.*}} ashr
    129   ; SSE2-CODEGEN: shift2i64
    130   ; SSE2-CODEGEN: sarq %cl
    131 
    132   %0 = ashr %shifttype2i64 %a , %b
    133   ret %shifttype2i64 %0
    134 }
    135 
    136 %shifttype4i64 = type <4 x i64>
        ; shift4i64: variable ashr on <4 x i64> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 40; llc's output must contain
        ; `sarq %cl`.
    137 define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
    138 entry:
    139   ; SSE2: shift4i64
    140   ; SSE2: cost of 40 {{.*}} ashr
    141   ; SSE2-CODEGEN: shift4i64
    142   ; SSE2-CODEGEN: sarq %cl
    143 
    144   %0 = ashr %shifttype4i64 %a , %b
    145   ret %shifttype4i64 %0
    146 }
    147 
    148 %shifttype8i64 = type <8 x i64>
        ; shift8i64: variable ashr on <8 x i64> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 80; llc's output must contain
        ; `sarq %cl`.
    149 define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
    150 entry:
    151   ; SSE2: shift8i64
    152   ; SSE2: cost of 80 {{.*}} ashr
    153   ; SSE2-CODEGEN: shift8i64
    154   ; SSE2-CODEGEN: sarq %cl
    155 
    156   %0 = ashr %shifttype8i64 %a , %b
    157   ret %shifttype8i64 %0
    158 }
    159 
    160 %shifttype16i64 = type <16 x i64>
        ; shift16i64: variable ashr on <16 x i64> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 160; llc's output must contain
        ; `sarq %cl`.
    161 define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
    162 entry:
    163   ; SSE2: shift16i64
    164   ; SSE2: cost of 160 {{.*}} ashr
    165   ; SSE2-CODEGEN: shift16i64
    166   ; SSE2-CODEGEN: sarq %cl
    167 
    168   %0 = ashr %shifttype16i64 %a , %b
    169   ret %shifttype16i64 %0
    170 }
    171 
    172 %shifttype32i64 = type <32 x i64>
        ; shift32i64: variable ashr on <32 x i64> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 320; llc's output must contain
        ; `sarq %cl`.
    173 define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
    174 entry:
    175   ; SSE2: shift32i64
    176   ; SSE2: cost of 320 {{.*}} ashr
    177   ; SSE2-CODEGEN: shift32i64
    178   ; SSE2-CODEGEN: sarq %cl
    179 
    180   %0 = ashr %shifttype32i64 %a , %b
    181   ret %shifttype32i64 %0
    182 }
    183 
    184 %shifttype2i8 = type <2 x i8>
        ; shift2i8: variable ashr on <2 x i8> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 20; the codegen check looks for a 64-bit
        ; `sarq %cl` although the IR lanes are only 8 bits wide.
        ; NOTE(review): presumably the two lanes are widened to 64 bits before
        ; the shift - confirm.
    185 define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
    186 entry:
    187   ; SSE2: shift2i8
    188   ; SSE2: cost of 20 {{.*}} ashr
    189   ; SSE2-CODEGEN: shift2i8
    190   ; SSE2-CODEGEN: sarq %cl
    191 
    192   %0 = ashr %shifttype2i8 %a , %b
    193   ret %shifttype2i8 %0
    194 }
    195 
    196 %shifttype4i8 = type <4 x i8>
        ; shift4i8: variable ashr on <4 x i8> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 40; the codegen check looks for a 32-bit
        ; `sarl %cl`.
        ; NOTE(review): presumably the four lanes are widened to 32 bits before
        ; the shift - confirm.
    197 define %shifttype4i8 @shift4i8(%shifttype4i8 %a, %shifttype4i8 %b) {
    198 entry:
    199   ; SSE2: shift4i8
    200   ; SSE2: cost of 40 {{.*}} ashr
    201   ; SSE2-CODEGEN: shift4i8
    202   ; SSE2-CODEGEN: sarl %cl
    203 
    204   %0 = ashr %shifttype4i8 %a , %b
    205   ret %shifttype4i8 %0
    206 }
    207 
    208 %shifttype8i8 = type <8 x i8>
        ; shift8i8: variable ashr on <8 x i8> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 80; the codegen check looks for a 16-bit
        ; `sarw %cl`.
        ; NOTE(review): presumably the eight lanes are widened to 16 bits before
        ; the shift - confirm.
    209 define %shifttype8i8 @shift8i8(%shifttype8i8 %a, %shifttype8i8 %b) {
    210 entry:
    211   ; SSE2: shift8i8
    212   ; SSE2: cost of 80 {{.*}} ashr
    213   ; SSE2-CODEGEN: shift8i8
    214   ; SSE2-CODEGEN: sarw %cl
    215 
    216   %0 = ashr %shifttype8i8 %a , %b
    217   ret %shifttype8i8 %0
    218 }
    219 
    220 %shifttype16i8 = type <16 x i8>
        ; shift16i8: variable ashr on <16 x i8> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 160; llc's output must contain an 8-bit
        ; `sarb %cl`.
    221 define %shifttype16i8 @shift16i8(%shifttype16i8 %a, %shifttype16i8 %b) {
    222 entry:
    223   ; SSE2: shift16i8
    224   ; SSE2: cost of 160 {{.*}} ashr
    225   ; SSE2-CODEGEN: shift16i8
    226   ; SSE2-CODEGEN: sarb %cl
    227 
    228   %0 = ashr %shifttype16i8 %a , %b
    229   ret %shifttype16i8 %0
    230 }
    231 
    232 %shifttype32i8 = type <32 x i8>
        ; shift32i8: variable ashr on <32 x i8> (%a shifted lane-wise by %b).
        ; Expected cost-model result is 320; llc's output must contain
        ; `sarb %cl`.
    233 define %shifttype32i8 @shift32i8(%shifttype32i8 %a, %shifttype32i8 %b) {
    234 entry:
    235   ; SSE2: shift32i8
    236   ; SSE2: cost of 320 {{.*}} ashr
    237   ; SSE2-CODEGEN: shift32i8
    238   ; SSE2-CODEGEN: sarb %cl
    239 
    240   %0 = ashr %shifttype32i8 %a , %b
    241   ret %shifttype32i8 %0
    242 }
    243 
    244 ; Test shift by a constant value.
    245 
    246 %shifttypec = type <2 x i16>
        ; shift2i16const: ashr of <2 x i16> %a by the constant splat 3; %b is
        ; accepted but never used. Expected cost-model result is 20. The codegen
        ; check only requires a `sarq` with some immediate operand (`sarq $`),
        ; not a specific shift count.
        ; NOTE(review): the open-ended immediate is presumably deliberate (the
        ; emitted count may differ from 3 after lane widening) - confirm before
        ; tightening it.
    247 define %shifttypec @shift2i16const(%shifttypec %a, %shifttypec %b) {
    248 entry:
    249   ; SSE2: shift2i16const
    250   ; SSE2: cost of 20 {{.*}} ashr
    251   ; SSE2-CODEGEN: shift2i16const
    252   ; SSE2-CODEGEN: sarq $
    253 
    254   %0 = ashr %shifttypec %a , <i16 3, i16 3>
    255   ret %shifttypec %0
    256 }
    257 
    258 %shifttypec4i16 = type <4 x i16>
        ; shift4i16const: ashr of <4 x i16> %a by the constant splat 3; %b is
        ; unused. Expected cost-model result is 1, and llc's output must contain
        ; the packed shift `psrad $3`.
        ; NOTE(review): a dword-granular shift for i16 lanes presumably reflects
        ; widening of the four lanes to 32 bits - confirm.
    259 define %shifttypec4i16 @shift4i16const(%shifttypec4i16 %a, %shifttypec4i16 %b) {
    260 entry:
    261   ; SSE2: shift4i16const
    262   ; SSE2: cost of 1 {{.*}} ashr
    263   ; SSE2-CODEGEN: shift4i16const
    264   ; SSE2-CODEGEN: psrad $3
    265 
    266   %0 = ashr %shifttypec4i16 %a , <i16 3, i16 3, i16 3, i16 3>
    267   ret %shifttypec4i16 %0
    268 }
    269 
    270 %shifttypec8i16 = type <8 x i16>
        ; shift8i16const: ashr of <8 x i16> %a by the constant splat 3; %b is
        ; unused. Expected cost-model result is 1, and llc's output must contain
        ; `psraw $3`.
    271 define %shifttypec8i16 @shift8i16const(%shifttypec8i16 %a, %shifttypec8i16 %b) {
    272 entry:
    273   ; SSE2: shift8i16const
    274   ; SSE2: cost of 1 {{.*}} ashr
    275   ; SSE2-CODEGEN: shift8i16const
    276   ; SSE2-CODEGEN: psraw $3
    277 
    278   %0 = ashr %shifttypec8i16 %a , <i16 3, i16 3, i16 3, i16 3,
    279                                   i16 3, i16 3, i16 3, i16 3>
    280   ret %shifttypec8i16 %0
    281 }
    282 
    283 %shifttypec16i16 = type <16 x i16>
        ; shift16i16const: ashr of <16 x i16> %a by the constant splat 3; %b is
        ; unused. Expected cost-model result is 2 (double the <8 x i16> case),
        ; and llc's output must contain `psraw $3`.
    284 define %shifttypec16i16 @shift16i16const(%shifttypec16i16 %a,
    285                                          %shifttypec16i16 %b) {
    286 entry:
    287   ; SSE2: shift16i16const
    288   ; SSE2: cost of 2 {{.*}} ashr
    289   ; SSE2-CODEGEN: shift16i16const
    290   ; SSE2-CODEGEN: psraw $3
    291 
    292   %0 = ashr %shifttypec16i16 %a , <i16 3, i16 3, i16 3, i16 3,
    293                                    i16 3, i16 3, i16 3, i16 3,
    294                                    i16 3, i16 3, i16 3, i16 3,
    295                                    i16 3, i16 3, i16 3, i16 3>
    296   ret %shifttypec16i16 %0
    297 }
    298 
    299 %shifttypec32i16 = type <32 x i16>
        ; shift32i16const: ashr of <32 x i16> %a by the constant splat 3; %b is
        ; unused. Expected cost-model result is 4, and llc's output must contain
        ; `psraw $3`.
    300 define %shifttypec32i16 @shift32i16const(%shifttypec32i16 %a,
    301                                         %shifttypec32i16 %b) {
    302 entry:
    303   ; SSE2: shift32i16const
    304   ; SSE2: cost of 4 {{.*}} ashr
    305   ; SSE2-CODEGEN: shift32i16const
    306   ; SSE2-CODEGEN: psraw $3
    307 
    308   %0 = ashr %shifttypec32i16 %a , <i16 3, i16 3, i16 3, i16 3,
    309                                    i16 3, i16 3, i16 3, i16 3,
    310                                    i16 3, i16 3, i16 3, i16 3,
    311                                    i16 3, i16 3, i16 3, i16 3,
    312                                    i16 3, i16 3, i16 3, i16 3,
    313                                    i16 3, i16 3, i16 3, i16 3,
    314                                    i16 3, i16 3, i16 3, i16 3,
    315                                    i16 3, i16 3, i16 3, i16 3>
    316   ret %shifttypec32i16 %0
    317 }
    318 
    319 %shifttypec2i32 = type <2 x i32>
        ; shift2i32c: ashr of <2 x i32> %a by the constant splat 3; %b is unused.
        ; Expected cost-model result is 20 (same as the variable-shift case),
        ; and llc's output must contain the scalar `sarq $3`.
    320 define %shifttypec2i32 @shift2i32c(%shifttypec2i32 %a, %shifttypec2i32 %b) {
    321 entry:
    322   ; SSE2: shift2i32c
    323   ; SSE2: cost of 20 {{.*}} ashr
    324   ; SSE2-CODEGEN: shift2i32c
    325   ; SSE2-CODEGEN: sarq $3
    326 
    327   %0 = ashr %shifttypec2i32 %a , <i32 3, i32 3>
    328   ret %shifttypec2i32 %0
    329 }
    330 
    331 %shifttypec4i32 = type <4 x i32>
        ; shift4i32c: ashr of <4 x i32> %a by the constant splat 3; %b is unused.
        ; Expected cost-model result is 1, and llc's output must contain
        ; `psrad $3`.
    332 define %shifttypec4i32 @shift4i32c(%shifttypec4i32 %a, %shifttypec4i32 %b) {
    333 entry:
    334   ; SSE2: shift4i32c
    335   ; SSE2: cost of 1 {{.*}} ashr
    336   ; SSE2-CODEGEN: shift4i32c
    337   ; SSE2-CODEGEN: psrad $3
    338 
    339   %0 = ashr %shifttypec4i32 %a , <i32 3, i32 3, i32 3, i32 3>
    340   ret %shifttypec4i32 %0
    341 }
    342 
    343 %shifttypec8i32 = type <8 x i32>
        ; shift8i32c: ashr of <8 x i32> %a by the constant splat 3; %b is unused.
        ; Expected cost-model result is 2 (double the <4 x i32> case), and llc's
        ; output must contain `psrad $3`.
    344 define %shifttypec8i32 @shift8i32c(%shifttypec8i32 %a, %shifttypec8i32 %b) {
    345 entry:
    346   ; SSE2: shift8i32c
    347   ; SSE2: cost of 2 {{.*}} ashr
    348   ; SSE2-CODEGEN: shift8i32c
    349   ; SSE2-CODEGEN: psrad $3
    350 
    351   %0 = ashr %shifttypec8i32 %a , <i32 3, i32 3, i32 3, i32 3,
    352                                   i32 3, i32 3, i32 3, i32 3>
    353   ret %shifttypec8i32 %0
    354 }
    355 
    356 %shifttypec16i32 = type <16 x i32>
        ; shift16i32c: ashr of <16 x i32> %a by the constant splat 3; %b is
        ; unused. Expected cost-model result is 4, and llc's output must contain
        ; `psrad $3`.
    357 define %shifttypec16i32 @shift16i32c(%shifttypec16i32 %a, %shifttypec16i32 %b) {
    358 entry:
    359   ; SSE2: shift16i32c
    360   ; SSE2: cost of 4 {{.*}} ashr
    361   ; SSE2-CODEGEN: shift16i32c
    362   ; SSE2-CODEGEN: psrad $3
    363 
    364   %0 = ashr %shifttypec16i32 %a , <i32 3, i32 3, i32 3, i32 3,
    365                                    i32 3, i32 3, i32 3, i32 3,
    366                                    i32 3, i32 3, i32 3, i32 3,
    367                                    i32 3, i32 3, i32 3, i32 3>
    368   ret %shifttypec16i32 %0
    369 }
    370 
    371 %shifttypec32i32 = type <32 x i32>
        ; shift32i32c: ashr of <32 x i32> %a by the constant splat 3; %b is
        ; unused. Expected cost-model result is 8, and llc's output must contain
        ; `psrad $3`.
        ; NOTE(review): the expected cost of 8 continues the 1/2/4 progression
        ; of the 4-, 8- and 16-element i32 cases in this file, so the
        ; "getTypeConversion fails" remark inside the body may be stale or
        ; copied from another test - confirm before relying on it.
    372 define %shifttypec32i32 @shift32i32c(%shifttypec32i32 %a, %shifttypec32i32 %b) {
    373 entry:
    374   ; SSE2: shift32i32c
    375   ; getTypeConversion fails here and promotes this to a i64.
    376   ; SSE2: cost of 8 {{.*}} ashr
    377   ; SSE2-CODEGEN: shift32i32c
    378   ; SSE2-CODEGEN: psrad $3
    379   %0 = ashr %shifttypec32i32 %a , <i32 3, i32 3, i32 3, i32 3,
    380                                    i32 3, i32 3, i32 3, i32 3,
    381                                    i32 3, i32 3, i32 3, i32 3,
    382                                    i32 3, i32 3, i32 3, i32 3,
    383                                    i32 3, i32 3, i32 3, i32 3,
    384                                    i32 3, i32 3, i32 3, i32 3,
    385                                    i32 3, i32 3, i32 3, i32 3,
    386                                    i32 3, i32 3, i32 3, i32 3>
    387   ret %shifttypec32i32 %0
    388 }
    389 
    390 %shifttypec2i64 = type <2 x i64>
        ; shift2i64c: ashr of <2 x i64> %a by the constant splat 3; %b is unused.
        ; Expected cost-model result is 20, the same as the variable-shift
        ; <2 x i64> case; llc's output must contain the scalar `sarq $3` rather
        ; than a packed shift.
    391 define %shifttypec2i64 @shift2i64c(%shifttypec2i64 %a, %shifttypec2i64 %b) {
    392 entry:
    393   ; SSE2: shift2i64c
    394   ; SSE2: cost of 20 {{.*}} ashr
    395   ; SSE2-CODEGEN: shift2i64c
    396   ; SSE2-CODEGEN: sarq $3
    397 
    398   %0 = ashr %shifttypec2i64 %a , <i64 3, i64 3>
    399   ret %shifttypec2i64 %0
    400 }
    401 
    402 %shifttypec4i64 = type <4 x i64>
        ; shift4i64c: ashr of <4 x i64> %a by the constant splat 3; %b is unused.
        ; Expected cost-model result is 40; llc's output must contain the scalar
        ; `sarq $3`.
    403 define %shifttypec4i64 @shift4i64c(%shifttypec4i64 %a, %shifttypec4i64 %b) {
    404 entry:
    405   ; SSE2: shift4i64c
    406   ; SSE2: cost of 40 {{.*}} ashr
    407   ; SSE2-CODEGEN: shift4i64c
    408   ; SSE2-CODEGEN: sarq $3
    409 
    410   %0 = ashr %shifttypec4i64 %a , <i64 3, i64 3, i64 3, i64 3>
    411   ret %shifttypec4i64 %0
    412 }
    413 
    414 %shifttypec8i64 = type <8 x i64>
        ; shift8i64c: ashr of <8 x i64> %a by the constant splat 3; %b is unused.
        ; Expected cost-model result is 80; llc's output must contain the scalar
        ; `sarq $3`.
    415 define %shifttypec8i64 @shift8i64c(%shifttypec8i64 %a, %shifttypec8i64 %b) {
    416 entry:
    417   ; SSE2: shift8i64c
    418   ; SSE2: cost of 80 {{.*}} ashr
    419   ; SSE2-CODEGEN: shift8i64c
    420   ; SSE2-CODEGEN: sarq $3
    421 
    422  %0 = ashr %shifttypec8i64 %a , <i64 3, i64 3, i64 3, i64 3,
    423                                  i64 3, i64 3, i64 3, i64 3>
    424   ret %shifttypec8i64 %0
    425 }
    426 
    427 %shifttypec16i64 = type <16 x i64>
        ; shift16i64c: ashr of <16 x i64> %a by the constant splat 3; %b is
        ; unused. Expected cost-model result is 160; llc's output must contain
        ; the scalar `sarq $3`.
    428 define %shifttypec16i64 @shift16i64c(%shifttypec16i64 %a, %shifttypec16i64 %b) {
    429 entry:
    430   ; SSE2: shift16i64c
    431   ; SSE2: cost of 160 {{.*}} ashr
    432   ; SSE2-CODEGEN: shift16i64c
    433   ; SSE2-CODEGEN: sarq $3
    434 
    435   %0 = ashr %shifttypec16i64 %a , <i64 3, i64 3, i64 3, i64 3,
    436                                    i64 3, i64 3, i64 3, i64 3,
    437                                    i64 3, i64 3, i64 3, i64 3,
    438                                    i64 3, i64 3, i64 3, i64 3>
    439   ret %shifttypec16i64 %0
    440 }
    441 
    442 %shifttypec32i64 = type <32 x i64>
        ; shift32i64c: ashr of <32 x i64> %a by the constant splat 3; %b is
        ; unused. Expected cost-model result is 320; llc's output must contain
        ; the scalar `sarq $3`.
    443 define %shifttypec32i64 @shift32i64c(%shifttypec32i64 %a, %shifttypec32i64 %b) {
    444 entry:
    445   ; SSE2: shift32i64c
    446   ; SSE2: cost of 320 {{.*}} ashr
    447   ; SSE2-CODEGEN: shift32i64c
    448   ; SSE2-CODEGEN: sarq $3
    449 
    450   %0 = ashr %shifttypec32i64 %a ,<i64 3, i64 3, i64 3, i64 3,
    451                                   i64 3, i64 3, i64 3, i64 3,
    452                                   i64 3, i64 3, i64 3, i64 3,
    453                                   i64 3, i64 3, i64 3, i64 3,
    454                                   i64 3, i64 3, i64 3, i64 3,
    455                                   i64 3, i64 3, i64 3, i64 3,
    456                                   i64 3, i64 3, i64 3, i64 3,
    457                                   i64 3, i64 3, i64 3, i64 3>
    458   ret %shifttypec32i64 %0
    459 }
    460 
    461 %shifttypec2i8 = type <2 x i8>
        ; shift2i8c: ashr of <2 x i8> %a by the constant splat 3; %b is unused.
        ; Expected cost-model result is 20; llc's output must contain the scalar
        ; 64-bit `sarq $3`.
    462 define %shifttypec2i8 @shift2i8c(%shifttypec2i8 %a, %shifttypec2i8 %b) {
    463 entry:
    464   ; SSE2: shift2i8c
    465   ; SSE2: cost of 20 {{.*}} ashr
    466   ; SSE2-CODEGEN: shift2i8c
    467   ; SSE2-CODEGEN: sarq $3
    468 
    469   %0 = ashr %shifttypec2i8 %a , <i8 3, i8 3>
    470   ret %shifttypec2i8 %0
    471 }
    472 
    473 %shifttypec4i8 = type <4 x i8>
        ; shift4i8c: ashr of <4 x i8> %a by the constant splat 3; %b is unused.
        ; Expected cost-model result is 1; llc's output must contain `psrad $3`.
        ; NOTE(review): a dword-granular shift for i8 lanes presumably reflects
        ; widening of the four lanes to 32 bits - confirm.
    474 define %shifttypec4i8 @shift4i8c(%shifttypec4i8 %a, %shifttypec4i8 %b) {
    475 entry:
    476   ; SSE2: shift4i8c
    477   ; SSE2: cost of 1 {{.*}} ashr
    478   ; SSE2-CODEGEN: shift4i8c
    479   ; SSE2-CODEGEN: psrad $3
    480 
    481   %0 = ashr %shifttypec4i8 %a , <i8 3, i8 3, i8 3, i8 3>
    482   ret %shifttypec4i8 %0
    483 }
    484 
    485 %shifttypec8i8 = type <8 x i8>
        ; shift8i8c: ashr of <8 x i8> %a by the constant splat 3; %b is unused.
        ; Expected cost-model result is 1; llc's output must contain `psraw $3`.
        ; NOTE(review): a word-granular shift for i8 lanes presumably reflects
        ; widening of the eight lanes to 16 bits - confirm.
    486 define %shifttypec8i8 @shift8i8c(%shifttypec8i8 %a, %shifttypec8i8 %b) {
    487 entry:
    488   ; SSE2: shift8i8c
    489   ; SSE2: cost of 1 {{.*}} ashr
    490   ; SSE2-CODEGEN: shift8i8c
    491   ; SSE2-CODEGEN: psraw $3
    492 
    493   %0 = ashr %shifttypec8i8 %a , <i8 3, i8 3, i8 3, i8 3,
    494                                  i8 3, i8 3, i8 3, i8 3>
    495   ret %shifttypec8i8 %0
    496 }
    497 
    498 %shifttypec16i8 = type <16 x i8>
        ; shift16i8c: ashr of <16 x i8> %a by the constant splat 3; %b is unused.
        ; Expected cost-model result is 4. The codegen check looks for
        ; `psrlw $3`, a different mnemonic from the psraw/psrad expected by the
        ; other constant-shift cases in this file.
        ; NOTE(review): presumably the byte-wise arithmetic shift is emulated
        ; with a word shift plus fix-up instructions, which would also explain
        ; the cost of 4 - confirm.
    499 define %shifttypec16i8 @shift16i8c(%shifttypec16i8 %a, %shifttypec16i8 %b) {
    500 entry:
    501   ; SSE2: shift16i8c
    502   ; SSE2: cost of 4 {{.*}} ashr
    503   ; SSE2-CODEGEN: shift16i8c
    504   ; SSE2-CODEGEN: psrlw $3
    505 
    506   %0 = ashr %shifttypec16i8 %a , <i8 3, i8 3, i8 3, i8 3,
    507                                   i8 3, i8 3, i8 3, i8 3,
    508                                   i8 3, i8 3, i8 3, i8 3,
    509                                   i8 3, i8 3, i8 3, i8 3>
    510   ret %shifttypec16i8 %0
    511 }
    512 
    513 %shifttypec32i8 = type <32 x i8>
        ; shift32i8c: ashr of <32 x i8> %a by the constant splat 3; %b is unused.
        ; Expected cost-model result is 8 (double the <16 x i8> case). The
        ; codegen check looks for `psrlw $3`, a different mnemonic from the
        ; psraw/psrad expected by the other constant-shift cases in this file.
        ; NOTE(review): presumably the same word-shift emulation as the
        ; <16 x i8> case, applied to each half - confirm.
    514 define %shifttypec32i8 @shift32i8c(%shifttypec32i8 %a, %shifttypec32i8 %b) {
    515 entry:
    516   ; SSE2: shift32i8c
    517   ; SSE2: cost of 8 {{.*}} ashr
    518   ; SSE2-CODEGEN: shift32i8c
    519   ; SSE2-CODEGEN: psrlw $3
    520 
    521   %0 = ashr %shifttypec32i8 %a , <i8 3, i8 3, i8 3, i8 3,
    522                                   i8 3, i8 3, i8 3, i8 3,
    523                                   i8 3, i8 3, i8 3, i8 3,
    524                                   i8 3, i8 3, i8 3, i8 3,
    525                                   i8 3, i8 3, i8 3, i8 3,
    526                                   i8 3, i8 3, i8 3, i8 3,
    527                                   i8 3, i8 3, i8 3, i8 3,
    528                                   i8 3, i8 3, i8 3, i8 3>
    529   ret %shifttypec32i8 %0
    530 }
    531 
    532