Home | History | Annotate | Download | only in X86
      1 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2
      2 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41
      3 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
      4 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2
      5 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX
      6 ; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2
      7 
      8 ; Verify the cost of vector shift left instructions.
      9 
     10 ;
     11 ;
     12 ; Variable Shifts
     13 ;
     14 
     15 define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
        ; Per-lane variable shift: each i64 lane of %a is shifted left by the
        ; matching lane of %b. The directives below give the cost expected for
        ; %shift under each opt invocation listed at the top of the file.
     16 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v2i64':
     17 ; SSE2: Found an estimated cost of 4 for instruction:   %shift
     18 ; SSE41: Found an estimated cost of 4 for instruction:   %shift
     19 ; AVX: Found an estimated cost of 4 for instruction:   %shift
     20 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
     21 ; XOPAVX: Found an estimated cost of 1 for instruction:   %shift
     22 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
     23   %shift = shl <2 x i64> %a, %b
     24   ret <2 x i64> %shift
     25 }
     26 
     27 define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
        ; Per-lane variable shift on a four-lane i64 vector: each lane of %a is
        ; shifted left by the matching lane of %b. The bdver2 and bdver4 runs
        ; carry separate expectations here (2 and 1 respectively).
     28 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
     29 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
     30 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
     31 ; AVX: Found an estimated cost of 8 for instruction:   %shift
     32 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
     33 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
     34 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
     35   %shift = shl <4 x i64> %a, %b
     36   ret <4 x i64> %shift
     37 }
     38 
     39 define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
        ; Per-lane variable shift: each i32 lane of %a is shifted left by the
        ; matching lane of %b. The directives below give the cost expected for
        ; %shift under each opt invocation listed at the top of the file.
     40 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
     41 ; SSE2: Found an estimated cost of 10 for instruction:   %shift
     42 ; SSE41: Found an estimated cost of 10 for instruction:   %shift
     43 ; AVX: Found an estimated cost of 10 for instruction:   %shift
     44 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
     45 ; XOPAVX: Found an estimated cost of 1 for instruction:   %shift
     46 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
     47   %shift = shl <4 x i32> %a, %b
     48   ret <4 x i32> %shift
     49 }
     50 
     51 define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
        ; Per-lane variable shift on an eight-lane i32 vector: each lane of %a is
        ; shifted left by the matching lane of %b. The SSE2, SSE4.1 and AVX
        ; expectations (20) are twice those of the four-lane variant in this file.
     52 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
     53 ; SSE2: Found an estimated cost of 20 for instruction:   %shift
     54 ; SSE41: Found an estimated cost of 20 for instruction:   %shift
     55 ; AVX: Found an estimated cost of 20 for instruction:   %shift
     56 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
     57 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
     58 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
     59   %shift = shl <8 x i32> %a, %b
     60   ret <8 x i32> %shift
     61 }
     62 
     63 define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
        ; Per-lane variable shift: each i16 lane of %a is shifted left by the
        ; matching lane of %b. The bdver2 and bdver4 runs share the single XOP
        ; expectation in this function.
     64 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
     65 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
     66 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
     67 ; AVX: Found an estimated cost of 32 for instruction:   %shift
     68 ; AVX2: Found an estimated cost of 32 for instruction:   %shift
     69 ; XOP: Found an estimated cost of 1 for instruction:   %shift
     70   %shift = shl <8 x i16> %a, %b
     71   ret <8 x i16> %shift
     72 }
     73 
     74 define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
        ; Per-lane variable shift on a sixteen-lane i16 vector: each lane of %a is
        ; shifted left by the matching lane of %b. The bdver2 and bdver4 runs share
        ; the single XOP expectation in this function.
     75 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
     76 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
     77 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
     78 ; AVX: Found an estimated cost of 64 for instruction:   %shift
     79 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
     80 ; XOP: Found an estimated cost of 2 for instruction:   %shift
     81   %shift = shl <16 x i16> %a, %b
     82   ret <16 x i16> %shift
     83 }
     84 
     85 define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
        ; Per-lane variable shift: each i8 lane of %a is shifted left by the
        ; matching lane of %b. The bdver2 and bdver4 runs share the single XOP
        ; expectation in this function.
     86 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
     87 ; SSE2: Found an estimated cost of 26 for instruction:   %shift
     88 ; SSE41: Found an estimated cost of 26 for instruction:   %shift
     89 ; AVX: Found an estimated cost of 26 for instruction:   %shift
     90 ; AVX2: Found an estimated cost of 26 for instruction:   %shift
     91 ; XOP: Found an estimated cost of 1 for instruction:   %shift
     92   %shift = shl <16 x i8> %a, %b
     93   ret <16 x i8> %shift
     94 }
     95 
     96 define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
        ; Per-lane variable shift on a thirty-two-lane i8 vector: each lane of %a
        ; is shifted left by the matching lane of %b. The bdver2 and bdver4 runs
        ; share the single XOP expectation in this function.
     97 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
     98 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
     99 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
    100 ; AVX: Found an estimated cost of 52 for instruction:   %shift
    101 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
    102 ; XOP: Found an estimated cost of 2 for instruction:   %shift
    103   %shift = shl <32 x i8> %a, %b
    104   ret <32 x i8> %shift
    105 }
    106 
    107 ;
    108 ; Uniform Variable Shifts
    109 ;
    110 
    111 define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) {
        ; Uniform variable shift: %splat broadcasts lane 0 of %b to every lane
        ; (all-zero shuffle mask), so all lanes of %a are shifted left by the same
        ; runtime amount. Only %shift is matched below; %splat has no expectation.
    112 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v2i64':
    113 ; SSE2: Found an estimated cost of 4 for instruction:   %shift
    114 ; SSE41: Found an estimated cost of 4 for instruction:   %shift
    115 ; AVX: Found an estimated cost of 4 for instruction:   %shift
    116 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    117 ; XOPAVX: Found an estimated cost of 1 for instruction:   %shift
    118 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
    119   %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
    120   %shift = shl <2 x i64> %a, %splat
    121   ret <2 x i64> %shift
    122 }
    123 
    124 define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
        ; Uniform variable shift on four i64 lanes: %splat broadcasts lane 0 of %b
        ; (all-zero shuffle mask), so all lanes of %a are shifted left by the same
        ; runtime amount. Only %shift is matched below; %splat has no expectation.
    125 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
    126 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
    127 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
    128 ; AVX: Found an estimated cost of 8 for instruction:   %shift
    129 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    130 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
    131 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
    132   %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
    133   %shift = shl <4 x i64> %a, %splat
    134   ret <4 x i64> %shift
    135 }
    136 
    137 define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
        ; Uniform variable shift on four i32 lanes: %splat broadcasts lane 0 of %b
        ; (all-zero shuffle mask), so all lanes of %a are shifted left by the same
        ; runtime amount. Only %shift is matched below; %splat has no expectation.
    138 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
    139 ; SSE2: Found an estimated cost of 10 for instruction:   %shift
    140 ; SSE41: Found an estimated cost of 10 for instruction:   %shift
    141 ; AVX: Found an estimated cost of 10 for instruction:   %shift
    142 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    143 ; XOPAVX: Found an estimated cost of 1 for instruction:   %shift
    144 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
    145   %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
    146   %shift = shl <4 x i32> %a, %splat
    147   ret <4 x i32> %shift
    148 }
    149 
    150 define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
        ; Uniform variable shift on eight i32 lanes: %splat broadcasts lane 0 of %b
        ; (all-zero shuffle mask), so all lanes of %a are shifted left by the same
        ; runtime amount. Only %shift is matched below; %splat has no expectation.
    151 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
    152 ; SSE2: Found an estimated cost of 20 for instruction:   %shift
    153 ; SSE41: Found an estimated cost of 20 for instruction:   %shift
    154 ; AVX: Found an estimated cost of 20 for instruction:   %shift
    155 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    156 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
    157 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
    158   %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
    159   %shift = shl <8 x i32> %a, %splat
    160   ret <8 x i32> %shift
    161 }
    162 
    163 define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
        ; Uniform variable shift on eight i16 lanes: %splat broadcasts lane 0 of %b
        ; (all-zero shuffle mask), so all lanes of %a are shifted left by the same
        ; runtime amount. Both bdver runs share the single XOP expectation.
    164 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
    165 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
    166 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
    167 ; AVX: Found an estimated cost of 32 for instruction:   %shift
    168 ; AVX2: Found an estimated cost of 32 for instruction:   %shift
    169 ; XOP: Found an estimated cost of 1 for instruction:   %shift
    170   %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
    171   %shift = shl <8 x i16> %a, %splat
    172   ret <8 x i16> %shift
    173 }
    174 
    175 define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
        ; Uniform variable shift on sixteen i16 lanes: %splat broadcasts lane 0 of
        ; %b (all-zero shuffle mask), so all lanes of %a are shifted left by the
        ; same runtime amount. Both bdver runs share the single XOP expectation.
    176 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
    177 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
    178 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
    179 ; AVX: Found an estimated cost of 64 for instruction:   %shift
    180 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
    181 ; XOP: Found an estimated cost of 2 for instruction:   %shift
    182   %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
    183   %shift = shl <16 x i16> %a, %splat
    184   ret <16 x i16> %shift
    185 }
    186 
    187 define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
        ; Uniform variable shift on sixteen i8 lanes: %splat broadcasts lane 0 of
        ; %b (all-zero shuffle mask), so all lanes of %a are shifted left by the
        ; same runtime amount. Both bdver runs share the single XOP expectation.
    188 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
    189 ; SSE2: Found an estimated cost of 26 for instruction:   %shift
    190 ; SSE41: Found an estimated cost of 26 for instruction:   %shift
    191 ; AVX: Found an estimated cost of 26 for instruction:   %shift
    192 ; AVX2: Found an estimated cost of 26 for instruction:   %shift
    193 ; XOP: Found an estimated cost of 1 for instruction:   %shift
    194   %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
    195   %shift = shl <16 x i8> %a, %splat
    196   ret <16 x i8> %shift
    197 }
    198 
    199 define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
        ; Uniform variable shift on thirty-two i8 lanes: %splat broadcasts lane 0
        ; of %b (all-zero shuffle mask), so all lanes of %a are shifted left by the
        ; same runtime amount. Both bdver runs share the single XOP expectation.
    200 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
    201 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
    202 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
    203 ; AVX: Found an estimated cost of 52 for instruction:   %shift
    204 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
    205 ; XOP: Found an estimated cost of 2 for instruction:   %shift
    206   %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
    207   %shift = shl <32 x i8> %a, %splat
    208   ret <32 x i8> %shift
    209 }
    210 
    211 ;
    212 ; Constant Shifts
    213 ;
    214 
    215 define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) {
        ; Non-uniform constant shift: the two i64 lanes of %a are shifted left by
        ; the distinct compile-time amounts 1 and 7. The directives below give the
        ; cost expected for %shift under each opt invocation.
    216 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v2i64':
    217 ; SSE2: Found an estimated cost of 4 for instruction:   %shift
    218 ; SSE41: Found an estimated cost of 4 for instruction:   %shift
    219 ; AVX: Found an estimated cost of 4 for instruction:   %shift
    220 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    221 ; XOPAVX: Found an estimated cost of 1 for instruction:   %shift
    222 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
    223   %shift = shl <2 x i64> %a, <i64 1, i64 7>
    224   ret <2 x i64> %shift
    225 }
    226 
    227 define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
        ; Non-uniform constant shift: the four i64 lanes of %a are shifted left by
        ; the distinct compile-time amounts 1, 7, 15 and 31. The directives below
        ; give the cost expected for %shift under each opt invocation.
    228 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
    229 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
    230 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
    231 ; AVX: Found an estimated cost of 8 for instruction:   %shift
    232 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    233 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
    234 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
    235   %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
    236   ret <4 x i64> %shift
    237 }
    238 
    239 define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
        ; Non-uniform constant shift: the four i32 lanes of %a are shifted left by
        ; the compile-time amounts 4, 5, 6 and 7. Only the SSE2-only run expects a
        ; cost above 1 (it expects 6).
    240 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
    241 ; SSE2: Found an estimated cost of 6 for instruction:   %shift
    242 ; SSE41: Found an estimated cost of 1 for instruction:   %shift
    243 ; AVX: Found an estimated cost of 1 for instruction:   %shift
    244 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    245 ; XOPAVX: Found an estimated cost of 1 for instruction:   %shift
    246 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
    247   %shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
    248   ret <4 x i32> %shift
    249 }
    250 
    251 define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
        ; Non-uniform constant shift on eight i32 lanes with the compile-time
        ; amounts 4, 5, 6, 7, 0, 1, 2, 3. The SSE2 and SSE4.1 expectations (12 and
        ; 2) are twice those of the four-lane variant in this file.
    252 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
    253 ; SSE2: Found an estimated cost of 12 for instruction:   %shift
    254 ; SSE41: Found an estimated cost of 2 for instruction:   %shift
    255 ; AVX: Found an estimated cost of 4 for instruction:   %shift
    256 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    257 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
    258 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
    259   %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
    260   ret <8 x i32> %shift
    261 }
    262 
    263 define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
        ; Non-uniform constant shift: the eight i16 lanes of %a are shifted left by
        ; the compile-time amounts 0 through 7. Every opt invocation expects a cost
        ; of 1; both bdver runs share the single XOP expectation.
    264 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
    265 ; SSE2: Found an estimated cost of 1 for instruction:   %shift
    266 ; SSE41: Found an estimated cost of 1 for instruction:   %shift
    267 ; AVX: Found an estimated cost of 1 for instruction:   %shift
    268 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    269 ; XOP: Found an estimated cost of 1 for instruction:   %shift
    270   %shift = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
    271   ret <8 x i16> %shift
    272 }
    273 
    274 define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
        ; Non-uniform constant shift on sixteen i16 lanes; the amount vector is the
        ; sequence 0 through 7 repeated twice. The directives below give the cost
        ; expected for %shift under each opt invocation.
    275 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
    276 ; SSE2: Found an estimated cost of 2 for instruction:   %shift
    277 ; SSE41: Found an estimated cost of 2 for instruction:   %shift
    278 ; AVX: Found an estimated cost of 4 for instruction:   %shift
    279 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    280 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
    281 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
    282   %shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
    283   ret <16 x i16> %shift
    284 }
    285 
    286 define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
        ; Non-uniform constant shift on sixteen i8 lanes; the amount vector counts
        ; 0 up to 7 and then 7 back down to 0. The non-XOP expectations (26) equal
        ; those of the fully variable v16i8 shift in this file.
    287 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
    288 ; SSE2: Found an estimated cost of 26 for instruction:   %shift
    289 ; SSE41: Found an estimated cost of 26 for instruction:   %shift
    290 ; AVX: Found an estimated cost of 26 for instruction:   %shift
    291 ; AVX2: Found an estimated cost of 26 for instruction:   %shift
    292 ; XOP: Found an estimated cost of 1 for instruction:   %shift
    293   %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
    294   ret <16 x i8> %shift
    295 }
    296 
    297 define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
        ; Non-uniform constant shift on thirty-two i8 lanes; the amount vector is
        ; the 0..7,7..0 pattern repeated twice. The expectations equal those of the
        ; fully variable v32i8 shift in this file.
    298 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
    299 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
    300 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
    301 ; AVX: Found an estimated cost of 52 for instruction:   %shift
    302 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
    303 ; XOP: Found an estimated cost of 2 for instruction:   %shift
    304   %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
    305   ret <32 x i8> %shift
    306 }
    307 
    308 ;
    309 ; Uniform Constant Shifts
    310 ;
    311 
    312 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
        ; Uniform constant shift: both i64 lanes of %a are shifted left by the same
        ; compile-time amount, 7. Every opt invocation expects a cost of 1.
    313 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v2i64':
    314 ; SSE2: Found an estimated cost of 1 for instruction:   %shift
    315 ; SSE41: Found an estimated cost of 1 for instruction:   %shift
    316 ; AVX: Found an estimated cost of 1 for instruction:   %shift
    317 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    318 ; XOPAVX: Found an estimated cost of 1 for instruction:   %shift
    319 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
    320   %shift = shl <2 x i64> %a, <i64 7, i64 7>
    321   ret <2 x i64> %shift
    322 }
    323 
    324 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
        ; Uniform constant shift: all four i64 lanes of %a are shifted left by the
        ; same compile-time amount, 7. The AVX2 and bdver4 runs expect 1; every
        ; other run expects 2.
    325 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
    326 ; SSE2: Found an estimated cost of 2 for instruction:   %shift
    327 ; SSE41: Found an estimated cost of 2 for instruction:   %shift
    328 ; AVX: Found an estimated cost of 2 for instruction:   %shift
    329 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    330 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
    331 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
    332   %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
    333   ret <4 x i64> %shift
    334 }
    335 
    336 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
        ; Uniform constant shift: all four i32 lanes of %a are shifted left by the
        ; same compile-time amount, 5. Every opt invocation expects a cost of 1.
    337 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i32':
    338 ; SSE2: Found an estimated cost of 1 for instruction:   %shift
    339 ; SSE41: Found an estimated cost of 1 for instruction:   %shift
    340 ; AVX: Found an estimated cost of 1 for instruction:   %shift
    341 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    342 ; XOPAVX: Found an estimated cost of 1 for instruction:   %shift
    343 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
    344   %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
    345   ret <4 x i32> %shift
    346 }
    347 
    348 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
        ; Uniform constant shift: all eight i32 lanes of %a are shifted left by the
        ; same compile-time amount, 5. The AVX2 and bdver4 runs expect 1; every
        ; other run expects 2.
    349 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i32':
    350 ; SSE2: Found an estimated cost of 2 for instruction:   %shift
    351 ; SSE41: Found an estimated cost of 2 for instruction:   %shift
    352 ; AVX: Found an estimated cost of 2 for instruction:   %shift
    353 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    354 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
    355 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
    356   %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    357   ret <8 x i32> %shift
    358 }
    359 
    360 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
        ; Uniform constant shift: all eight i16 lanes of %a are shifted left by the
        ; same compile-time amount, 3. Every opt invocation expects a cost of 1;
        ; both bdver runs share the single XOP expectation.
    361 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v8i16':
    362 ; SSE2: Found an estimated cost of 1 for instruction:   %shift
    363 ; SSE41: Found an estimated cost of 1 for instruction:   %shift
    364 ; AVX: Found an estimated cost of 1 for instruction:   %shift
    365 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    366 ; XOP: Found an estimated cost of 1 for instruction:   %shift
    367   %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
    368   ret <8 x i16> %shift
    369 }
    370 
    371 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
        ; Uniform constant shift: all sixteen i16 lanes of %a are shifted left by
        ; the same compile-time amount, 3. The AVX2 and bdver4 runs expect 1; every
        ; other run expects 2.
    372 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i16':
    373 ; SSE2: Found an estimated cost of 2 for instruction:   %shift
    374 ; SSE41: Found an estimated cost of 2 for instruction:   %shift
    375 ; AVX: Found an estimated cost of 2 for instruction:   %shift
    376 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    377 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
    378 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
    379   %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
    380   ret <16 x i16> %shift
    381 }
    382 
    383 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
        ; Uniform constant shift: all sixteen i8 lanes of %a are shifted left by
        ; the same compile-time amount, 3. Every opt invocation expects a cost of
        ; 1; both bdver runs share the single XOP expectation.
    384 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v16i8':
    385 ; SSE2: Found an estimated cost of 1 for instruction:   %shift
    386 ; SSE41: Found an estimated cost of 1 for instruction:   %shift
    387 ; AVX: Found an estimated cost of 1 for instruction:   %shift
    388 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
    389 ; XOP: Found an estimated cost of 1 for instruction:   %shift
    390   %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    391   ret <16 x i8> %shift
    392 }
    393 
    394 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
        ; Uniform constant shift: all thirty-two i8 lanes of %a are shifted left by
        ; the same compile-time amount, 3.
        ; NOTE(review): the AVX2 run expects 11 here, the same value this file uses
        ; for the variable and non-uniform v32i8 shifts, while every other run
        ; expects 2 -- confirm this reflects the cost model's intended behaviour
        ; rather than a missing uniform-constant entry.
    395 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
    396 ; SSE2: Found an estimated cost of 2 for instruction:   %shift
    397 ; SSE41: Found an estimated cost of 2 for instruction:   %shift
    398 ; AVX: Found an estimated cost of 2 for instruction:   %shift
    399 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
    400 ; XOP: Found an estimated cost of 2 for instruction:   %shift
    401   %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
    402   ret <32 x i8> %shift
    403 }
    404 
    405 ;
    406 ; Special Cases
    407 ;
    408 
    409 ; We always emit a single pmullw in the case of v8i16 vector shifts by
    410 ; a non-uniform constant.
    411 
    412 define <8 x i16> @test1(<8 x i16> %a) {
        ; Shift of eight i16 lanes by the non-uniform constant amounts
        ; 1, 1, 2, 3, 7, 0, 9, 11. The expectation after the function uses the
        ; prefix common to every opt invocation, so all of them expect cost 1.
    413   %shl = shl <8 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
    414   ret <8 x i16> %shl
    415 }
    416 ; CHECK: 'Cost Model Analysis' for function 'test1':
    417 ; CHECK: Found an estimated cost of 1 for instruction:   %shl
    418 
    419 
    420 define <8 x i16> @test2(<8 x i16> %a) {
        ; Shift of eight i16 lanes by a constant vector that mixes ordinary amounts
        ; (0 and 1), two undef lanes and an i16 -1 lane. The expectation after the
        ; function uses the prefix common to every opt invocation (cost 1).
    421   %shl = shl <8 x i16> %a, <i16 0, i16 undef, i16 0, i16 0, i16 1, i16 undef, i16 -1, i16 1>
    422   ret <8 x i16> %shl
    423 }
    424 ; CHECK: 'Cost Model Analysis' for function 'test2':
    425 ; CHECK: Found an estimated cost of 1 for instruction:   %shl
    426 
    427 
    428 ; With SSE4.1, v4i32 shifts can be lowered into a single pmulld instruction.
    429 ; Make sure that the estimated cost is always 1 except for the case where
    430 ; we only have SSE2 support. With SSE2, the v4i32 mul has to be custom-lowered
    431 ; as a sequence of 2x shuffle, 2x pmuludq, 2x shuffle.
    432 
    433 define <4 x i32> @test3(<4 x i32> %a) {
        ; Shift of four i32 lanes by the non-uniform constant vector 1, -1, 2, -3
        ; (two lanes carry negative literals). Only the SSE2-only run expects a
        ; cost above 1; both bdver runs share the single XOP expectation.
    434   %shl = shl <4 x i32> %a, <i32 1, i32 -1, i32 2, i32 -3>
    435   ret <4 x i32> %shl
    436 }
    437 ; CHECK: 'Cost Model Analysis' for function 'test3':
    438 ; SSE2: Found an estimated cost of 6 for instruction:   %shl
    439 ; SSE41: Found an estimated cost of 1 for instruction:   %shl
    440 ; AVX: Found an estimated cost of 1 for instruction:   %shl
    441 ; AVX2: Found an estimated cost of 1 for instruction:   %shl
    442 ; XOP: Found an estimated cost of 1 for instruction:   %shl
    443 
    444 
    445 define <4 x i32> @test4(<4 x i32> %a) {
        ; Shift of four i32 lanes by the non-uniform constant vector 0, 0, 1, 1.
        ; The expectations are identical to those of test3 in this file.
    446   %shl = shl <4 x i32> %a, <i32 0, i32 0, i32 1, i32 1>
    447   ret <4 x i32> %shl
    448 }
    449 ; CHECK: 'Cost Model Analysis' for function 'test4':
    450 ; SSE2: Found an estimated cost of 6 for instruction:   %shl
    451 ; SSE41: Found an estimated cost of 1 for instruction:   %shl
    452 ; AVX: Found an estimated cost of 1 for instruction:   %shl
    453 ; AVX2: Found an estimated cost of 1 for instruction:   %shl
    454 ; XOP: Found an estimated cost of 1 for instruction:   %shl
    455 
    456 
    457 ; On AVX2 we are able to lower the following shift into a single
    458 ; vpsllvq. Therefore, the expected cost is only 1 (the XOP runs expect 1 too).
    459 ; On the SSE2, SSE4.1 and AVX runs, this shift is scalarized as the target
    460 ; does not support vpsllv instructions.
    461 
    462 define <2 x i64> @test5(<2 x i64> %a) {
        ; Shift of two i64 lanes by the non-uniform constant amounts 2 and 3.
        ; The AVX2 and XOP expectations are 1; the SSE2, SSE4.1 and AVX runs
        ; expect 4.
    463   %shl = shl <2 x i64> %a, <i64 2, i64 3>
    464   ret <2 x i64> %shl
    465 }
    466 ; CHECK: 'Cost Model Analysis' for function 'test5':
    467 ; SSE2: Found an estimated cost of 4 for instruction:   %shl
    468 ; SSE41: Found an estimated cost of 4 for instruction:   %shl
    469 ; AVX: Found an estimated cost of 4 for instruction:   %shl
    470 ; AVX2: Found an estimated cost of 1 for instruction:   %shl
    471 ; XOP: Found an estimated cost of 1 for instruction:   %shl
    472 
    473 
    474 ; v16i16 and v8i32 shift left by non-uniform constant are lowered into
    475 ; vector multiply instructions.  With AVX (but not AVX2), the vector multiply
    476 ; is lowered into a sequence of: 1 extract + 2 vpmullw + 1 insert.
    477 ;
    478 ; With AVX2, instruction vpmullw works with 256bit quantities and
    479 ; therefore there is no need to split the resulting vector multiply into
    480 ; a sequence of two multiplies.
    481 ;
    482 ; With SSE2 and SSE4.1, the vector shift cost for 'test6' is twice
    483 ; the cost computed in the case of 'test1'. That is because the backend
    484 ; simply emits 2 pmullw with no extract/insert.
    485 
    486 
    487 define <16 x i16> @test6(<16 x i16> %a) {
        ; Shift of sixteen i16 lanes by a non-uniform constant vector: the test1
        ; amounts 1, 1, 2, 3, 7, 0, 9, 11 repeated twice. The bdver2 and bdver4
        ; runs carry separate expectations here (2 and 1 respectively).
    488   %shl = shl <16 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
    489   ret <16 x i16> %shl
    490 }
    491 ; CHECK: 'Cost Model Analysis' for function 'test6':
    492 ; SSE2: Found an estimated cost of 2 for instruction:   %shl
    493 ; SSE41: Found an estimated cost of 2 for instruction:   %shl
    494 ; AVX: Found an estimated cost of 4 for instruction:   %shl
    495 ; AVX2: Found an estimated cost of 1 for instruction:   %shl
    496 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shl
    497 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shl
    498 
    499 
    500 ; With SSE2 and SSE4.1, the vector shift cost for 'test7' is twice
    501 ; the cost computed in the case of 'test3'. That is because the multiply
    502 ; is type-legalized into two v4i32 vector multiplies.
    503 
    504 define <8 x i32> @test7(<8 x i32> %a) {
        ; Shift of eight i32 lanes by a non-uniform constant vector: the amounts
        ; 1, 1, 2, 3 repeated twice. The SSE2 and SSE4.1 expectations (12 and 2)
        ; are twice the values expected for test3 in this file.
    505   %shl = shl <8 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
    506   ret <8 x i32> %shl
    507 }
    508 ; CHECK: 'Cost Model Analysis' for function 'test7':
    509 ; SSE2: Found an estimated cost of 12 for instruction:   %shl
    510 ; SSE41: Found an estimated cost of 2 for instruction:   %shl
    511 ; AVX: Found an estimated cost of 4 for instruction:   %shl
    512 ; AVX2: Found an estimated cost of 1 for instruction:   %shl
    513 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shl
    514 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shl
    515 
    516 
    517 ; On AVX2 we are able to lower the following shift into a single
    518 ; vpsllvq. Therefore, the expected cost is only 1. The XOP runs expect 2
    519 ; (bdver2) and 1 (bdver4). On the SSE2, SSE4.1 and AVX runs, this shift is
    520 ; scalarized as the target does not support vpsllv instructions.
    521 
    522 define <4 x i64> @test8(<4 x i64> %a) {
        ; Shift of four i64 lanes by the non-uniform constant amounts 1, 2, 3, 4.
        ; The SSE2, SSE4.1 and AVX expectations (8) are twice the values expected
        ; for the two-lane test5 in this file.
    523   %shl = shl <4 x i64> %a, <i64 1, i64 2, i64 3, i64 4>
    524   ret <4 x i64> %shl
    525 }
    526 ; CHECK: 'Cost Model Analysis' for function 'test8':
    527 ; SSE2: Found an estimated cost of 8 for instruction:   %shl
    528 ; SSE41: Found an estimated cost of 8 for instruction:   %shl
    529 ; AVX: Found an estimated cost of 8 for instruction:   %shl
    530 ; AVX2: Found an estimated cost of 1 for instruction:   %shl
    531 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shl
    532 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shl
    533 
    534 
    535 ; Same as 'test6', with the difference that the cost is double.
    536 
    537 define <32 x i16> @test9(<32 x i16> %a) {
        ; Shift of thirty-two i16 lanes by a non-uniform constant vector: the
        ; amounts 1, 1, 2, 3, 7, 0, 9, 11 repeated four times. Every expectation
        ; below is exactly twice the corresponding test6 value.
    538   %shl = shl <32 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11, i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
    539   ret <32 x i16> %shl
    540 }
    541 ; CHECK: 'Cost Model Analysis' for function 'test9':
    542 ; SSE2: Found an estimated cost of 4 for instruction:   %shl
    543 ; SSE41: Found an estimated cost of 4 for instruction:   %shl
    544 ; AVX: Found an estimated cost of 8 for instruction:   %shl
    545 ; AVX2: Found an estimated cost of 2 for instruction:   %shl
    546 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shl
    547 ; XOPAVX2: Found an estimated cost of 2 for instruction:   %shl
    548 
    549 
    550 ; Same as 'test7', except that now the cost is double.
    551 
    552 define <16 x i32> @test10(<16 x i32> %a) {
        ; Shift of sixteen i32 lanes by a non-uniform constant vector: the amounts
        ; 1, 1, 2, 3 repeated four times. Every expectation below is exactly twice
        ; the corresponding test7 value.
    553   %shl = shl <16 x i32> %a, <i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 2, i32 3>
    554   ret <16 x i32> %shl
    555 }
    556 ; CHECK: 'Cost Model Analysis' for function 'test10':
    557 ; SSE2: Found an estimated cost of 24 for instruction:   %shl
    558 ; SSE41: Found an estimated cost of 4 for instruction:   %shl
    559 ; AVX: Found an estimated cost of 8 for instruction:   %shl
    560 ; AVX2: Found an estimated cost of 2 for instruction:   %shl
    561 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shl
    562 ; XOPAVX2: Found an estimated cost of 2 for instruction:   %shl
    563 
    564 
    565 ; On AVX2 we are able to lower the following shift into a sequence of
    566 ; two vpsllvq instructions. Therefore, the expected cost is only 2.
    567 ; The XOP runs expect 4 (bdver2) and 2 (bdver4). On the SSE2, SSE4.1 and AVX
    568 ; runs, this shift is scalarized as we don't have vpsllv instructions.
    569 
    570 define <8 x i64> @test11(<8 x i64> %a) {
        ; Shift of eight i64 lanes by a non-uniform constant vector: the amounts
        ; 1, 1, 2, 3 repeated twice. Every expectation below is exactly twice the
        ; corresponding value of the four-lane test8 in this file.
    571   %shl = shl <8 x i64> %a, <i64 1, i64 1, i64 2, i64 3, i64 1, i64 1, i64 2, i64 3>
    572   ret <8 x i64> %shl
    573 }
    574 ; CHECK: 'Cost Model Analysis' for function 'test11':
    575 ; SSE2: Found an estimated cost of 16 for instruction:   %shl
    576 ; SSE41: Found an estimated cost of 16 for instruction:   %shl
    577 ; AVX: Found an estimated cost of 16 for instruction:   %shl
    578 ; AVX2: Found an estimated cost of 2 for instruction:   %shl
    579 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shl
    580 ; XOPAVX2: Found an estimated cost of 2 for instruction:   %shl
    581