; Cost-model regression test: X86 vector integer zext/sext/trunc costs, SSE2 vs SSE4.1.
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse4.1 -cost-model -analyze < %s | FileCheck --check-prefix=SSE41 %s
      3 
; Checks the modeled cost of zext <4 x i8> -> <4 x i64>: 4 on SSE2, 2 on SSE4.1.
; The load feeds the cast and the store to undef consumes it, so only the cast's
; cost line is matched by FileCheck (via the SSE2/SSE41 check prefixes).
define void @zext_v4i8_to_v4i64(<4 x i8>* %a) {
; SSE2: zext_v4i8_to_v4i64
; SSE2: cost of 4 {{.*}} zext
;
; SSE41: zext_v4i8_to_v4i64
; SSE41: cost of 2 {{.*}} zext
;
  %1 = load <4 x i8>, <4 x i8>* %a
  %2 = zext <4 x i8> %1 to <4 x i64>
  store <4 x i64> %2, <4 x i64>* undef, align 4
  ret void
}
     16 
; Checks the modeled cost of sext <4 x i8> -> <4 x i64>: 8 on SSE2, 2 on SSE4.1.
define void @sext_v4i8_to_v4i64(<4 x i8>* %a) {
; SSE2: sext_v4i8_to_v4i64
; SSE2: cost of 8 {{.*}} sext
;
; SSE41: sext_v4i8_to_v4i64
; SSE41: cost of 2 {{.*}} sext
;
  %1 = load <4 x i8>, <4 x i8>* %a
  %2 = sext <4 x i8> %1 to <4 x i64>
  store <4 x i64> %2, <4 x i64>* undef, align 4
  ret void
}
     29 
; Checks the modeled cost of zext <4 x i16> -> <4 x i64>: 3 on SSE2, 2 on SSE4.1.
define void @zext_v4i16_to_v4i64(<4 x i16>* %a) {
; SSE2: zext_v4i16_to_v4i64
; SSE2: cost of 3 {{.*}} zext
;
; SSE41: zext_v4i16_to_v4i64
; SSE41: cost of 2 {{.*}} zext
;
  %1 = load <4 x i16>, <4 x i16>* %a
  %2 = zext <4 x i16> %1 to <4 x i64>
  store <4 x i64> %2, <4 x i64>* undef, align 4
  ret void
}
     42 
; Checks the modeled cost of sext <4 x i16> -> <4 x i64>: 10 on SSE2, 2 on SSE4.1.
define void @sext_v4i16_to_v4i64(<4 x i16>* %a) {
; SSE2: sext_v4i16_to_v4i64
; SSE2: cost of 10 {{.*}} sext
;
; SSE41: sext_v4i16_to_v4i64
; SSE41: cost of 2 {{.*}} sext
;
  %1 = load <4 x i16>, <4 x i16>* %a
  %2 = sext <4 x i16> %1 to <4 x i64>
  store <4 x i64> %2, <4 x i64>* undef, align 4
  ret void
}
     55 
     56 
; Checks the modeled cost of zext <4 x i32> -> <4 x i64>: 3 on SSE2, 2 on SSE4.1.
define void @zext_v4i32_to_v4i64(<4 x i32>* %a) {
; SSE2: zext_v4i32_to_v4i64
; SSE2: cost of 3 {{.*}} zext
;
; SSE41: zext_v4i32_to_v4i64
; SSE41: cost of 2 {{.*}} zext
;
  %1 = load <4 x i32>, <4 x i32>* %a
  %2 = zext <4 x i32> %1 to <4 x i64>
  store <4 x i64> %2, <4 x i64>* undef, align 4
  ret void
}
     69 
; Checks the modeled cost of sext <4 x i32> -> <4 x i64>: 5 on SSE2, 2 on SSE4.1.
define void @sext_v4i32_to_v4i64(<4 x i32>* %a) {
; SSE2: sext_v4i32_to_v4i64
; SSE2: cost of 5 {{.*}} sext
;
; SSE41: sext_v4i32_to_v4i64
; SSE41: cost of 2 {{.*}} sext
;
  %1 = load <4 x i32>, <4 x i32>* %a
  %2 = sext <4 x i32> %1 to <4 x i64>
  store <4 x i64> %2, <4 x i64>* undef, align 4
  ret void
}
     82 
; Checks the modeled cost of zext <16 x i16> -> <16 x i32>: 6 on SSE2, 4 on SSE4.1.
define void @zext_v16i16_to_v16i32(<16 x i16>* %a) {
; SSE2: zext_v16i16_to_v16i32
; SSE2: cost of 6 {{.*}} zext
;
; SSE41: zext_v16i16_to_v16i32
; SSE41: cost of 4 {{.*}} zext
;
  %1 = load <16 x i16>, <16 x i16>* %a
  %2 = zext <16 x i16> %1 to <16 x i32>
  store <16 x i32> %2, <16 x i32>* undef, align 4
  ret void
}
     95 
; Checks the modeled cost of sext <16 x i16> -> <16 x i32>: 8 on SSE2, 4 on SSE4.1.
define void @sext_v16i16_to_v16i32(<16 x i16>* %a) {
; SSE2: sext_v16i16_to_v16i32
; SSE2: cost of 8 {{.*}} sext
;
; SSE41: sext_v16i16_to_v16i32
; SSE41: cost of 4 {{.*}} sext
;
  %1 = load <16 x i16>, <16 x i16>* %a
  %2 = sext <16 x i16> %1 to <16 x i32>
  store <16 x i32> %2, <16 x i32>* undef, align 4
  ret void
}
    108 
; Checks the modeled cost of zext <8 x i16> -> <8 x i32>: 3 on SSE2, 2 on SSE4.1.
define void @zext_v8i16_to_v8i32(<8 x i16>* %a) {
; SSE2: zext_v8i16_to_v8i32
; SSE2: cost of 3 {{.*}} zext
;
; SSE41: zext_v8i16_to_v8i32
; SSE41: cost of 2 {{.*}} zext
;
  %1 = load <8 x i16>, <8 x i16>* %a
  %2 = zext <8 x i16> %1 to <8 x i32>
  store <8 x i32> %2, <8 x i32>* undef, align 4
  ret void
}
    121 
; Checks the modeled cost of sext <8 x i16> -> <8 x i32>: 4 on SSE2, 2 on SSE4.1.
define void @sext_v8i16_to_v8i32(<8 x i16>* %a) {
; SSE2: sext_v8i16_to_v8i32
; SSE2: cost of 4 {{.*}} sext
;
; SSE41: sext_v8i16_to_v8i32
; SSE41: cost of 2 {{.*}} sext
;
  %1 = load <8 x i16>, <8 x i16>* %a
  %2 = sext <8 x i16> %1 to <8 x i32>
  store <8 x i32> %2, <8 x i32>* undef, align 4
  ret void
}
    134 
; Checks the modeled cost of zext <4 x i16> -> <4 x i32>: 1 on both SSE2 and SSE4.1.
define void @zext_v4i16_to_v4i32(<4 x i16>* %a) {
; SSE2: zext_v4i16_to_v4i32
; SSE2: cost of 1 {{.*}} zext
;
; SSE41: zext_v4i16_to_v4i32
; SSE41: cost of 1 {{.*}} zext
;
  %1 = load <4 x i16>, <4 x i16>* %a
  %2 = zext <4 x i16> %1 to <4 x i32>
  store <4 x i32> %2, <4 x i32>* undef, align 4
  ret void
}
    147 
; Checks the modeled cost of sext <4 x i16> -> <4 x i32>: 2 on SSE2, 1 on SSE4.1.
define void @sext_v4i16_to_v4i32(<4 x i16>* %a) {
; SSE2: sext_v4i16_to_v4i32
; SSE2: cost of 2 {{.*}} sext
;
; SSE41: sext_v4i16_to_v4i32
; SSE41: cost of 1 {{.*}} sext
;
  %1 = load <4 x i16>, <4 x i16>* %a
  %2 = sext <4 x i16> %1 to <4 x i32>
  store <4 x i32> %2, <4 x i32>* undef, align 4
  ret void
}
    160 
; Checks the modeled cost of zext <16 x i8> -> <16 x i32>: 9 on SSE2, 4 on SSE4.1.
define void @zext_v16i8_to_v16i32(<16 x i8>* %a) {
; SSE2: zext_v16i8_to_v16i32
; SSE2: cost of 9 {{.*}} zext
;
; SSE41: zext_v16i8_to_v16i32
; SSE41: cost of 4 {{.*}} zext
;
  %1 = load <16 x i8>, <16 x i8>* %a
  %2 = zext <16 x i8> %1 to <16 x i32>
  store <16 x i32> %2, <16 x i32>* undef, align 4
  ret void
}
    173 
; Checks the modeled cost of sext <16 x i8> -> <16 x i32>: 12 on SSE2, 4 on SSE4.1.
define void @sext_v16i8_to_v16i32(<16 x i8>* %a) {
; SSE2: sext_v16i8_to_v16i32
; SSE2: cost of 12 {{.*}} sext
;
; SSE41: sext_v16i8_to_v16i32
; SSE41: cost of 4 {{.*}} sext
;
  %1 = load <16 x i8>, <16 x i8>* %a
  %2 = sext <16 x i8> %1 to <16 x i32>
  store <16 x i32> %2, <16 x i32>* undef, align 4
  ret void
}
    186 
; Checks the modeled cost of zext <8 x i8> -> <8 x i32>: 6 on SSE2, 2 on SSE4.1.
define void @zext_v8i8_to_v8i32(<8 x i8>* %a) {
; SSE2: zext_v8i8_to_v8i32
; SSE2: cost of 6 {{.*}} zext
;
; SSE41: zext_v8i8_to_v8i32
; SSE41: cost of 2 {{.*}} zext
;
  %1 = load <8 x i8>, <8 x i8>* %a
  %2 = zext <8 x i8> %1 to <8 x i32>
  store <8 x i32> %2, <8 x i32>* undef, align 4
  ret void
}
    199 
; Checks the modeled cost of sext <8 x i8> -> <8 x i32>: 6 on SSE2, 2 on SSE4.1.
define void @sext_v8i8_to_v8i32(<8 x i8>* %a) {
; SSE2: sext_v8i8_to_v8i32
; SSE2: cost of 6 {{.*}} sext
;
; SSE41: sext_v8i8_to_v8i32
; SSE41: cost of 2 {{.*}} sext
;
  %1 = load <8 x i8>, <8 x i8>* %a
  %2 = sext <8 x i8> %1 to <8 x i32>
  store <8 x i32> %2, <8 x i32>* undef, align 4
  ret void
}
    212 
; Checks the modeled cost of zext <4 x i8> -> <4 x i32>: 2 on SSE2, 1 on SSE4.1.
define void @zext_v4i8_to_v4i32(<4 x i8>* %a) {
; SSE2: zext_v4i8_to_v4i32
; SSE2: cost of 2 {{.*}} zext
;
; SSE41: zext_v4i8_to_v4i32
; SSE41: cost of 1 {{.*}} zext
;
  %1 = load <4 x i8>, <4 x i8>* %a
  %2 = zext <4 x i8> %1 to <4 x i32>
  store <4 x i32> %2, <4 x i32>* undef, align 4
  ret void
}
    225 
; Checks the modeled cost of sext <4 x i8> -> <4 x i32>: 3 on SSE2, 1 on SSE4.1.
define void @sext_v4i8_to_v4i32(<4 x i8>* %a) {
; SSE2: sext_v4i8_to_v4i32
; SSE2: cost of 3 {{.*}} sext
;
; SSE41: sext_v4i8_to_v4i32
; SSE41: cost of 1 {{.*}} sext
;
  %1 = load <4 x i8>, <4 x i8>* %a
  %2 = sext <4 x i8> %1 to <4 x i32>
  store <4 x i32> %2, <4 x i32>* undef, align 4
  ret void
}
    238 
; Checks the modeled cost of zext <16 x i8> -> <16 x i16>: 3 on SSE2, 2 on SSE4.1.
define void @zext_v16i8_to_v16i16(<16 x i8>* %a) {
; SSE2: zext_v16i8_to_v16i16
; SSE2: cost of 3 {{.*}} zext
;
; SSE41: zext_v16i8_to_v16i16
; SSE41: cost of 2 {{.*}} zext
;
  %1 = load <16 x i8>, <16 x i8>* %a
  %2 = zext <16 x i8> %1 to <16 x i16>
  store <16 x i16> %2, <16 x i16>* undef, align 4
  ret void
}
    251 
; Checks the modeled cost of sext <16 x i8> -> <16 x i16>: 4 on SSE2, 2 on SSE4.1.
define void @sext_v16i8_to_v16i16(<16 x i8>* %a) {
; SSE2: sext_v16i8_to_v16i16
; SSE2: cost of 4 {{.*}} sext
;
; SSE41: sext_v16i8_to_v16i16
; SSE41: cost of 2 {{.*}} sext
;
  %1 = load <16 x i8>, <16 x i8>* %a
  %2 = sext <16 x i8> %1 to <16 x i16>
  store <16 x i16> %2, <16 x i16>* undef, align 4
  ret void
}
    264 
; Checks the modeled cost of zext <8 x i8> -> <8 x i16>: 1 on both SSE2 and SSE4.1.
define void @zext_v8i8_to_v8i16(<8 x i8>* %a) {
; SSE2: zext_v8i8_to_v8i16
; SSE2: cost of 1 {{.*}} zext
;
; SSE41: zext_v8i8_to_v8i16
; SSE41: cost of 1 {{.*}} zext
;
  %1 = load <8 x i8>, <8 x i8>* %a
  %2 = zext <8 x i8> %1 to <8 x i16>
  store <8 x i16> %2, <8 x i16>* undef, align 4
  ret void
}
    277 
; Checks the modeled cost of sext <8 x i8> -> <8 x i16>: 2 on SSE2, 1 on SSE4.1.
define void @sext_v8i8_to_v8i16(<8 x i8>* %a) {
; SSE2: sext_v8i8_to_v8i16
; SSE2: cost of 2 {{.*}} sext
;
; SSE41: sext_v8i8_to_v8i16
; SSE41: cost of 1 {{.*}} sext
;
  %1 = load <8 x i8>, <8 x i8>* %a
  %2 = sext <8 x i8> %1 to <8 x i16>
  store <8 x i16> %2, <8 x i16>* undef, align 4
  ret void
}
    290 
; Checks the modeled cost of zext <4 x i8> -> <4 x i16>: 1 on both SSE2 and SSE4.1.
define void @zext_v4i8_to_v4i16(<4 x i8>* %a) {
; SSE2: zext_v4i8_to_v4i16
; SSE2: cost of 1 {{.*}} zext
;
; SSE41: zext_v4i8_to_v4i16
; SSE41: cost of 1 {{.*}} zext
;
  %1 = load <4 x i8>, <4 x i8>* %a
  %2 = zext <4 x i8> %1 to <4 x i16>
  store <4 x i16> %2, <4 x i16>* undef, align 4
  ret void
}
    303 
; Checks the modeled cost of sext <4 x i8> -> <4 x i16>: 6 on SSE2, 2 on SSE4.1.
define void @sext_v4i8_to_v4i16(<4 x i8>* %a) {
; SSE2: sext_v4i8_to_v4i16
; SSE2: cost of 6 {{.*}} sext
;
; SSE41: sext_v4i8_to_v4i16
; SSE41: cost of 2 {{.*}} sext
;
  %1 = load <4 x i8>, <4 x i8>* %a
  %2 = sext <4 x i8> %1 to <4 x i16>
  store <4 x i16> %2, <4 x i16>* undef, align 4
  ret void
}
    316 
; Checks the modeled cost of trunc <16 x i32> -> <16 x i16>: 10 on SSE2, 6 on SSE4.1.
; As with the extension tests above-board, the load/store only anchor the trunc
; whose cost line FileCheck matches.
define void @truncate_v16i32_to_v16i16(<16 x i32>* %a) {
; SSE2: truncate_v16i32_to_v16i16
; SSE2: cost of 10 {{.*}} trunc
;
; SSE41: truncate_v16i32_to_v16i16
; SSE41: cost of 6 {{.*}} trunc
;
  %1 = load <16 x i32>, <16 x i32>* %a
  %2 = trunc <16 x i32> %1 to <16 x i16>
  store <16 x i16> %2, <16 x i16>* undef, align 4
  ret void
}
    329 
; Checks the modeled cost of trunc <8 x i32> -> <8 x i16>: 5 on SSE2, 3 on SSE4.1.
define void @truncate_v8i32_to_v8i16(<8 x i32>* %a) {
; SSE2: truncate_v8i32_to_v8i16
; SSE2: cost of 5 {{.*}} trunc
;
; SSE41: truncate_v8i32_to_v8i16
; SSE41: cost of 3 {{.*}} trunc
;
  %1 = load <8 x i32>, <8 x i32>* %a
  %2 = trunc <8 x i32> %1 to <8 x i16>
  store <8 x i16> %2, <8 x i16>* undef, align 4
  ret void
}
    342 
; Checks the modeled cost of trunc <4 x i32> -> <4 x i16>: 3 on SSE2, 1 on SSE4.1.
define void @truncate_v4i32_to_v4i16(<4 x i32>* %a) {
; SSE2: truncate_v4i32_to_v4i16
; SSE2: cost of 3 {{.*}} trunc
;
; SSE41: truncate_v4i32_to_v4i16
; SSE41: cost of 1 {{.*}} trunc
;
  %1 = load <4 x i32>, <4 x i32>* %a
  %2 = trunc <4 x i32> %1 to <4 x i16>
  store <4 x i16> %2, <4 x i16>* undef, align 4
  ret void
}
    355 
; Checks the modeled cost of trunc <16 x i32> -> <16 x i8>: 7 on both SSE2 and SSE4.1.
define void @truncate_v16i32_to_v16i8(<16 x i32>* %a) {
; SSE2: truncate_v16i32_to_v16i8
; SSE2: cost of 7 {{.*}} trunc
;
; SSE41: truncate_v16i32_to_v16i8
; SSE41: cost of 7 {{.*}} trunc
;
  %1 = load <16 x i32>, <16 x i32>* %a
  %2 = trunc <16 x i32> %1 to <16 x i8>
  store <16 x i8> %2, <16 x i8>* undef, align 4
  ret void
}
    368 
; Checks the modeled cost of trunc <8 x i32> -> <8 x i8>: 4 on SSE2, 3 on SSE4.1.
define void @truncate_v8i32_to_v8i8(<8 x i32>* %a) {
; SSE2: truncate_v8i32_to_v8i8
; SSE2: cost of 4 {{.*}} trunc
;
; SSE41: truncate_v8i32_to_v8i8
; SSE41: cost of 3 {{.*}} trunc
;
  %1 = load <8 x i32>, <8 x i32>* %a
  %2 = trunc <8 x i32> %1 to <8 x i8>
  store <8 x i8> %2, <8 x i8>* undef, align 4
  ret void
}
    381 
; Checks the modeled cost of trunc <4 x i32> -> <4 x i8>: 3 on SSE2, 1 on SSE4.1.
define void @truncate_v4i32_to_v4i8(<4 x i32>* %a) {
; SSE2: truncate_v4i32_to_v4i8
; SSE2: cost of 3 {{.*}} trunc
;
; SSE41: truncate_v4i32_to_v4i8
; SSE41: cost of 1 {{.*}} trunc
;
  %1 = load <4 x i32>, <4 x i32>* %a
  %2 = trunc <4 x i32> %1 to <4 x i8>
  store <4 x i8> %2, <4 x i8>* undef, align 4
  ret void
}
    394 
; Checks the modeled cost of trunc <16 x i16> -> <16 x i8>: 3 on both SSE2 and SSE4.1.
define void @truncate_v16i16_to_v16i8(<16 x i16>* %a) {
; SSE2: truncate_v16i16_to_v16i8
; SSE2: cost of 3 {{.*}} trunc
;
; SSE41: truncate_v16i16_to_v16i8
; SSE41: cost of 3 {{.*}} trunc
;
  %1 = load <16 x i16>, <16 x i16>* %a
  %2 = trunc <16 x i16> %1 to <16 x i8>
  store <16 x i8> %2, <16 x i8>* undef, align 4
  ret void
}
    407 
; Checks the modeled cost of trunc <8 x i16> -> <8 x i8>: 2 on SSE2, 1 on SSE4.1.
define void @truncate_v8i16_to_v8i8(<8 x i16>* %a) {
; SSE2: truncate_v8i16_to_v8i8
; SSE2: cost of 2 {{.*}} trunc
;
; SSE41: truncate_v8i16_to_v8i8
; SSE41: cost of 1 {{.*}} trunc
;
  %1 = load <8 x i16>, <8 x i16>* %a
  %2 = trunc <8 x i16> %1 to <8 x i8>
  store <8 x i8> %2, <8 x i8>* undef, align 4
  ret void
}
    420 
; Checks the modeled cost of trunc <4 x i16> -> <4 x i8>: 4 on SSE2, 2 on SSE4.1.
define void @truncate_v4i16_to_v4i8(<4 x i16>* %a) {
; SSE2: truncate_v4i16_to_v4i8
; SSE2: cost of 4 {{.*}} trunc
;
; SSE41: truncate_v4i16_to_v4i8
; SSE41: cost of 2 {{.*}} trunc
;
  %1 = load <4 x i16>, <4 x i16>* %a
  %2 = trunc <4 x i16> %1 to <4 x i8>
  store <4 x i8> %2, <4 x i8>* undef, align 4
  ret void
}
    433