Home | History | Annotate | Download | only in X86
      1 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512
      2 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
      3 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX
        ; Each invocation above runs the cost-model analysis on this file for one CPU
        ; (knl, core-avx2, corei7-avx) and verifies the output with the shared prefix
        ; plus one CPU-specific prefix (AVX-512, AVX2 and AVX respectively).
      4 
      5 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
      6 target triple = "x86_64-apple-macosx10.8.0"
      7 
      8 define i32 @add(i32 %arg) {
        ; Costs of i1 <-> i32 zext/sext/trunc for 4-wide vectors, 8-wide vectors and
        ; scalars. Every expectation here uses the shared prefix, so it is verified
        ; in all three runs of this file.
      9 ; CHECK-LABEL: for function 'add'
     10   ; -- Same size registers --
     11   ;CHECK: cost of 1 {{.*}} zext
     12   %A = zext <4 x i1> undef to <4 x i32>
     13   ;CHECK: cost of 2 {{.*}} sext
     14   %B = sext <4 x i1> undef to <4 x i32>
     15   ;CHECK: cost of 0 {{.*}} trunc
     16   %C = trunc <4 x i32> undef to <4 x i1>
     17 
     18   ; -- Different size registers --
          ; The 8-wide casts only carry negative expectations: they must not report
          ; the same costs as the 4-wide casts above.
          ; NOTE(review): the negative zext pattern below is textually identical to
          ; the next positive zext pattern (the one for %G), so it looks like it can
          ; never fire -- confirm against FileCheck's negative-match semantics.
     19   ;CHECK-NOT: cost of 1 {{.*}} zext
     20   %D = zext <8 x i1> undef to <8 x i32>
     21   ;CHECK-NOT: cost of 2 {{.*}} sext
     22   %E = sext <8 x i1> undef to <8 x i32>
     23   ;CHECK-NOT: cost of 2 {{.*}} trunc
     24   %F = trunc <8 x i32> undef to <8 x i1>
     25 
     26   ; -- scalars --
     27 
     28   ;CHECK: cost of 1 {{.*}} zext
     29   %G = zext i1 undef to i32
     30   ;CHECK: cost of 0 {{.*}} trunc
     31   %H = trunc i32 undef to i1
     32 
     33   ;CHECK: cost of 0 {{.*}} ret
     34   ret i32 undef
     35 }
     36 
     37 define i32 @zext_sext(<8 x i1> %in) {
        ; Costs of vector zext/sext/trunc between integer element widths.
        ; The AVX2 and AVX expectations are anchored by the two labels below and
        ; match on the opcode only. The AVX-512 expectations have no label in this
        ; function, so each one names its result value (%D1 ... %G2) to bind to a
        ; specific instruction.
     38 ; CHECK-AVX2-LABEL: for function 'zext_sext'
     39 ; CHECK-AVX-LABEL: for function 'zext_sext'
     40   ;CHECK-AVX2: cost of 3 {{.*}} zext
     41   ;CHECK-AVX: cost of 4 {{.*}} zext
     42   %Z = zext <8 x i1> %in to <8 x i32>
     43   ;CHECK-AVX2: cost of 3 {{.*}} sext
     44   ;CHECK-AVX: cost of 7 {{.*}} sext
     45   %S = sext <8 x i1> %in to <8 x i32>
     46 
          ; Extensions that double the element width (128-bit -> 256-bit result).
     47   ;CHECK-AVX2: cost of 1 {{.*}} zext
     48   ;CHECK-AVX: cost of 4 {{.*}} zext
     49   %A1 = zext <16 x i8> undef to <16 x i16>
     50   ;CHECK-AVX2: cost of 1 {{.*}} sext
     51   ;CHECK-AVX: cost of 4 {{.*}} sext
     52   %A2 = sext <16 x i8> undef to <16 x i16>
     53   ;CHECK-AVX2: cost of 1 {{.*}} sext
     54   ;CHECK-AVX: cost of 4 {{.*}} sext
     55   %A = sext <8 x i16> undef to <8 x i32>
     56   ;CHECK-AVX2: cost of 1 {{.*}} zext
     57   ;CHECK-AVX: cost of 4 {{.*}} zext
     58   %B = zext <8 x i16> undef to <8 x i32>
     59   ;CHECK-AVX2: cost of 1 {{.*}} sext
     60   ;CHECK-AVX: cost of 4 {{.*}} sext
     61   %C = sext <4 x i32> undef to <4 x i64>
     62 
          ; Extensions that widen each element by 4x or 8x.
     63   ;CHECK-AVX2: cost of 3 {{.*}} zext
     64   ;CHECK-AVX: cost of 4 {{.*}} zext
     65   %C.v8i8.z = zext <8 x i8> undef to <8 x i32>
     66   ;CHECK-AVX2: cost of 3 {{.*}} sext
     67   ;CHECK-AVX: cost of 7 {{.*}} sext
     68   %C.v8i8.s = sext <8 x i8> undef to <8 x i32>
     69   ;CHECK-AVX2: cost of 3 {{.*}} zext
     70   ;CHECK-AVX: cost of 3 {{.*}} zext
     71   %C.v4i16.z = zext <4 x i16> undef to <4 x i64>
     72   ;CHECK-AVX2: cost of 3 {{.*}} sext
     73   ;CHECK-AVX: cost of 6 {{.*}} sext
     74   %C.v4i16.s = sext <4 x i16> undef to <4 x i64>
     75 
     76   ;CHECK-AVX2: cost of 3 {{.*}} zext
     77   ;CHECK-AVX: cost of 4 {{.*}} zext
     78   %C.v4i8.z = zext <4 x i8> undef to <4 x i64>
     79   ;CHECK-AVX2: cost of 3 {{.*}} sext
     80   ;CHECK-AVX: cost of 6 {{.*}} sext
     81   %C.v4i8.s = sext <4 x i8> undef to <4 x i64>
     82 
     83   ;CHECK-AVX2: cost of 1 {{.*}} zext
     84   ;CHECK-AVX: cost of 4 {{.*}} zext
     85   %D = zext <4 x i32> undef to <4 x i64>
     86 
          ; Extensions producing 512-bit results; only the AVX-512 run has
          ; expectations for these.
     87   ;CHECK-AVX512: cost of 1 {{.*}} %D1 = zext
     88   %D1 = zext <8 x i32> undef to <8 x i64>
     89 
     90   ;CHECK-AVX512: cost of 1 {{.*}} %D2 = sext
     91   %D2 = sext <8 x i32> undef to <8 x i64>
     92 
     93   ;CHECK-AVX512: cost of 1 {{.*}} %D3 = zext
     94   %D3 = zext <16 x i16> undef to <16 x i32>
     95   ;CHECK-AVX512: cost of 1 {{.*}} %D4 = zext
     96   %D4 = zext <16 x i8> undef to <16 x i32>
     97   ;CHECK-AVX512: cost of 2 {{.*}} %D5 = zext
     98   %D5 = zext <16 x i1> undef to <16 x i32>
     99 
          ; Truncations from 256-bit sources.
    100   ;CHECK-AVX2: cost of 2 {{.*}} trunc
    101   ;CHECK-AVX: cost of 4 {{.*}} trunc
    102   %E = trunc <4 x i64> undef to <4 x i32>
    103   ;CHECK-AVX2: cost of 2 {{.*}} trunc
    104   ;CHECK-AVX: cost of 5 {{.*}} trunc
    105   %F = trunc <8 x i32> undef to <8 x i16>
    106   ;CHECK-AVX2: cost of 4 {{.*}} trunc
    107   ;CHECK-AVX: cost of 4 {{.*}} trunc
    108   %F1 = trunc <16 x i16> undef to <16 x i8>
    109   ;CHECK-AVX2: cost of 2 {{.*}} trunc
    110   ;CHECK-AVX: cost of 4 {{.*}} trunc
    111   %F2 = trunc <8 x i32> undef to <8 x i8>
    112   ;CHECK-AVX2: cost of 2 {{.*}} trunc
    113   ;CHECK-AVX: cost of 4 {{.*}} trunc
    114   %F3 = trunc <4 x i64> undef to <4 x i8>
    115 
          ; Truncations from 512-bit sources. %G is the only one that all three
          ; runs have an expectation for.
    116   ;CHECK-AVX2: cost of 4 {{.*}} trunc
    117   ;CHECK-AVX: cost of 9 {{.*}} trunc
    118   ;CHECK-AVX512: cost of 1 {{.*}} %G = trunc
    119   %G = trunc <8 x i64> undef to <8 x i32>
    120 
    121   ;CHECK-AVX512: cost of 1 {{.*}} %G1 = trunc
    122   %G1 = trunc <16 x i32> undef to <16 x i16>
    123 
    124   ;CHECK-AVX512: cost of 1 {{.*}} %G2 = trunc
    125   %G2 = trunc <16 x i32> undef to <16 x i8>
    126   ret i32 undef
    127 }
    128 
    129 define i32 @masks8(<8 x i1> %in) {
        ; Costs of zero- and sign-extending an <8 x i1> argument to <8 x i32>.
        ; Only the AVX2 and AVX runs have a label and expectations here.
    130 ; CHECK-AVX2-LABEL: for function 'masks8'
    131 ; CHECK-AVX-LABEL: for function 'masks8'
    132 
    133   ;CHECK-AVX2: cost of 3 {{.*}} zext
    134   ;CHECK-AVX: cost of 4 {{.*}} zext
    135   %Z = zext <8 x i1> %in to <8 x i32>
    136   ;CHECK-AVX2: cost of 3 {{.*}} sext
    137   ;CHECK-AVX: cost of 7 {{.*}} sext
    138   %S = sext <8 x i1> %in to <8 x i32>
    139   ret i32 undef
    140 }
    141 
    142 define i32 @masks4(<4 x i1> %in) {
        ; Costs of zero- and sign-extending a <4 x i1> argument to <4 x i64>.
        ; Only the AVX2 and AVX runs have a label and expectations here.
    143 ; CHECK-AVX2-LABEL: for function 'masks4'
    144 ; CHECK-AVX-LABEL: for function 'masks4'
    145 
    146   ;CHECK-AVX2: cost of 3 {{.*}} zext
    147   ;CHECK-AVX: cost of 4 {{.*}} zext
    148   %Z = zext <4 x i1> %in to <4 x i64>
    149   ;CHECK-AVX2: cost of 3 {{.*}} sext
    150   ;CHECK-AVX: cost of 6 {{.*}} sext
    151   %S = sext <4 x i1> %in to <4 x i64>
    152   ret i32 undef
    153 }
    154 
    155 define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
        ; Costs of signed int-to-FP conversion for 4-element vectors with i1, i8,
        ; i16 and i32 sources, each converted to both float and double. All
        ; expectations use the shared prefix, so the same cost is required in every
        ; run of this file.
    156 ; CHECK-LABEL: for function 'sitofp4'
    157   ; CHECK: cost of 3 {{.*}} sitofp
    158   %A1 = sitofp <4 x i1> %a to <4 x float>
    159   ; CHECK: cost of 3 {{.*}} sitofp
    160   %A2 = sitofp <4 x i1> %a to <4 x double>
    161 
    162   ; CHECK: cost of 3 {{.*}} sitofp
    163   %B1 = sitofp <4 x i8> %b to <4 x float>
    164   ; CHECK: cost of 3 {{.*}} sitofp
    165   %B2 = sitofp <4 x i8> %b to <4 x double>
    166 
    167   ; CHECK: cost of 3 {{.*}} sitofp
    168   %C1 = sitofp <4 x i16> %c to <4 x float>
    169   ; CHECK: cost of 3 {{.*}} sitofp
    170   %C2 = sitofp <4 x i16> %c to <4 x double>
    171 
    172   ; CHECK: cost of 1 {{.*}} sitofp
    173   %D1 = sitofp <4 x i32> %d to <4 x float>
    174   ; CHECK: cost of 1 {{.*}} sitofp
    175   %D2 = sitofp <4 x i32> %d to <4 x double>
    176   ret void
    177 }
    178 
    179 define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
        ; Costs of signed int-to-float conversion for 8-element vectors with i1, i8,
        ; i16 and i32 sources. All expectations use the shared prefix, so the same
        ; cost is required in every run of this file.
    180 ; CHECK-LABEL: for function 'sitofp8'
    181   ; CHECK: cost of 8 {{.*}} sitofp
    182   %A1 = sitofp <8 x i1> %a to <8 x float>
    183 
    184   ; CHECK: cost of 8 {{.*}} sitofp
    185   %B1 = sitofp <8 x i8> %b to <8 x float>
    186 
    187   ; CHECK: cost of 5 {{.*}} sitofp
    188   %C1 = sitofp <8 x i16> %c to <8 x float>
    189 
    190   ; CHECK: cost of 1 {{.*}} sitofp
    191   %D1 = sitofp <8 x i32> %d to <8 x float>
    192   ret void
    193 }
    194 
    195 define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
        ; Costs of unsigned int-to-FP conversion for 4-element vectors with i1, i8,
        ; i16 and i32 sources, each converted to both float and double. The i1, i8
        ; and i16 cases use the shared prefix (all runs); the i32 cases (%D1, %D2)
        ; carry expectations for the AVX2 run only.
    196 ; CHECK-LABEL: for function 'uitofp4'
    197   ; CHECK: cost of 7 {{.*}} uitofp
    198   %A1 = uitofp <4 x i1> %a to <4 x float>
    199   ; CHECK: cost of 7 {{.*}} uitofp
    200   %A2 = uitofp <4 x i1> %a to <4 x double>
    201 
    202   ; CHECK: cost of 2 {{.*}} uitofp
    203   %B1 = uitofp <4 x i8> %b to <4 x float>
    204   ; CHECK: cost of 2 {{.*}} uitofp
    205   %B2 = uitofp <4 x i8> %b to <4 x double>
    206 
    207   ; CHECK: cost of 2 {{.*}} uitofp
    208   %C1 = uitofp <4 x i16> %c to <4 x float>
    209   ; CHECK: cost of 2 {{.*}} uitofp
    210   %C2 = uitofp <4 x i16> %c to <4 x double>
    211 
    212   ; CHECK-AVX2: cost of 6 {{.*}} uitofp
    213   %D1 = uitofp <4 x i32> %d to <4 x float>
    214   ; CHECK-AVX2: cost of 6 {{.*}} uitofp
    215   %D2 = uitofp <4 x i32> %d to <4 x double>
    216   ret void
    217 }
    218 
    219 define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
        ; Costs of unsigned int-to-float conversion for 8-element vectors with i1,
        ; i8, i16 and i32 sources. The label is shared by all runs, but coverage of
        ; the individual conversions differs: %A1 is verified for AVX2 only, %B1 and
        ; %C1 for AVX2 and AVX-512, and %D1 for all three CPUs.
    220 ; CHECK-LABEL: for function 'uitofp8'
    221   ; CHECK-AVX2: cost of 6 {{.*}} uitofp
    222   %A1 = uitofp <8 x i1> %a to <8 x float>
    223 
    224   ; CHECK-AVX2: cost of 5 {{.*}} uitofp
    225   ; CHECK-AVX512: cost of 2 {{.*}} uitofp
    226   %B1 = uitofp <8 x i8> %b to <8 x float>
    227 
    228   ; CHECK-AVX2: cost of 5 {{.*}} uitofp
    229   ; CHECK-AVX512: cost of 2 {{.*}} uitofp
    230   %C1 = uitofp <8 x i16> %c to <8 x float>
    231 
    232   ; CHECK-AVX2: cost of 8 {{.*}} uitofp
    233   ; CHECK-AVX512: cost of 1 {{.*}} uitofp
    234   ; CHECK-AVX: cost of 9 {{.*}} uitofp
    235   %D1 = uitofp <8 x i32> %d to <8 x float>
    236   ret void
    237 }
    238 
    239 define void @fp_conv(<8 x float> %a, <16 x float>%b, <4 x float> %c) {
        ; Costs of floating-point extension (float -> double) and truncation
        ; (double -> float) on 4- and 8-element vectors. Expectations exist for the
        ; AVX-512 run throughout and for the AVX2 run on %A3 and %A4 only.
        ; NOTE(review): parameter %b is never used, and %A1 performs exactly the
        ; same conversion as %A3 -- possibly %A1 was meant to extend %b; confirm.
    240 ;CHECK-LABEL: for function 'fp_conv'
    241   ; CHECK-AVX512: cost of 1 {{.*}} fpext
    242   %A1 = fpext <8 x float> %a to <8 x double>
    243 
    244   ; CHECK-AVX512: cost of 1 {{.*}} fpext
    245   %A2 = fpext <4 x float> %c to <4 x double>
    246 
    247   ; CHECK-AVX2:   cost of 3 {{.*}} %A3 = fpext
    248   ; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext
    249   %A3 = fpext <8 x float> %a to <8 x double>
    250 
    251   ; CHECK-AVX2:   cost of 3 {{.*}} %A4 = fptrunc
    252   ; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc
    253   %A4 = fptrunc <8 x double> undef to <8 x float>
    254 
    255   ; CHECK-AVX512: cost of 1 {{.*}} %A5 = fptrunc
    256   %A5 = fptrunc <4 x double> undef to <4 x float>
    257   ret void
    258 }
    259