Home | History | Annotate | Download | only in X86
      1 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSSE3
      2 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
      3 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
      4 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
      5 
      6 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
      7 target triple = "x86_64-apple-macosx10.8.0"
      8 
      9 ; CHECK-LABEL: 'add'
        ; Expected cost-model results for vector integer add. Each directive inside
        ; the function matches the "cost of N" text reported for the instruction it
        ; names, under the feature level selected by its prefix. All operands are
        ; undef and no result is used; only the reported costs are checked.
     10 define i32 @add(i32 %arg) {
          ; <4 x i32> (128 bits): cost 1 at every feature level.
     11   ; SSSE3: cost of 1 {{.*}} %A = add
     12   ; SSE42: cost of 1 {{.*}} %A = add
     13   ; AVX: cost of 1 {{.*}} %A = add
     14   ; AVX2: cost of 1 {{.*}} %A = add
     15   %A = add <4 x i32> undef, undef
          ; <8 x i32> (256 bits): 2 on the SSE levels, 4 on AVX without AVX2, 1 on AVX2.
          ; NOTE(review): the AVX value presumably accounts for splitting the vector
          ; into 128-bit halves and recombining them - confirm against the X86 cost tables.
     16   ; SSSE3: cost of 2 {{.*}} %B = add
     17   ; SSE42: cost of 2 {{.*}} %B = add
     18   ; AVX: cost of 4 {{.*}} %B = add  
     19   ; AVX2: cost of 1 {{.*}} %B = add
     20   %B = add <8 x i32> undef, undef
          ; <2 x i64> (128 bits): cost 1 at every feature level.
     21   ; SSSE3: cost of 1 {{.*}} %C = add
     22   ; SSE42: cost of 1 {{.*}} %C = add
     23   ; AVX: cost of 1 {{.*}} %C = add
     24   ; AVX2: cost of 1 {{.*}} %C = add
     25   %C = add <2 x i64> undef, undef
          ; <4 x i64> (256 bits): same expectations as the <8 x i32> case.
     26   ; SSSE3: cost of 2 {{.*}} %D = add
     27   ; SSE42: cost of 2 {{.*}} %D = add
     28   ; AVX: cost of 4 {{.*}} %D = add
     29   ; AVX2: cost of 1 {{.*}} %D = add
     30   %D = add <4 x i64> undef, undef
          ; <8 x i64> (512 bits): each expectation is twice the <4 x i64> one.
     31   ; SSSE3: cost of 4 {{.*}} %E = add
     32   ; SSE42: cost of 4 {{.*}} %E = add
     33   ; AVX: cost of 8 {{.*}} %E = add
     34   ; AVX2: cost of 2 {{.*}} %E = add
     35   %E = add <8 x i64> undef, undef
     36   ret i32 undef
     37 }
     38 
     39 ; CHECK-LABEL: 'xor'
        ; Expected cost-model results for vector xor. Each directive inside the
        ; function matches the "cost of N" text reported for the instruction it names,
        ; under the feature level selected by its prefix. All operands are undef and
        ; no result is used; only the reported costs are checked.
     40 define i32 @xor(i32 %arg) {
          ; <4 x i32> (128 bits): cost 1 at every feature level.
     41   ; SSSE3: cost of 1 {{.*}} %A = xor
     42   ; SSE42: cost of 1 {{.*}} %A = xor
     43   ; AVX: cost of 1 {{.*}} %A = xor
     44   ; AVX2: cost of 1 {{.*}} %A = xor
     45   %A = xor <4 x i32> undef, undef
          ; <8 x i32> (256 bits): 2 on the SSE levels, 1 on both AVX and AVX2.
          ; Unlike the 256-bit integer add in @add, plain AVX is expected to report 1 here.
     46   ; SSSE3: cost of 2 {{.*}} %B = xor
     47   ; SSE42: cost of 2 {{.*}} %B = xor
     48   ; AVX: cost of 1 {{.*}} %B = xor
     49   ; AVX2: cost of 1 {{.*}} %B = xor
     50   %B = xor <8 x i32> undef, undef
          ; <2 x i64> (128 bits): cost 1 at every feature level.
     51   ; SSSE3: cost of 1 {{.*}} %C = xor
     52   ; SSE42: cost of 1 {{.*}} %C = xor
     53   ; AVX: cost of 1 {{.*}} %C = xor
     54   ; AVX2: cost of 1 {{.*}} %C = xor
     55   %C = xor <2 x i64> undef, undef
          ; <4 x i64> (256 bits): same expectations as the <8 x i32> case.
     56   ; SSSE3: cost of 2 {{.*}} %D = xor
     57   ; SSE42: cost of 2 {{.*}} %D = xor
     58   ; AVX: cost of 1 {{.*}} %D = xor
     59   ; AVX2: cost of 1 {{.*}} %D = xor
     60   %D = xor <4 x i64> undef, undef
     61   ret i32 undef
     62 }
     63 
     64 ; CHECK-LABEL: 'mul'
        ; Expected cost-model results for vector integer multiply. Each directive
        ; inside the function matches the "cost of N" text reported for the
        ; instruction it names, under the feature level selected by its prefix.
        ; All operands are undef and no result is used.
     65 define void @mul() {
     66   ; A <2 x i32> gets expanded to a <2 x i64> vector.
     67   ; A <2 x i64> vector multiply is implemented using 3 PMULUDQ,
     68   ; 2 PADDS (packed adds) and 4 shifts: 3 + 2 + 4 = 9, the cost expected below.
     69   ; SSSE3: cost of 9 {{.*}} %A0 = mul
     70   ; SSE42: cost of 9 {{.*}} %A0 = mul
     71   ; AVX: cost of 9 {{.*}} %A0 = mul
     72   ; AVX2: cost of 9 {{.*}} %A0 = mul
     73   %A0 = mul <2 x i32> undef, undef
          ; <4 x i32>: 6 on SSSE3, 1 from SSE4.2 upwards.
          ; NOTE(review): presumably SSE4.1's PMULLD makes this a single instruction - confirm.
     74   ; SSSE3: cost of 6 {{.*}} %A1 = mul
     75   ; SSE42: cost of 1 {{.*}} %A1 = mul
     76   ; AVX: cost of 1 {{.*}} %A1 = mul
     77   ; AVX2: cost of 1 {{.*}} %A1 = mul
     78   %A1 = mul <4 x i32> undef, undef  
          ; <2 x i64>: cost 9 at every feature level (same as the expanded <2 x i32> case).
     79   ; SSSE3: cost of 9 {{.*}} %A2 = mul
     80   ; SSE42: cost of 9 {{.*}} %A2 = mul
     81   ; AVX: cost of 9 {{.*}} %A2 = mul
     82   ; AVX2: cost of 9 {{.*}} %A2 = mul
     83   %A2 = mul <2 x i64> undef, undef
          ; <4 x i64> (256 bits): twice the <2 x i64> cost everywhere except AVX2,
          ; where the expectation stays at 9.
     84   ; SSSE3: cost of 18 {{.*}} %A3 = mul
     85   ; SSE42: cost of 18 {{.*}} %A3 = mul
     86   ; AVX: cost of 18 {{.*}} %A3 = mul
     87   ; AVX2: cost of 9 {{.*}} %A3 = mul
     88   %A3 = mul <4 x i64> undef, undef
     89   ret void
     90 }
     91 
     92 ; CHECK-LABEL: 'fmul'
        ; Expected cost-model results for vector floating-point multiply. Each
        ; directive inside the function matches the "cost of N" text reported for the
        ; instruction it names, under the feature level selected by its prefix.
        ; All operands are undef and no result is used.
     93 define i32 @fmul(i32 %arg) {
          ; <4 x float> (128 bits): cost 2 at every feature level.
     94   ; SSSE3: cost of 2 {{.*}} %A = fmul
     95   ; SSE42: cost of 2 {{.*}} %A = fmul
     96   ; AVX: cost of 2 {{.*}} %A = fmul
     97   ; AVX2: cost of 2 {{.*}} %A = fmul
     98   %A = fmul <4 x float> undef, undef
          ; <8 x float> (256 bits): doubled to 4 on the SSE levels, still 2 on AVX and AVX2.
     99   ; SSSE3: cost of 4 {{.*}} %B = fmul
    100   ; SSE42: cost of 4 {{.*}} %B = fmul
    101   ; AVX: cost of 2 {{.*}} %B = fmul
    102   ; AVX2: cost of 2 {{.*}} %B = fmul
    103   %B = fmul <8 x float> undef, undef
    104   ret i32 undef
    105 }
    106 
    107 ; CHECK-LABEL: 'shift'
        ; Expected cost-model results for vector shifts where the shift amount is
        ; itself a vector operand (undef here). Three groups - shl, lshr, ashr - each
        ; cover <4 x i32>, <2 x i64>, <8 x i32> and <4 x i64>.
        ; In every group the SSSE3, SSE4.2 and AVX expectations are identical, and the
        ; 256-bit expectation is exactly twice the 128-bit one for the same element type.
        ; AVX2 expects 1 everywhere except ashr on i64 elements, where it expects 4.
        ; NOTE(review): presumably AVX2 has per-element variable shifts for all of
        ; these except 64-bit arithmetic right shift - confirm against the X86 cost tables.
    108 define void @shift() {
          ; --- shl ---
    109   ; SSSE3: cost of 10 {{.*}} %A0 = shl
    110   ; SSE42: cost of 10 {{.*}} %A0 = shl
    111   ; AVX: cost of 10 {{.*}} %A0 = shl
    112   ; AVX2: cost of 1 {{.*}} %A0 = shl
    113   %A0 = shl <4 x i32> undef, undef
    114   ; SSSE3: cost of 4 {{.*}} %A1 = shl
    115   ; SSE42: cost of 4 {{.*}} %A1 = shl
    116   ; AVX: cost of 4 {{.*}} %A1 = shl
    117   ; AVX2: cost of 1 {{.*}} %A1 = shl
    118   %A1 = shl <2 x i64> undef, undef
          ; 256-bit <8 x i32>: 2 x 10 before AVX2.
    119   ; SSSE3: cost of 20 {{.*}} %A2 = shl
    120   ; SSE42: cost of 20 {{.*}} %A2 = shl
    121   ; AVX: cost of 20 {{.*}} %A2 = shl
    122   ; AVX2: cost of 1 {{.*}} %A2 = shl
    123   %A2 = shl <8 x i32> undef, undef
          ; 256-bit <4 x i64>: 2 x 4 before AVX2.
    124   ; SSSE3: cost of 8 {{.*}} %A3 = shl
    125   ; SSE42: cost of 8 {{.*}} %A3 = shl
    126   ; AVX: cost of 8 {{.*}} %A3 = shl
    127   ; AVX2: cost of 1 {{.*}} %A3 = shl
    128   %A3 = shl <4 x i64> undef, undef
    129 
          ; --- lshr ---
          ; The 32-bit element cases expect 16/32 here versus 10/20 for shl.
    130   ; SSSE3: cost of 16 {{.*}} %B0 = lshr
    131   ; SSE42: cost of 16 {{.*}} %B0 = lshr
    132   ; AVX: cost of 16 {{.*}} %B0 = lshr
    133   ; AVX2: cost of 1 {{.*}} %B0 = lshr
    134   %B0 = lshr <4 x i32> undef, undef
    135   ; SSSE3: cost of 4 {{.*}} %B1 = lshr
    136   ; SSE42: cost of 4 {{.*}} %B1 = lshr
    137   ; AVX: cost of 4 {{.*}} %B1 = lshr
    138   ; AVX2: cost of 1 {{.*}} %B1 = lshr
    139   %B1 = lshr <2 x i64> undef, undef
    140   ; SSSE3: cost of 32 {{.*}} %B2 = lshr
    141   ; SSE42: cost of 32 {{.*}} %B2 = lshr
    142   ; AVX: cost of 32 {{.*}} %B2 = lshr
    143   ; AVX2: cost of 1 {{.*}} %B2 = lshr
    144   %B2 = lshr <8 x i32> undef, undef
    145   ; SSSE3: cost of 8 {{.*}} %B3 = lshr
    146   ; SSE42: cost of 8 {{.*}} %B3 = lshr
    147   ; AVX: cost of 8 {{.*}} %B3 = lshr
    148   ; AVX2: cost of 1 {{.*}} %B3 = lshr
    149   %B3 = lshr <4 x i64> undef, undef
    150 
          ; --- ashr ---
          ; The i64 element cases are the only ones where AVX2 expects more than 1.
    151   ; SSSE3: cost of 16 {{.*}} %C0 = ashr
    152   ; SSE42: cost of 16 {{.*}} %C0 = ashr
    153   ; AVX: cost of 16 {{.*}} %C0 = ashr
    154   ; AVX2: cost of 1 {{.*}} %C0 = ashr
    155   %C0 = ashr <4 x i32> undef, undef
    156   ; SSSE3: cost of 12 {{.*}} %C1 = ashr
    157   ; SSE42: cost of 12 {{.*}} %C1 = ashr
    158   ; AVX: cost of 12 {{.*}} %C1 = ashr
    159   ; AVX2: cost of 4 {{.*}} %C1 = ashr
    160   %C1 = ashr <2 x i64> undef, undef
    161   ; SSSE3: cost of 32 {{.*}} %C2 = ashr
    162   ; SSE42: cost of 32 {{.*}} %C2 = ashr
    163   ; AVX: cost of 32 {{.*}} %C2 = ashr
    164   ; AVX2: cost of 1 {{.*}} %C2 = ashr
    165   %C2 = ashr <8 x i32> undef, undef
    166   ; SSSE3: cost of 24 {{.*}} %C3 = ashr
    167   ; SSE42: cost of 24 {{.*}} %C3 = ashr
    168   ; AVX: cost of 24 {{.*}} %C3 = ashr
    169   ; AVX2: cost of 4 {{.*}} %C3 = ashr
    170   %C3 = ashr <4 x i64> undef, undef
    171 
    172   ret void
    173 }
    174