Home | History | Annotate | Download | only in X86
      1 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
      2 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=SSE3
      3 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=AVX2
      4 
      5 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
        ; Same triple as the -mtriple argument passed on each opt command line in this file.
      6 target triple = "x86_64-apple-macosx10.8.0"
      7 
      8 define i32 @add(i32 %arg) {
          ; Expected cost-model output for vector integer add. The unprefixed
          ; FileCheck directives below belong to the default prefix, i.e. the
          ; opt invocation that uses -mcpu=corei7-avx and passes no explicit
          ; check prefix. %arg is unused and all operands are undef, so only the
          ; vector type changes from one instruction to the next.
          ; 128-bit vector of i32.
      9   ;CHECK: cost of 1 {{.*}} add
     10   %A = add <4 x i32> undef, undef
          ; 256-bit vector of i32; the expected cost is 4, not 1
          ; (presumably the operation is split for this CPU - TODO confirm).
     11   ;CHECK: cost of 4 {{.*}} add
     12   %B = add <8 x i32> undef, undef
          ; 128-bit vector of i64.
     13   ;CHECK: cost of 1 {{.*}} add
     14   %C = add <2 x i64> undef, undef
          ; 256-bit vector of i64.
     15   ;CHECK: cost of 4 {{.*}} add
     16   %D = add <4 x i64> undef, undef
          ; 512-bit vector of i64; expected cost is twice the 256-bit case.
     17   ;CHECK: cost of 8 {{.*}} add
     18   %E = add <8 x i64> undef, undef
          ; The return instruction is expected to be free.
     19   ;CHECK: cost of 0 {{.*}} ret
     20   ret i32 undef
     21 }
     22 
     23 
     24 define i32 @xor(i32 %arg) {
          ; Expected cost-model output for vector xor under the default check
          ; prefix (the -mcpu=corei7-avx invocation). Unlike the add test above,
          ; the 256-bit cases here are expected to cost 1, the same as the
          ; 128-bit cases. %arg is unused; all operands are undef.
          ; 128-bit vector of i32.
     25   ;CHECK: cost of 1 {{.*}} xor
     26   %A = xor <4 x i32> undef, undef
          ; 256-bit vector of i32.
     27   ;CHECK: cost of 1 {{.*}} xor
     28   %B = xor <8 x i32> undef, undef
          ; 128-bit vector of i64.
     29   ;CHECK: cost of 1 {{.*}} xor
     30   %C = xor <2 x i64> undef, undef
          ; 256-bit vector of i64.
     31   ;CHECK: cost of 1 {{.*}} xor
     32   %D = xor <4 x i64> undef, undef
          ; The return instruction is expected to be free.
     33   ;CHECK: cost of 0 {{.*}} ret
     34   ret i32 undef
     35 }
     36 
     37 ; CHECK: mul
        ; The directive above matches the text "mul" in the output before the
        ; per-instruction cost checks of this function are applied. All checks in
        ; this function use the default prefix (the -mcpu=corei7-avx invocation).
     38 define void @mul() {
     39   ; A <2 x i32> multiply is legalized to a <2 x i64> vector multiply.
     40   ; A <2 x i64> vector multiply is implemented using
     41   ; 3 PMULUDQ, 2 PADDQ and 4 shifts, i.e. 9 instructions in total.
     42   ;CHECK: cost of 9 {{.*}} mul
     43   %A0 = mul <2 x i32> undef, undef
     44   ;CHECK: cost of 9 {{.*}} mul
     45   %A1 = mul <2 x i64> undef, undef
          ; 256-bit case: the expected cost is twice the <2 x i64> cost
          ; (presumably two 128-bit multiplies - TODO confirm).
     46   ;CHECK: cost of 18 {{.*}} mul
     47   %A2 = mul <4 x i64> undef, undef
     48   ret void
     49 }
     50 
     51 ; SSE3: sse3mull
        ; Checks in this function use the SSE3 prefix, which is selected by the
        ; opt invocation that uses -mcpu=core2.
     52 define void @sse3mull() {
          ; <4 x i32> multiply is expected to cost 6 on this CPU.
     53   ; SSE3: cost of 6 {{.*}} mul
     54   %A0 = mul <4 x i32> undef, undef
     55   ret void
          ; The directive below matches the name of the next function in the
          ; output; presumably it anchors the end of this function's section so
          ; later directives start searching after it - TODO confirm.
     56   ; SSE3: avx2mull
     57 }
     58 
     59 ; AVX2: avx2mull
        ; Checks in this function use the AVX2 prefix, which is selected by the
        ; opt invocation that uses -mcpu=core-avx2.
     60 define void @avx2mull() {
          ; <4 x i64> multiply is expected to cost 9 here, versus 18 under the
          ; default prefix in @mul.
     61   ; AVX2: cost of 9 {{.*}} mul
     62   %A0 = mul <4 x i64> undef, undef
     63   ret void
          ; The directive below matches the name of the next function in the
          ; output; presumably it anchors the end of this function's section so
          ; later directives start searching after it - TODO confirm.
     64   ; AVX2: fmul
     65 }
     66 
     67 ; CHECK: fmul
        ; Expected cost-model output for vector floating-point multiply under the
        ; default check prefix (the -mcpu=corei7-avx invocation). %arg is unused.
     68 define i32 @fmul(i32 %arg) {
          ; 128-bit vector of float.
     69   ;CHECK: cost of 2 {{.*}} fmul
     70   %A = fmul <4 x float> undef, undef
          ; 256-bit vector of float; same expected cost as the 128-bit case.
     71   ;CHECK: cost of 2 {{.*}} fmul
     72   %B = fmul <8 x float> undef, undef
     73   ret i32 undef
     74 }
     75 
     76 ; AVX: shift
     77 ; AVX2: shift
        ; Expected cost-model output for 128-bit vector shl / lshr / ashr.
        ; Directives with the AVX2 prefix are matched against the opt invocation
        ; that uses -mcpu=core-avx2.
        ; NOTE(review): none of the opt command lines visible at the top of this
        ; file selects the AVX prefix (they use the default prefix, SSE3 and
        ; AVX2), so the AVX-tagged expectations in this function do not appear to
        ; be exercised by FileCheck. Verify their values before enabling them.
     78 define void @shift() {
     79   ; AVX: cost of 2 {{.*}} shl
     80   ; AVX2: cost of 1 {{.*}} shl
     81   %A0 = shl <4 x i32> undef, undef
     82   ; AVX: cost of 2 {{.*}} shl
     83   ; AVX2: cost of 1 {{.*}} shl
     84   %A1 = shl <2 x i64> undef, undef
     85 
     86   ; AVX: cost of 2 {{.*}} lshr
     87   ; AVX2: cost of 1 {{.*}} lshr
     88   %B0 = lshr <4 x i32> undef, undef
     89   ; AVX: cost of 2 {{.*}} lshr
     90   ; AVX2: cost of 1 {{.*}} lshr
     91   %B1 = lshr <2 x i64> undef, undef
     92 
     93   ; AVX: cost of 2 {{.*}} ashr
     94   ; AVX2: cost of 1 {{.*}} ashr
     95   %C0 = ashr <4 x i32> undef, undef
          ; Arithmetic right shift of <2 x i64> is the one case here where the
          ; AVX2 expectation is not 1 (presumably no single instruction covers
          ; it on that CPU - TODO confirm).
     96   ; AVX: cost of 6 {{.*}} ashr
     97   ; AVX2: cost of 20 {{.*}} ashr
     98   %C1 = ashr <2 x i64> undef, undef
     99 
    100   ret void
    101 }
    102 
    103 ; AVX: avx2shift
    104 ; AVX2: avx2shift
        ; Expected cost-model output for 256-bit vector shl / lshr / ashr.
        ; Directives with the AVX2 prefix are matched against the opt invocation
        ; that uses -mcpu=core-avx2.
        ; NOTE(review): none of the opt command lines visible at the top of this
        ; file selects the AVX prefix (they use the default prefix, SSE3 and
        ; AVX2), so the AVX-tagged expectations in this function do not appear to
        ; be exercised by FileCheck. Verify their values before enabling them.
    105 define void @avx2shift() {
    106   ; AVX: cost of 2 {{.*}} shl
    107   ; AVX2: cost of 1 {{.*}} shl
    108   %A0 = shl <8 x i32> undef, undef
    109   ; AVX: cost of 2 {{.*}} shl
    110   ; AVX2: cost of 1 {{.*}} shl
    111   %A1 = shl <4 x i64> undef, undef
    112 
    113   ; AVX: cost of 2 {{.*}} lshr
    114   ; AVX2: cost of 1 {{.*}} lshr
    115   %B0 = lshr <8 x i32> undef, undef
    116   ; AVX: cost of 2 {{.*}} lshr
    117   ; AVX2: cost of 1 {{.*}} lshr
    118   %B1 = lshr <4 x i64> undef, undef
    119 
    120   ; AVX: cost of 2 {{.*}} ashr
    121   ; AVX2: cost of 1 {{.*}} ashr
    122   %C0 = ashr <8 x i32> undef, undef
          ; Arithmetic right shift of <4 x i64> is the one case here where the
          ; AVX2 expectation is not 1; both expected values are twice those for
          ; <2 x i64> in @shift.
    123   ; AVX: cost of 12 {{.*}} ashr
    124   ; AVX2: cost of 40 {{.*}} ashr
    125   %C1 = ashr <4 x i64> undef, undef
    126 
    127   ret void
    128 }
    129