Home | History | Annotate | Download | only in X86
      1 ; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
      2 
      3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
      4 target triple = "x86_64-apple-macosx10.8.0"
      5 
      6 ; int foo(int * restrict B,  int * restrict A, int n, int m) {
      7 ;   B[0] = n * A[0] + m * A[0];
      8 ;   B[1] = n * A[1] + m * A[1];
      9 ;   B[2] = n * A[2] + m * A[2];
     10 ;   B[3] = n * A[3] + m * A[3];
     11 ;   return 0;
     12 ; }
     13 
     14 ; CHECK-LABEL: @foo(
     15 ; CHECK: load <4 x i32>
     16 ; CHECK: mul <4 x i32>
     17 ; CHECK: store <4 x i32>
     18 ; CHECK: ret
     19 define i32 @foo(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) #0 { ; scalar input: four adjacent i32 load/mul/store chains from %A to %B that share one multiplier
     20 entry: ; NOTE(review): no definition of attribute group #0 (used on the define line) is visible in this text - confirm it is intentional
     21   %0 = load i32* %A, align 4 ; A[0]
     22   %mul238 = add i32 %m, %n ; m + n, computed once; the C sketch before this function writes each element as n*A[i] + m*A[i]
     23   %add = mul i32 %0, %mul238 ; A[0] * (m + n)
     24   store i32 %add, i32* %B, align 4 ; B[0]
     25   %arrayidx4 = getelementptr inbounds i32* %A, i64 1 ; &A[1]
     26   %1 = load i32* %arrayidx4, align 4 ; A[1]
     27   %add8 = mul i32 %1, %mul238 ; A[1] * (m + n)
     28   %arrayidx9 = getelementptr inbounds i32* %B, i64 1 ; &B[1]
     29   store i32 %add8, i32* %arrayidx9, align 4 ; B[1]
     30   %arrayidx10 = getelementptr inbounds i32* %A, i64 2 ; &A[2]
     31   %2 = load i32* %arrayidx10, align 4 ; A[2]
     32   %add14 = mul i32 %2, %mul238 ; A[2] * (m + n)
     33   %arrayidx15 = getelementptr inbounds i32* %B, i64 2 ; &B[2]
     34   store i32 %add14, i32* %arrayidx15, align 4 ; B[2]
     35   %arrayidx16 = getelementptr inbounds i32* %A, i64 3 ; &A[3]
     36   %3 = load i32* %arrayidx16, align 4 ; A[3]
     37   %add20 = mul i32 %3, %mul238 ; A[3] * (m + n)
     38   %arrayidx21 = getelementptr inbounds i32* %B, i64 3 ; &B[3]
     39   store i32 %add20, i32* %arrayidx21, align 4 ; B[3]
     40   ret i32 0 ; constant return: none of the loaded or computed values is used outside the four chains
     41 }
     42 
     43 
     44 ; int extr_user(int * restrict B,  int * restrict A, int n, int m) {
     45 ;   B[0] = n * A[0] + m * A[0];
     46 ;   B[1] = n * A[1] + m * A[1];
     47 ;   B[2] = n * A[2] + m * A[2];
     48 ;   B[3] = n * A[3] + m * A[3];
     49 ;   return A[0];
     50 ; }
     51 
     52 ; CHECK-LABEL: @extr_user(
     53 ; CHECK: load <4 x i32>
     54 ; CHECK: store <4 x i32>
     55 ; CHECK: extractelement <4 x i32>
     56 ; CHECK-NEXT: ret
     57 define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) { ; scalar input: four adjacent i32 load/mul/store chains from %A to %B, plus a return of the first loaded value
     58 entry:
     59   %0 = load i32* %A, align 4 ; A[0]; also returned at the end of the function
     60   %mul238 = add i32 %m, %n ; m + n, computed once and shared by all four multiplies
     61   %add = mul i32 %0, %mul238 ; A[0] * (m + n)
     62   store i32 %add, i32* %B, align 4 ; B[0]
     63   %arrayidx4 = getelementptr inbounds i32* %A, i64 1 ; &A[1]
     64   %1 = load i32* %arrayidx4, align 4 ; A[1]
     65   %add8 = mul i32 %1, %mul238 ; A[1] * (m + n)
     66   %arrayidx9 = getelementptr inbounds i32* %B, i64 1 ; &B[1]
     67   store i32 %add8, i32* %arrayidx9, align 4 ; B[1]
     68   %arrayidx10 = getelementptr inbounds i32* %A, i64 2 ; &A[2]
     69   %2 = load i32* %arrayidx10, align 4 ; A[2]
     70   %add14 = mul i32 %2, %mul238 ; A[2] * (m + n)
     71   %arrayidx15 = getelementptr inbounds i32* %B, i64 2 ; &B[2]
     72   store i32 %add14, i32* %arrayidx15, align 4 ; B[2]
     73   %arrayidx16 = getelementptr inbounds i32* %A, i64 3 ; &A[3]
     74   %3 = load i32* %arrayidx16, align 4 ; A[3]
     75   %add20 = mul i32 %3, %mul238 ; A[3] * (m + n)
     76   %arrayidx21 = getelementptr inbounds i32* %B, i64 3 ; &B[3]
     77   store i32 %add20, i32* %arrayidx21, align 4 ; B[3]
     78   ret i32 %0  ;<--------- This value has multiple users: %0 (A[0]) feeds both the multiply above and this return; the expectations before this function require an extractelement <4 x i32> on the line directly preceding the ret
     79 }
     80 
     81 ; In this example we have an external user that is not the first element in the vector.
     82 ; CHECK-LABEL: @extr_user1(
     83 ; CHECK: load <4 x i32>
     84 ; CHECK: store <4 x i32>
     85 ; CHECK: extractelement <4 x i32>
     86 ; CHECK-NEXT: ret
     87 define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) { ; scalar input: four adjacent i32 load/mul/store chains from %A to %B, plus a return of the second loaded value
     88 entry:
     89   %0 = load i32* %A, align 4 ; A[0]
     90   %mul238 = add i32 %m, %n ; m + n, computed once and shared by all four multiplies
     91   %add = mul i32 %0, %mul238 ; A[0] * (m + n)
     92   store i32 %add, i32* %B, align 4 ; B[0]
     93   %arrayidx4 = getelementptr inbounds i32* %A, i64 1 ; &A[1]
     94   %1 = load i32* %arrayidx4, align 4 ; A[1]; also returned at the end of the function
     95   %add8 = mul i32 %1, %mul238 ; A[1] * (m + n)
     96   %arrayidx9 = getelementptr inbounds i32* %B, i64 1 ; &B[1]
     97   store i32 %add8, i32* %arrayidx9, align 4 ; B[1]
     98   %arrayidx10 = getelementptr inbounds i32* %A, i64 2 ; &A[2]
     99   %2 = load i32* %arrayidx10, align 4 ; A[2]
    100   %add14 = mul i32 %2, %mul238 ; A[2] * (m + n)
    101   %arrayidx15 = getelementptr inbounds i32* %B, i64 2 ; &B[2]
    102   store i32 %add14, i32* %arrayidx15, align 4 ; B[2]
    103   %arrayidx16 = getelementptr inbounds i32* %A, i64 3 ; &A[3]
    104   %3 = load i32* %arrayidx16, align 4 ; A[3]
    105   %add20 = mul i32 %3, %mul238 ; A[3] * (m + n)
    106   %arrayidx21 = getelementptr inbounds i32* %B, i64 3 ; &B[3]
    107   store i32 %add20, i32* %arrayidx21, align 4 ; B[3]
    108   ret i32 %1  ;<--------- This value has multiple users: %1 (A[1], the second of the four loads) feeds both its multiply and this return; the expectations before this function require an extractelement <4 x i32> on the line directly preceding the ret
    109 }
    110