Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a9 -stress-ivchain | FileCheck %s
      2 ; REQUIRES: asserts
      3 
      4 ; @sharedidx is an unrolled variant of this loop:
      5 ;  for (unsigned long i = 0; i < len; i += s) {
      6 ;    c[i] = a[i] + b[i];
      7 ;  }
      8 ; where 's' cannot be folded into the addressing mode.
      9 ;
     10 ; This is not quite profitable to chain. But with -stress-ivchain, we
     11 ; can form three address chains in place of the shared induction
     12 ; variable.
     13 
     14 ; rdar://10674430
      15 define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
      16 entry:
      17 ; CHECK-LABEL: sharedidx:
      18   %cmp8 = icmp eq i32 %len, 0  ; is len zero?
      19   br i1 %cmp8, label %for.end, label %for.body  ; if so, skip the loop entirely
      20 ; Unrolled copy 0 (loop header). The checks in each copy expect two ldrb instructions that use a register offset with base writeback ('!'), matching the copy's two i8 loads; the store to %c is not matched.
      21 for.body:                                         ; preds = %entry, %for.body.3
      22 ; CHECK: %for.body
      23 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
      24 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
      25   %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]  ; shared index i: 0 on entry, else the index advanced by copy 3
      26   %arrayidx = getelementptr inbounds i8* %a, i32 %i.09  ; &a[i]
      27   %0 = load i8* %arrayidx, align 1  ; a[i]
      28   %conv6 = zext i8 %0 to i32  ; widen a[i] to i32 (zero-extended)
      29   %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09  ; &b[i]
      30   %1 = load i8* %arrayidx1, align 1  ; b[i]
      31   %conv27 = zext i8 %1 to i32  ; widen b[i] to i32 (zero-extended)
      32   %add = add nsw i32 %conv27, %conv6  ; b[i] + a[i] in i32
      33   %conv3 = trunc i32 %add to i8  ; keep only the low 8 bits of the sum
      34   %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09  ; &c[i]
      35   store i8 %conv3, i8* %arrayidx4, align 1  ; c[i] = (i8)(a[i] + b[i])
      36   %add5 = add i32 %i.09, %s  ; i + s: index used by copy 1
      37   %cmp = icmp ult i32 %add5, %len  ; unsigned compare of the next index against len
      38   br i1 %cmp, label %for.body.1, label %for.end  ; fall into copy 1 while in range, otherwise exit
      39 ; Common exit: reached from the entry guard and from the bounds test at the end of every unrolled copy.
      40 for.end:                                          ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
      41   ret void
      42 ; Unrolled copy 1: same body as copy 0, indexed by %add5 (i + s).
      43 for.body.1:                                       ; preds = %for.body
      44 ; CHECK: %for.body.1
      45 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
      46 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
      47   %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5  ; &a[i + s]
      48   %2 = load i8* %arrayidx.1, align 1
      49   %conv6.1 = zext i8 %2 to i32
      50   %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5  ; &b[i + s]
      51   %3 = load i8* %arrayidx1.1, align 1
      52   %conv27.1 = zext i8 %3 to i32
      53   %add.1 = add nsw i32 %conv27.1, %conv6.1
      54   %conv3.1 = trunc i32 %add.1 to i8
      55   %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5  ; &c[i + s]
      56   store i8 %conv3.1, i8* %arrayidx4.1, align 1
      57   %add5.1 = add i32 %add5, %s  ; i + 2*s: index used by copy 2
      58   %cmp.1 = icmp ult i32 %add5.1, %len
      59   br i1 %cmp.1, label %for.body.2, label %for.end  ; fall into copy 2 while in range, otherwise exit
      60 ; Unrolled copy 2: same body, indexed by %add5.1 (i + 2*s).
      61 for.body.2:                                       ; preds = %for.body.1
      62 ; CHECK: %for.body.2
      63 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
      64 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
      65   %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1  ; &a[i + 2*s]
      66   %4 = load i8* %arrayidx.2, align 1
      67   %conv6.2 = zext i8 %4 to i32
      68   %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1  ; &b[i + 2*s]
      69   %5 = load i8* %arrayidx1.2, align 1
      70   %conv27.2 = zext i8 %5 to i32
      71   %add.2 = add nsw i32 %conv27.2, %conv6.2
      72   %conv3.2 = trunc i32 %add.2 to i8
      73   %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1  ; &c[i + 2*s]
      74   store i8 %conv3.2, i8* %arrayidx4.2, align 1
      75   %add5.2 = add i32 %add5.1, %s  ; i + 3*s: index used by copy 3
      76   %cmp.2 = icmp ult i32 %add5.2, %len
      77   br i1 %cmp.2, label %for.body.3, label %for.end  ; fall into copy 3 while in range, otherwise exit
      78 ; Unrolled copy 3: same body, indexed by %add5.2 (i + 3*s); ends with the loop back-edge.
      79 for.body.3:                                       ; preds = %for.body.2
      80 ; CHECK: %for.body.3
      81 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
      82 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
      83   %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2  ; &a[i + 3*s]
      84   %6 = load i8* %arrayidx.3, align 1
      85   %conv6.3 = zext i8 %6 to i32
      86   %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2  ; &b[i + 3*s]
      87   %7 = load i8* %arrayidx1.3, align 1
      88   %conv27.3 = zext i8 %7 to i32
      89   %add.3 = add nsw i32 %conv27.3, %conv6.3
      90   %conv3.3 = trunc i32 %add.3 to i8
      91   %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2  ; &c[i + 3*s]
      92   store i8 %conv3.3, i8* %arrayidx4.3, align 1
      93   %add5.3 = add i32 %add5.2, %s  ; i + 4*s: fed back into the %i.09 phi in %for.body
      94   %cmp.3 = icmp ult i32 %add5.3, %len
      95   br i1 %cmp.3, label %for.body, label %for.end  ; back-edge to copy 0 while in range, otherwise exit
      96 }
     97