Home | History | Annotate | Download | only in LoopAccessAnalysis
      1 ; RUN: opt -loop-accesses -analyze < %s | FileCheck %s
      2 
      3 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
      4 target triple = "aarch64--linux-gnueabi"
      5 
      6 ; 3 reads and 3 writes should need 12 memchecks
      7 ; CHECK: function 'testf':
      8 ; CHECK: Memory dependences are safe with run-time checks
      9 
     10 ; Memory dependencies have labels starting from 0, so in
     11 ; order to verify that we have n checks, we look for
     12 ; (n-1): and not n:.
     13 
     14 ; CHECK: Run-time memory checks:
     15 ; CHECK-NEXT: Check 0:
     16 ; CHECK: Check 11:
     17 ; CHECK-NOT: Check 12:
     18 
     19 define void @testf(i16* %a,
     20                i16* %b,
     21                i16* %c,
     22                i16* %d,
     23                i16* %e,
     24                i16* %f) {
     25 entry:
     26   br label %for.body
     27 
     28 for.body:                                         ; preds = %for.body, %entry
     29   %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
     30 
     31   %add = add nuw nsw i64 %ind, 1
     32 
     33   %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
     34   %loadA = load i16, i16* %arrayidxA, align 2
     35 
     36   %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
     37   %loadB = load i16, i16* %arrayidxB, align 2
     38 
     39   %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %ind
     40   %loadC = load i16, i16* %arrayidxC, align 2
     41 
     42   %mul = mul i16 %loadB, %loadA
     43   %mul1 = mul i16 %mul, %loadC
     44 
     45   %arrayidxD = getelementptr inbounds i16, i16* %d, i64 %ind
     46   store i16 %mul1, i16* %arrayidxD, align 2
     47 
     48   %arrayidxE = getelementptr inbounds i16, i16* %e, i64 %ind
     49   store i16 %mul, i16* %arrayidxE, align 2
     50 
     51   %arrayidxF = getelementptr inbounds i16, i16* %f, i64 %ind
     52   store i16 %mul1, i16* %arrayidxF, align 2
     53 
     54   %exitcond = icmp eq i64 %add, 20
     55   br i1 %exitcond, label %for.end, label %for.body
     56 
     57 for.end:                                          ; preds = %for.body
     58   ret void
     59 }
     60 
     61 ; The following (testg and testh) check that we can group
     62 ; memory checks of accesses which differ by a constant value.
     63 ; Both tests are based on the following C code:
     64 ;
     65 ; void testh(short *a, short *b, short *c) {
     66 ;   unsigned long ind = 0;
     67 ;   for (unsigned long ind = 0; ind < 20; ++ind) {
     68 ;     c[2 * ind] = a[ind] * a[ind + 1];
     69 ;     c[2 * ind + 1] = a[ind] * a[ind + 1] * b[ind];
     70 ;   }
     71 ; }
     72 ;
     73 ; It is sufficient to check the intervals
     74 ; [a, a + 21], [b, b + 20] against [c, c + 41].
     75 
     76 ; 3 reads and 2 writes - two of the reads can be merged,
     77 ; and the writes can be merged as well. This gives us a
     78 ; total of 2 memory checks.
     79 
     80 ; CHECK: function 'testg':
     81 
     82 ; CHECK: Run-time memory checks:
     83 ; CHECK-NEXT:   Check 0:
     84 ; CHECK-NEXT:     Comparing group ([[ZERO:.+]]):
     85 ; CHECK-NEXT:       %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
     86 ; CHECK-NEXT:       %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
     87 ; CHECK-NEXT:     Against group ([[ONE:.+]]):
     88 ; CHECK-NEXT:       %arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add
     89 ; CHECK-NEXT:       %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
     90 ; CHECK-NEXT:   Check 1:
     91 ; CHECK-NEXT:     Comparing group ({{.*}}[[ZERO]]):
     92 ; CHECK-NEXT:       %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
     93 ; CHECK-NEXT:       %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
     94 ; CHECK-NEXT:     Against group ([[TWO:.+]]):
     95 ; CHECK-NEXT:       %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
     96 ; CHECK-NEXT:   Grouped accesses:
     97 ; CHECK-NEXT:    Group {{.*}}[[ZERO]]:
     98 ; CHECK-NEXT:       (Low: %c High: (78 + %c))
     99 ; CHECK-NEXT:         Member: {(2 + %c),+,4}
    100 ; CHECK-NEXT:         Member: {%c,+,4}
    101 ; CHECK-NEXT:     Group {{.*}}[[ONE]]:
    102 ; CHECK-NEXT:       (Low: %a High: (40 + %a))
    103 ; CHECK-NEXT:         Member: {(2 + %a),+,2}
    104 ; CHECK-NEXT:         Member: {%a,+,2}
    105 ; CHECK-NEXT:     Group {{.*}}[[TWO]]:
    106 ; CHECK-NEXT:       (Low: %b High: (38 + %b))
    107 ; CHECK-NEXT:         Member: {%b,+,2}
    108 
    109 define void @testg(i16* %a,
    110                i16* %b,
    111                i16* %c) {
    112 entry:
    113   br label %for.body
    114 
    115 for.body:                                         ; preds = %for.body, %entry
    116   %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
    117   %store_ind = phi i64 [ 0, %entry ], [ %store_ind_next, %for.body ]
    118 
    119   %add = add nuw nsw i64 %ind, 1
    120   %store_ind_inc = add nuw nsw i64 %store_ind, 1
    121   %store_ind_next = add nuw nsw i64 %store_ind_inc, 1
    122 
    123   %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %ind
    124   %loadA = load i16, i16* %arrayidxA, align 2
    125 
    126   %arrayidxA1 = getelementptr inbounds i16, i16* %a, i64 %add
    127   %loadA1 = load i16, i16* %arrayidxA1, align 2
    128 
    129   %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
    130   %loadB = load i16, i16* %arrayidxB, align 2
    131 
    132   %mul = mul i16 %loadA, %loadA1
    133   %mul1 = mul i16 %mul, %loadB
    134 
    135   %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
    136   store i16 %mul1, i16* %arrayidxC, align 2
    137 
    138   %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
    139   store i16 %mul, i16* %arrayidxC1, align 2
    140 
    141   %exitcond = icmp eq i64 %add, 20
    142   br i1 %exitcond, label %for.end, label %for.body
    143 
    144 for.end:                                          ; preds = %for.body
    145   ret void
    146 }
    147 
    148 ; 3 reads and 2 writes - the writes can be merged into a single
    149 ; group, but the GEPs used for the reads are not marked as inbounds.
    150 ; We can still merge them because we are using a unit stride for
    151 ; accesses, so we cannot overflow the GEPs.
    152 
    153 ; CHECK: function 'testh':
    154 ; CHECK: Run-time memory checks:
    155 ; CHECK-NEXT:   Check 0:
    156 ; CHECK-NEXT:     Comparing group ([[ZERO:.+]]):
    157 ; CHECK-NEXT:         %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
    158 ; CHECK-NEXT:         %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
    159 ; CHECK-NEXT:     Against group ([[ONE:.+]]):
    160 ; CHECK-NEXT:         %arrayidxA1 = getelementptr i16, i16* %a, i64 %add
    161 ; CHECK-NEXT:         %arrayidxA = getelementptr i16, i16* %a, i64 %ind
    162 ; CHECK-NEXT:   Check 1:
    163 ; CHECK-NEXT:     Comparing group ({{.*}}[[ZERO]]):
    164 ; CHECK-NEXT:         %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
    165 ; CHECK-NEXT:         %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
    166 ; CHECK-NEXT:     Against group ([[TWO:.+]]):
    167 ; CHECK-NEXT:         %arrayidxB = getelementptr i16, i16* %b, i64 %ind
    168 ; CHECK-NEXT:   Grouped accesses:
    169 ; CHECK-NEXT:     Group {{.*}}[[ZERO]]:
    170 ; CHECK-NEXT:       (Low: %c High: (78 + %c))
    171 ; CHECK-NEXT:         Member: {(2 + %c),+,4}
    172 ; CHECK-NEXT:         Member: {%c,+,4}
    173 ; CHECK-NEXT:     Group {{.*}}[[ONE]]:
    174 ; CHECK-NEXT:       (Low: %a High: (40 + %a))
    175 ; CHECK-NEXT:         Member: {(2 + %a),+,2}
    176 ; CHECK-NEXT:         Member: {%a,+,2}
    177 ; CHECK-NEXT:     Group {{.*}}[[TWO]]:
    178 ; CHECK-NEXT:       (Low: %b High: (38 + %b))
    179 ; CHECK-NEXT:         Member: {%b,+,2}
    180 
    181 define void @testh(i16* %a,
    182                i16* %b,
    183                i16* %c) {
    184 entry:
    185   br label %for.body
    186 
    187 for.body:                                         ; preds = %for.body, %entry
    188   %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
    189   %store_ind = phi i64 [ 0, %entry ], [ %store_ind_next, %for.body ]
    190 
    191   %add = add nuw nsw i64 %ind, 1
    192   %store_ind_inc = add nuw nsw i64 %store_ind, 1
    193   %store_ind_next = add nuw nsw i64 %store_ind_inc, 1
    194 
    195   %arrayidxA = getelementptr i16, i16* %a, i64 %ind
    196   %loadA = load i16, i16* %arrayidxA, align 2
    197 
    198   %arrayidxA1 = getelementptr i16, i16* %a, i64 %add
    199   %loadA1 = load i16, i16* %arrayidxA1, align 2
    200 
    201   %arrayidxB = getelementptr i16, i16* %b, i64 %ind
    202   %loadB = load i16, i16* %arrayidxB, align 2
    203 
    204   %mul = mul i16 %loadA, %loadA1
    205   %mul1 = mul i16 %mul, %loadB
    206 
    207   %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %store_ind
    208   store i16 %mul1, i16* %arrayidxC, align 2
    209 
    210   %arrayidxC1 = getelementptr inbounds i16, i16* %c, i64 %store_ind_inc
    211   store i16 %mul, i16* %arrayidxC1, align 2
    212 
    213   %exitcond = icmp eq i64 %add, 20
    214   br i1 %exitcond, label %for.end, label %for.body
    215 
    216 for.end:                                          ; preds = %for.body
    217   ret void
    218 }
    219 
    220 ; Don't merge pointers if we need to perform a check against a pointer
    221 ; to the same underlying object (doing so would emit a check that could be
    222 ; falsely invalidated) For example, in the following loop:
    223 ;
    224 ; for (i = 0; i < 5000; ++i)
    225 ;   a[i + offset] = a[i] + a[i + 10000]
    226 ;
    227 ; we should not merge the intervals associated with the reads (0,5000) and
    228 ; (10000, 15000) into (0, 15000) as this will pottentially fail the check
    229 ; against the interval associated with the write.
    230 ;
    231 ; We cannot have this check unless ShouldRetryWithRuntimeCheck is set,
    232 ; and therefore the grouping algorithm would create a separate group for
    233 ; each pointer.
    234 
    235 ; CHECK: function 'testi':
    236 ; CHECK: Run-time memory checks:
    237 ; CHECK-NEXT:   Check 0:
    238 ; CHECK-NEXT:     Comparing group ([[ZERO:.+]]):
    239 ; CHECK-NEXT:       %storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind
    240 ; CHECK-NEXT:     Against group ([[ONE:.+]]):
    241 ; CHECK-NEXT:       %arrayidxA1 = getelementptr i16, i16* %a, i64 %ind
    242 ; CHECK-NEXT:   Check 1:
    243 ; CHECK-NEXT:     Comparing group ({{.*}}[[ZERO]]):
    244 ; CHECK-NEXT:       %storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind
    245 ; CHECK-NEXT:     Against group ([[TWO:.+]]):
    246 ; CHECK-NEXT:       %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2
    247 ; CHECK-NEXT:   Grouped accesses:
    248 ; CHECK-NEXT:     Group {{.*}}[[ZERO]]:
    249 ; CHECK-NEXT:       (Low: ((2 * %offset) + %a) High: (9998 + (2 * %offset) + %a))
    250 ; CHECK-NEXT:         Member: {((2 * %offset) + %a),+,2}<nsw><%for.body>
    251 ; CHECK-NEXT:     Group {{.*}}[[ONE]]:
    252 ; CHECK-NEXT:       (Low: %a High: (9998 + %a))
    253 ; CHECK-NEXT:         Member: {%a,+,2}<%for.body>
    254 ; CHECK-NEXT:     Group {{.*}}[[TWO]]:
    255 ; CHECK-NEXT:       (Low: (20000 + %a) High: (29998 + %a))
    256 ; CHECK-NEXT:         Member: {(20000 + %a),+,2}<%for.body>
    257 
    258 define void @testi(i16* %a,
    259                    i64 %offset) {
    260 entry:
    261   br label %for.body
    262 
    263 for.body:                                         ; preds = %for.body, %entry
    264   %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
    265   %store_ind = phi i64 [ %offset, %entry ], [ %store_ind_inc, %for.body ]
    266 
    267   %add = add nuw nsw i64 %ind, 1
    268   %store_ind_inc = add nuw nsw i64 %store_ind, 1
    269 
    270   %arrayidxA1 = getelementptr i16, i16* %a, i64 %ind
    271   %ind2 = add nuw nsw i64 %ind, 10000
    272   %arrayidxA2 = getelementptr i16, i16* %a, i64 %ind2
    273 
    274   %loadA1 = load i16, i16* %arrayidxA1, align 2
    275   %loadA2 = load i16, i16* %arrayidxA2, align 2
    276 
    277   %addres = add i16 %loadA1, %loadA2
    278 
    279   %storeidx = getelementptr inbounds i16, i16* %a, i64 %store_ind
    280   store i16 %addres, i16* %storeidx, align 2
    281 
    282   %exitcond = icmp eq i64 %add, 5000
    283   br i1 %exitcond, label %for.end, label %for.body
    284 
    285 for.end:                                          ; preds = %for.body
    286   ret void
    287 }
    288