; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s -check-prefix=LAA
; RUN: opt -passes='require<aa>,require<scalar-evolution>,require<aa>,loop(print-access-info)' -aa-pipeline='basic-aa' -disable-output < %s  2>&1 | FileCheck %s --check-prefix=LAA
; RUN: opt -loop-versioning -S < %s | FileCheck %s -check-prefix=LV

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; For this loop:
;   unsigned index = 0;
;   for (int i = 0; i < n; i++) {
;    A[2 * index] = A[2 * index] + B[i];
;    index++;
;   }
;
; SCEV is unable to prove that the 32-bit index expression 2 * index does
; not overflow.
;
; Analyzing the IR does not help us because the GEPs are not
; affine AddRecExprs. However, we can turn them into AddRecExprs
; using SCEV Predicates.
;
; Once we have an affine expression we need to add an additional NUSW flag
; to check that the pointers don't wrap, since the GEPs are not
; inbounds.
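;
; As a minimal C sketch (illustration only, not part of the test) of the wrap
; the predicate has to rule out: index is an unsigned 32-bit value, so
; 2 * index wraps modulo 2^32 and the access is no longer a simple strided
; walk over A.
;
;   #include <stdint.h>
;
;   uint64_t element_index(uint32_t index) {
;     uint32_t idx32 = 2 * index;   /* wraps: index = 0x80000000 gives idx32 = 0 */
;     return (uint64_t)idx32;       /* the zext the IR performs before the GEP */
;   }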

; LAA-LABEL: f1
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nusw>
; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;    (zext i32 {0,+,2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the SCEV expression:
;    i64 {0,+,2}<%for.body>
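;
; The versioning check emitted below is, in spirit, the following C sketch
; (a hypothetical helper, not code generated verbatim by the pass): the 32-bit
; recurrence {0,+,2} must be able to take backedge-taken-count steps without
; wrapping.
;
;   #include <stdbool.h>
;   #include <stdint.h>
;
;   static bool stride2_index_wraps(uint64_t backedge_taken_count) {
;     if (backedge_taken_count > UINT32_MAX) /* i32 induction cannot count that far */
;       return true;
;     uint32_t last;                         /* last value of 2 * index */
;     return __builtin_mul_overflow((uint32_t)backedge_taken_count, 2u, &last);
;   }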

; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (zext i32 {0,+,2}<%for.body> to i64)) + %a)
; LAA-NEXT: --> {%a,+,4}<%for.body>


; LV-LABEL: f1
; LV-LABEL: for.body.lver.check

; LV:      [[BETrunc:%[^ ]*]] = trunc i64 [[BE:%[^ ]*]] to i32
; LV-NEXT: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc]])
; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 0, [[OFMulResult]]
; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 0, [[OFMulResult]]
; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp ugt i32 [[SubEnd]], 0
; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp ult i32 [[AddEnd]], 0
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg]], i1 [[CmpPos]]
; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]

; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]

; LV-NEXT: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]

; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f1(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; For this loop:
;   unsigned index = n;
;   for (int i = 0; i < n; i++) {
;    A[2 * index] = A[2 * index] + B[i];
;    index--;
;   }
;
; the SCEV expression for 2 * index is not an AddRecExpr
; (and implicitly not affine). However, we are able to make assumptions
; that will turn the expression into an affine one and continue the
; analysis.
;
; Once we have an affine expression we need to add an additional NUSW flag
; to check that the pointers don't wrap, since the GEPs are not
; inbounds.
;
; This loop has a negative stride for A, and the nusw flag is required in
; order to properly extend the increment from i32 -4 to i64 -4.
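;
; A minimal C sketch (illustration only) of why zero-extending the decreasing
; 32-bit index is only valid if it never wraps below zero: once the unsigned
; value underflows, the zero-extended 64-bit offset jumps to nearly 2^32
; instead of continuing to decrease.
;
;   #include <stdint.h>
;
;   uint64_t offset_of(uint32_t two_index) {   /* two_index follows {2*n,+,-2} */
;     return (uint64_t)two_index;              /* 2 -> 2, 0 -> 0, but 0 - 2 -> 0xFFFFFFFE */
;   }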

; LAA-LABEL: f2
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nusw>
; LAA-NEXT: {((2 * (zext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;     (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the following SCEV:
;     i64 {zext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>

; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)) + %a)
; LAA-NEXT: --> {((2 * (zext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body>

; LV-LABEL: f2
; LV-LABEL: for.body.lver.check

; LV: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc:%[^ ]*]])
; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 [[Start:%[^ ]*]], [[OFMulResult]]
; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 [[Start]], [[OFMulResult]]
; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp ugt i32 [[SubEnd]], [[Start]]
; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp ult i32 [[AddEnd]], [[Start]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg]], i1 [[CmpPos]]
; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]

; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]

; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[Start:%[^ ]*]], [[OFMulResult1]]
; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[Start]], [[OFMulResult1]]
; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[Start]]
; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[Start]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]

; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f2(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; We replicate the tests above, but this time we sign-extend 2 * index instead
; of zero-extending it.

; LAA-LABEL: f3
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;     i64 (sext i32 {0,+,2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;     i64 {0,+,2}<%for.body>
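;
; A small C illustration (not part of the test) of why the assumption is now
; <nssw> rather than <nusw>: sign-extending 2 * index only matches the 64-bit
; recurrence if the 32-bit multiply never wraps in the signed sense.
;
;   #include <stdint.h>
;
;   int64_t sext_offset(int32_t index) {
;     int32_t two_index = 2 * index;   /* index = 0x40000000 wraps to INT32_MIN */
;     return (int64_t)two_index;       /* sign-extends to a large negative offset */
;   }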

; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (sext i32 {0,+,2}<%for.body> to i64)) + %a)
; LAA-NEXT: --> {%a,+,4}<%for.body>

; LV-LABEL: f3
; LV-LABEL: for.body.lver.check

; LV: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc:%[^ ]*]])
; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 0, [[OFMulResult]]
; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 0, [[OFMulResult]]
; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp sgt i32 [[SubEnd]], 0
; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp slt i32 [[AddEnd]], 0
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg]], i1 [[CmpPos]]
; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]

; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]

; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]

; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f3(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; LAA-LABEL: f4
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;     i64 (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;     i64 {sext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>

; LAA: [PSE]  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)) + %a)
; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body>

; LV-LABEL: f4
; LV-LABEL: for.body.lver.check

; LV: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc:%[^ ]*]])
; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 [[Start:%[^ ]*]], [[OFMulResult]]
; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 [[Start]], [[OFMulResult]]
; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp sgt i32 [[SubEnd]], [[Start]]
; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp slt i32 [[AddEnd]], [[Start]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg]], i1 [[CmpPos]]
; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]

; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]

; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[Start:%[^ ]*]], [[OFMulResult1]]
; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[Start]], [[OFMulResult1]]
; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[Start]]
; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[Start]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]

; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f4(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; The following function is similar to the one above, but has the GEP
; into pointer %a marked as inbounds. The index %mul doesn't have the nsw flag,
; which means that the SCEV expression for %mul can wrap, and we need
; a SCEV predicate to continue the analysis.
;
; We can still analyze this by adding the required no-wrap SCEV predicates.
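;
; A rough C model (illustration only) of this variant: the GEP itself takes the
; 32-bit index, so the widening to a 64-bit offset happens implicitly, and the
; predicate has to guarantee that 2 * index never wrapped before that widening.
;
;   #include <stdint.h>
;
;   int16_t *elem_addr(int16_t *a, int32_t index) {
;     int32_t mul = 2 * index;   /* no nsw: may wrap in 32 bits */
;     return &a[mul];            /* index is sign-extended to pointer width here */
;   }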

; LAA-LABEL: f5
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> Added Flags: <nusw>

; LAA: [PSE]  %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul:
; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)<nsw>
; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body>

; LV-LABEL: f5
; LV-LABEL: for.body.lver.check
; LV: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc:%[^ ]*]])
; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 [[Start:%[^ ]*]], [[OFMulResult]]
; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 [[Start]], [[OFMulResult]]
; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp sgt i32 [[SubEnd]], [[Start]]
; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp slt i32 [[AddEnd]], [[Start]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg]], i1 [[CmpPos]]
; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]

; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]

; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[Start:%[^ ]*]], [[OFMulResult1]]
; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[Start]], [[OFMulResult1]]
; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[Start]]
; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[Start]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]

; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f5(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2

  %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}