Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
      2 ; RUN: opt < %s -loop-vectorize -S | FileCheck %s
      3 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
      4 target triple = "x86_64-apple-macosx10.11.0"
      5 
      6 ; This test checks vector GEP before scatter.
      7 ; The code below used to crash because incorrect vectorization of the GEP
      8 ; destroyed SSA form.
      9 
     10 @d = global [10 x [10 x i32]] zeroinitializer, align 16 ; 10x10 i32 array; base of every GEP/store in @_Z3fn1v
     11 @c = external global i32, align 4 ; loaded in entry: loops run only if > 8; sign-extended, it is the loop bound
     12 @a = external global i32, align 4 ; zero selects the for.body.us loop, non-zero selects for.body
     13 @b = external global i64, align 8 ; multiplied by a constant and tested for zero to pick the branch inside for.body.us
     14 
     15 ; Function Attrs: norecurse nounwind ssp uwtable
     16 define void @_Z3fn1v() #0 {
        ; Regression input for the loop vectorizer: each loop computes a GEP into a
        ; row of @d and indexes the result again with a second GEP before storing.
        ; The FileCheck assertions below pin the expected vector.body: per-lane
        ; scalar GEPs, then one vector GEP feeding a 16-lane masked scatter.
     17 ; CHECK-LABEL: @_Z3fn1v(
     18 ; CHECK:       vector.body:
        ; The back-edge operand reuses INDEX on the same line, so the phi must be
        ; fed by its own ".next" value rather than by an arbitrarily named value.
     19 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX]].next, %vector.body ]
     20 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ 
     21 ; CHECK-NEXT:    [[VEC_IND3:%.*]] = phi <16 x i64> [ 
     22 ; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
     23 ; CHECK-NEXT:    [[STEP_ADD4:%.*]] = add <16 x i64> [[VEC_IND3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
     24 ; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw <16 x i64> <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>, [[VEC_IND]]
     25 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 0
     26 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP11]]
     27 ; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x [10 x i32]*> undef, [10 x i32]* [[TMP12]], i32 0
     28 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 1
     29 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP14]]
     30 ; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <16 x [10 x i32]*> [[TMP13]], [10 x i32]* [[TMP15]], i32 1
     31 ; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 2
     32 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP17]]
     33 ; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <16 x [10 x i32]*> [[TMP16]], [10 x i32]* [[TMP18]], i32 2
     34 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 3
     35 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP20]]
     36 ; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <16 x [10 x i32]*> [[TMP19]], [10 x i32]* [[TMP21]], i32 3
     37 ; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 4
     38 ; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP23]]
     39 ; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <16 x [10 x i32]*> [[TMP22]], [10 x i32]* [[TMP24]], i32 4
     40 ; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 5
     41 ; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP26]]
     42 ; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <16 x [10 x i32]*> [[TMP25]], [10 x i32]* [[TMP27]], i32 5
     43 ; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 6
     44 ; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP29]]
     45 ; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <16 x [10 x i32]*> [[TMP28]], [10 x i32]* [[TMP30]], i32 6
     46 ; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 7
     47 ; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP32]]
     48 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <16 x [10 x i32]*> [[TMP31]], [10 x i32]* [[TMP33]], i32 7
     49 ; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 8
     50 ; CHECK-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP35]]
     51 ; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <16 x [10 x i32]*> [[TMP34]], [10 x i32]* [[TMP36]], i32 8
     52 ; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 9
     53 ; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP38]]
     54 ; CHECK-NEXT:    [[TMP40:%.*]] = insertelement <16 x [10 x i32]*> [[TMP37]], [10 x i32]* [[TMP39]], i32 9
     55 ; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 10
     56 ; CHECK-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP41]]
     57 ; CHECK-NEXT:    [[TMP43:%.*]] = insertelement <16 x [10 x i32]*> [[TMP40]], [10 x i32]* [[TMP42]], i32 10
     58 ; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 11
     59 ; CHECK-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP44]]
     60 ; CHECK-NEXT:    [[TMP46:%.*]] = insertelement <16 x [10 x i32]*> [[TMP43]], [10 x i32]* [[TMP45]], i32 11
     61 ; CHECK-NEXT:    [[TMP47:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 12
     62 ; CHECK-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP47]]
     63 ; CHECK-NEXT:    [[TMP49:%.*]] = insertelement <16 x [10 x i32]*> [[TMP46]], [10 x i32]* [[TMP48]], i32 12
     64 ; CHECK-NEXT:    [[TMP50:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 13
     65 ; CHECK-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP50]]
     66 ; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <16 x [10 x i32]*> [[TMP49]], [10 x i32]* [[TMP51]], i32 13
     67 ; CHECK-NEXT:    [[TMP53:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 14
     68 ; CHECK-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP53]]
     69 ; CHECK-NEXT:    [[TMP55:%.*]] = insertelement <16 x [10 x i32]*> [[TMP52]], [10 x i32]* [[TMP54]], i32 14
     70 ; CHECK-NEXT:    [[TMP56:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 15
     71 ; CHECK-NEXT:    [[TMP57:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP56]]
     72 ; CHECK-NEXT:    [[TMP58:%.*]] = insertelement <16 x [10 x i32]*> [[TMP55]], [10 x i32]* [[TMP57]], i32 15
     73 ; CHECK-NEXT:    [[TMP59:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
     74 ; CHECK-NEXT:    [[TMP60:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 0
     75 ; CHECK-NEXT:    [[TMP61:%.*]] = extractelement <16 x i64> [[TMP59]], i32 0
     76 ; CHECK-NEXT:    [[TMP62:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP60]], i64 [[TMP61]], i64 0
     77 ; CHECK-NEXT:    [[TMP63:%.*]] = insertelement <16 x i32*> undef, i32* [[TMP62]], i32 0
     78 ; CHECK-NEXT:    [[TMP64:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 1
     79 ; CHECK-NEXT:    [[TMP65:%.*]] = extractelement <16 x i64> [[TMP59]], i32 1
     80 ; CHECK-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP64]], i64 [[TMP65]], i64 0
     81 ; CHECK-NEXT:    [[TMP67:%.*]] = insertelement <16 x i32*> [[TMP63]], i32* [[TMP66]], i32 1
     82 ; CHECK-NEXT:    [[TMP68:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 2
     83 ; CHECK-NEXT:    [[TMP69:%.*]] = extractelement <16 x i64> [[TMP59]], i32 2
     84 ; CHECK-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP68]], i64 [[TMP69]], i64 0
     85 ; CHECK-NEXT:    [[TMP71:%.*]] = insertelement <16 x i32*> [[TMP67]], i32* [[TMP70]], i32 2
     86 ; CHECK-NEXT:    [[TMP72:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 3
     87 ; CHECK-NEXT:    [[TMP73:%.*]] = extractelement <16 x i64> [[TMP59]], i32 3
     88 ; CHECK-NEXT:    [[TMP74:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP72]], i64 [[TMP73]], i64 0
     89 ; CHECK-NEXT:    [[TMP75:%.*]] = insertelement <16 x i32*> [[TMP71]], i32* [[TMP74]], i32 3
     90 ; CHECK-NEXT:    [[TMP76:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 4
     91 ; CHECK-NEXT:    [[TMP77:%.*]] = extractelement <16 x i64> [[TMP59]], i32 4
     92 ; CHECK-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP76]], i64 [[TMP77]], i64 0
     93 ; CHECK-NEXT:    [[TMP79:%.*]] = insertelement <16 x i32*> [[TMP75]], i32* [[TMP78]], i32 4
     94 ; CHECK-NEXT:    [[TMP80:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 5
     95 ; CHECK-NEXT:    [[TMP81:%.*]] = extractelement <16 x i64> [[TMP59]], i32 5
     96 ; CHECK-NEXT:    [[TMP82:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP80]], i64 [[TMP81]], i64 0
     97 ; CHECK-NEXT:    [[TMP83:%.*]] = insertelement <16 x i32*> [[TMP79]], i32* [[TMP82]], i32 5
     98 ; CHECK-NEXT:    [[TMP84:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 6
     99 ; CHECK-NEXT:    [[TMP85:%.*]] = extractelement <16 x i64> [[TMP59]], i32 6
    100 ; CHECK-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP84]], i64 [[TMP85]], i64 0
    101 ; CHECK-NEXT:    [[TMP87:%.*]] = insertelement <16 x i32*> [[TMP83]], i32* [[TMP86]], i32 6
    102 ; CHECK-NEXT:    [[TMP88:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 7
    103 ; CHECK-NEXT:    [[TMP89:%.*]] = extractelement <16 x i64> [[TMP59]], i32 7
    104 ; CHECK-NEXT:    [[TMP90:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP88]], i64 [[TMP89]], i64 0
    105 ; CHECK-NEXT:    [[TMP91:%.*]] = insertelement <16 x i32*> [[TMP87]], i32* [[TMP90]], i32 7
    106 ; CHECK-NEXT:    [[TMP92:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 8
    107 ; CHECK-NEXT:    [[TMP93:%.*]] = extractelement <16 x i64> [[TMP59]], i32 8
    108 ; CHECK-NEXT:    [[TMP94:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP92]], i64 [[TMP93]], i64 0
    109 ; CHECK-NEXT:    [[TMP95:%.*]] = insertelement <16 x i32*> [[TMP91]], i32* [[TMP94]], i32 8
    110 ; CHECK-NEXT:    [[TMP96:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 9
    111 ; CHECK-NEXT:    [[TMP97:%.*]] = extractelement <16 x i64> [[TMP59]], i32 9
    112 ; CHECK-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP96]], i64 [[TMP97]], i64 0
    113 ; CHECK-NEXT:    [[TMP99:%.*]] = insertelement <16 x i32*> [[TMP95]], i32* [[TMP98]], i32 9
    114 ; CHECK-NEXT:    [[TMP100:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 10
    115 ; CHECK-NEXT:    [[TMP101:%.*]] = extractelement <16 x i64> [[TMP59]], i32 10
    116 ; CHECK-NEXT:    [[TMP102:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP100]], i64 [[TMP101]], i64 0
    117 ; CHECK-NEXT:    [[TMP103:%.*]] = insertelement <16 x i32*> [[TMP99]], i32* [[TMP102]], i32 10
    118 ; CHECK-NEXT:    [[TMP104:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 11
    119 ; CHECK-NEXT:    [[TMP105:%.*]] = extractelement <16 x i64> [[TMP59]], i32 11
    120 ; CHECK-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP104]], i64 [[TMP105]], i64 0
    121 ; CHECK-NEXT:    [[TMP107:%.*]] = insertelement <16 x i32*> [[TMP103]], i32* [[TMP106]], i32 11
    122 ; CHECK-NEXT:    [[TMP108:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 12
    123 ; CHECK-NEXT:    [[TMP109:%.*]] = extractelement <16 x i64> [[TMP59]], i32 12
    124 ; CHECK-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP108]], i64 [[TMP109]], i64 0
    125 ; CHECK-NEXT:    [[TMP111:%.*]] = insertelement <16 x i32*> [[TMP107]], i32* [[TMP110]], i32 12
    126 ; CHECK-NEXT:    [[TMP112:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 13
    127 ; CHECK-NEXT:    [[TMP113:%.*]] = extractelement <16 x i64> [[TMP59]], i32 13
    128 ; CHECK-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP112]], i64 [[TMP113]], i64 0
    129 ; CHECK-NEXT:    [[TMP115:%.*]] = insertelement <16 x i32*> [[TMP111]], i32* [[TMP114]], i32 13
    130 ; CHECK-NEXT:    [[TMP116:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 14
    131 ; CHECK-NEXT:    [[TMP117:%.*]] = extractelement <16 x i64> [[TMP59]], i32 14
    132 ; CHECK-NEXT:    [[TMP118:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP116]], i64 [[TMP117]], i64 0
    133 ; CHECK-NEXT:    [[TMP119:%.*]] = insertelement <16 x i32*> [[TMP115]], i32* [[TMP118]], i32 14
    134 ; CHECK-NEXT:    [[TMP120:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 15
    135 ; CHECK-NEXT:    [[TMP121:%.*]] = extractelement <16 x i64> [[TMP59]], i32 15
    136 ; CHECK-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP120]], i64 [[TMP121]], i64 0
    137 ; CHECK-NEXT:    [[TMP123:%.*]] = insertelement <16 x i32*> [[TMP119]], i32* [[TMP122]], i32 15
    138 ; CHECK-NEXT:    [[VECTORGEP:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP58]], <16 x i64> [[TMP59]], i64 0
    139 ; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[VECTORGEP]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
    140 entry:
        ; Both loops are skipped unless the value loaded from @c is greater than 8.
    141   %0 = load i32, i32* @c, align 4
    142   %cmp34 = icmp sgt i32 %0, 8
    143   br i1 %cmp34, label %for.body.lr.ph, label %for.cond.cleanup
    144 
    145 for.body.lr.ph:                                   ; preds = %entry
        ; Loop-invariant setup: @a == 0 selects the for.body.us loop, otherwise
        ; for.body; %3 is the sign-extended @c used as the bound by both loops.
    146   %1 = load i32, i32* @a, align 4
    147   %tobool = icmp eq i32 %1, 0
    148   %2 = load i64, i64* @b, align 8
    149   %mul = mul i64 %2, 4063299859190
    150   %tobool6 = icmp eq i64 %mul, 0
    151   %3 = sext i32 %0 to i64
    152   br i1 %tobool, label %for.body.us.preheader, label %for.body.preheader
    153 
    154 for.body.preheader:                               ; preds = %for.body.lr.ph
    155   br label %for.body
    156 
    157 for.body.us.preheader:                            ; preds = %for.body.lr.ph
    158   br label %for.body.us
    159 
    160 for.body.us:                                      ; preds = %for.body.us.preheader, %for.cond.cleanup4.us-lcssa.us.us
        ; Multi-block loop: the two successors store 8 or 7 depending on the
        ; loop-invariant %tobool6, through the same pair of chained GEPs.
    161   %indvars.iv78 = phi i64 [ %indvars.iv.next79, %for.cond.cleanup4.us-lcssa.us.us ], [ 8, %for.body.us.preheader ]
    162   %indvars.iv70 = phi i64 [ %indvars.iv.next71, %for.cond.cleanup4.us-lcssa.us.us ], [ 0, %for.body.us.preheader ]
    163   %4 = sub nsw i64 8, %indvars.iv78
    164   %add.ptr.us = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 %indvars.iv78
    165   %5 = add nsw i64 %4, %indvars.iv70
    166   %arraydecay.us.us.us = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %5, i64 0
    167   br i1 %tobool6, label %for.body5.us.us.us.preheader, label %for.body5.us.us48.preheader
    168 
    169 for.body5.us.us48.preheader:                      ; preds = %for.body.us
    170   store i32 8, i32* %arraydecay.us.us.us, align 16
    171   %indvars.iv.next66 = or i64 %indvars.iv70, 1
    172   %6 = add nsw i64 %4, %indvars.iv.next66
    173   %arraydecay.us.us55.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %6, i64 0
    174   store i32 8, i32* %arraydecay.us.us55.1, align 8
    175   br label %for.cond.cleanup4.us-lcssa.us.us
    176 
    177 for.body5.us.us.us.preheader:                     ; preds = %for.body.us
    178   store i32 7, i32* %arraydecay.us.us.us, align 16
    179   %indvars.iv.next73 = or i64 %indvars.iv70, 1
    180   %7 = add nsw i64 %4, %indvars.iv.next73
    181   %arraydecay.us.us.us.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %7, i64 0
    182   store i32 7, i32* %arraydecay.us.us.us.1, align 8
    183   br label %for.cond.cleanup4.us-lcssa.us.us
    184 
    185 for.cond.cleanup4.us-lcssa.us.us:                 ; preds = %for.body5.us.us48.preheader, %for.body5.us.us.us.preheader
    186   %indvars.iv.next79 = add nuw nsw i64 %indvars.iv78, 2
    187   %cmp.us = icmp slt i64 %indvars.iv.next79, %3
    188   %indvars.iv.next71 = add nuw nsw i64 %indvars.iv70, 2
    189   br i1 %cmp.us, label %for.body.us, label %for.cond.cleanup.loopexit
    190 
    191 for.cond.cleanup.loopexit:                        ; preds = %for.cond.cleanup4.us-lcssa.us.us
    192   br label %for.cond.cleanup
    193 
    194 for.cond.cleanup.loopexit99:                      ; preds = %for.body
    195   br label %for.cond.cleanup
    196 
    197 for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit99, %for.cond.cleanup.loopexit, %entry
    198   ret void
    199 
    200 for.body:                                         ; preds = %for.body.preheader, %for.body
        ; Single-block loop: two stores of 8 per iteration through chained GEPs;
        ; both induction variables (starting at 8 and 0) advance by 2.
    201   %indvars.iv95 = phi i64 [ %indvars.iv.next96, %for.body ], [ 8, %for.body.preheader ]
    202   %indvars.iv87 = phi i64 [ %indvars.iv.next88, %for.body ], [ 0, %for.body.preheader ]
    203   %8 = sub nsw i64 8, %indvars.iv95
    204   %add.ptr = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 %indvars.iv95
    205   %9 = add nsw i64 %8, %indvars.iv87
    206   %arraydecay.us31 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr, i64 %9, i64 0
    207   store i32 8, i32* %arraydecay.us31, align 16
    208   %indvars.iv.next90 = or i64 %indvars.iv87, 1
    209   %10 = add nsw i64 %8, %indvars.iv.next90
    210   %arraydecay.us31.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr, i64 %10, i64 0
    211   store i32 8, i32* %arraydecay.us31.1, align 8
    212   %indvars.iv.next96 = add nuw nsw i64 %indvars.iv95, 2
    213   %cmp = icmp slt i64 %indvars.iv.next96, %3
    214   %indvars.iv.next88 = add nuw nsw i64 %indvars.iv87, 2
    215   br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit99
    216 }
    217 
    218 attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
        ; Attribute group #0 is applied to @_Z3fn1v; it selects the "knl" CPU with
        ; AVX-512 features enabled. NOTE(review): presumably this is what makes the
        ; 16-lane masked scatter legal/profitable for the vectorizer -- confirm.
    219